In order to build, please create a new `build` directory and run `cmake ..` from there.")
endif()
-# Check build type
-if (NOT CMAKE_BUILD_TYPE)
- message(STATUS "No build type selected, default to Debug")
- set(CMAKE_BUILD_TYPE "Debug")
-endif()
-message(STATUS "Building ${PROJECT_NAME} in ${CMAKE_BUILD_TYPE}")
-
# Set compiler flags in HAILORT_COMPILE_OPTIONS
# TODO: Change HAILORT_COMPILE_OPTIONS to add_compile_options
if(WIN32)
# TODO: set this eventually? set(HAILORT_COMPILE_OPTIONS /Wall)
set(HAILORT_COMPILE_OPTIONS ${HAILORT_COMPILE_OPTIONS}
/W4
- /WX
/DWIN32_LEAN_AND_MEAN
/DNOMINMAX # NOMINMAX is required in order to play nice with std::min/std::max (otherwise Windows.h defines its own)
/D_HAILO_EXPORTING
add_definitions(-D_CRT_SECURE_NO_WARNINGS) # Disable "unsafe function" warnings
elseif(UNIX)
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID STREQUAL "QCC")
- set(HAILORT_COMPILE_OPTIONS ${HAILORT_COMPILE_OPTIONS} -Werror -Wall -Wextra -Wconversion)
+ set(HAILORT_COMPILE_OPTIONS ${HAILORT_COMPILE_OPTIONS} -Wall -Wextra -Wconversion)
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
- set(HAILORT_COMPILE_OPTIONS ${HAILORT_COMPILE_OPTIONS} -Werror -Wall -Wextra
+ set(HAILORT_COMPILE_OPTIONS ${HAILORT_COMPILE_OPTIONS} -Wall -Wextra
# TODO: remove the following warning suppressions
-Wno-conversion
-Wno-deprecated-declarations # On c structures with deprecated attribute, clang generates implicit move ctor
<p align="left">
- <img src=".hailort.png" />
+ <img src=".hailort.jpg" />
</p>
# HailoRT #
HailoRT is a lightweight, production-grade runtime library that runs on the host processor and provides a robust
-user-space runtime library (the HailoRT Library) with intuitive APIs in C/C++ for optimized performance
+user-space library (the HailoRT Library) with intuitive APIs in C/C++ for optimized performance
HailoRT consists of the following main components:
- HailoRT Library.
## About Hailo-8™
-Hailo-8 is a deep learning processor for edge devices. The Hailo-8 provides groundbraking efficiency for neural network deployment.
-The Hailo-8 edge AI processor, featuring up to 26 tera-operations per second (TOPS), significantly outperforms all other edge processors.
+Hailo-8 is a deep learning processor for edge devices. The Hailo-8 provides groundbreaking efficiency for neural network deployment.
+The Hailo-8 edge AI processor, featuring up to 26 Tera-Operations-Per-Second (TOPS), significantly outperforms all other edge processors.
Hailo-8 is available in various form-factors, including the Hailo-8 M.2 Module.
The Hailo-8 AI processor is designed to fit into a multitude of smart machines and devices, for a wide variety of sectors including Automotive, Smart Cities, Industry 4.0,
(vdma_channel_index) = ((src) & CONTEXT_SWITCH_DEFS__PACKED_VDMA_CHANNEL_ID__VDMA_CHANNEL_INDEX_MASK); \
} while (0)
+#define CONTEXT_SWITCH_DEFS__WRITE_ACTION_BY_TYPE_MAX_SIZE (4)
+
+
#pragma pack(push, 1)
typedef struct {
uint16_t core_bytes_per_buffer;
CONTEXT_SWITCH_DEFS__ACTION_TYPE_OPEN_BOUNDARY_INPUT_CHANNEL,
CONTEXT_SWITCH_DEFS__ACTION_TYPE_OPEN_BOUNDARY_OUTPUT_CHANNEL,
CONTEXT_SWITCH_DEFS__ACTION_TYPE_ENABLE_NMS,
+ CONTEXT_SWITCH_DEFS__ACTION_TYPE_WRITE_DATA_BY_TYPE,
+ CONTEXT_SWITCH_DEFS__ACTION_TYPE_SWITCH_LCU_BATCH,
/* Must be last */
CONTEXT_SWITCH_DEFS__ACTION_TYPE_COUNT
typedef struct {
uint8_t nms_unit_index;
uint8_t network_index;
+ uint16_t number_of_classes;
+ uint16_t burst_size;
} CONTEXT_SWITCH_DEFS__enable_nms_action_t;
+typedef enum {
+ WRITE_ACTION_TYPE_GENERAL = 0,
+ WRITE_ACTION_TYPE_WRITE_BATCH = 1,
+
+ /* Must be last */
+ WRITE_ACTION_BY_TYPE_COUNT
+} CONTEXT_SWITCH_DEFS__WRITE_ACTION_TYPE_t;
+
+typedef struct {
+ uint32_t address;
+ uint8_t data_type; // CONTEXT_SWITCH_DEFS__WRITE_ACTION_TYPE_t
+ uint32_t data;
+ uint8_t shift;
+ uint32_t mask;
+ uint8_t network_index;
+} CONTEXT_SWITCH_DEFS__write_data_by_type_action_t;
+
+typedef struct {
+ uint8_t packed_lcu_id;
+ uint8_t network_index;
+ uint32_t kernel_done_count;
+} CONTEXT_SWITCH_DEFS__switch_lcu_batch_action_data_t;
+
#pragma pack(pop)
#ifdef __cplusplus
CONTROL_PROTOCOL__CONTEXT_SWITCH_STATUS_COUNT,
} CONTROL_PROTOCOL__CONTEXT_SWITCH_STATUS_t;
+#define CONTROL_PROTOCOL__INFINITE_BATCH_COUNT (0)
typedef struct {
uint32_t state_machine_status_length;
uint8_t state_machine_status;
uint8_t application_index;
uint32_t dynamic_batch_size_length;
uint16_t dynamic_batch_size;
+ uint32_t batch_count_length;
+ uint16_t batch_count;
uint32_t keep_nn_config_during_reset_length;
uint8_t keep_nn_config_during_reset;
} CONTROL_PROTOCOL__change_context_switch_status_request_t;
uint8_t application_index;
uint32_t dynamic_batch_size_length;
uint16_t dynamic_batch_size;
+ uint32_t batch_count_length;
+ uint16_t batch_count;
uint32_t channels_info_length;
CONTROL_PROTOCOL__hw_infer_channels_info_t channels_info;
} CONTROL_PROTOCOL__change_hw_infer_status_request_t;
HEALTH_MONITOR_CPU_ECC_FATAL_EVENT_ID,
CONTEXT_SWITCH_BREAKPOINT_REACHED,
HEALTH_MONITOR_CLOCK_CHANGED_EVENT_ID,
+ HW_INFER_MANAGER_INFER_DONE,
+
D2H_EVENT_ID_COUNT /* Must be last*/
} D2H_EVENT_ID_t;
#define D2H_EVENT_HEALTH_MONITOR_CLOCK_CHANGED_EVENT_PARAMETER_COUNT (2)
+typedef struct {
+ uint32_t infer_cycles;
+} D2H_EVENT_hw_infer_manager_infer_done_message_t;
+
+#define D2H_EVENT_HW_INFER_MANAGER_INFER_DONE_PARAMETER_COUNT (1)
+
/* D2H_EVENT__message_parameters_t should be in the same order as hailo_notification_message_parameters_t */
typedef union {
D2H_EVENT_rx_error_event_message_t rx_error_event;
D2H_EVENT_health_monitor_cpu_ecc_event_message_t health_monitor_cpu_ecc_event;
D2H_EVENT_context_switch_breakpoint_reached_event_massage_t context_switch_breakpoint_reached_event;
D2H_EVENT_health_monitor_clock_changed_event_message_t health_monitor_clock_changed_event;
+ D2H_EVENT_hw_infer_manager_infer_done_message_t hw_infer_manager_infer_done_event;
} D2H_EVENT__message_parameters_t;
typedef struct {
FIRMWARE_STATUS__X(CONTROL_PROTOCOL_STATUS_INVALID_SLEEP_STATE)\
FIRMWARE_STATUS__X(CONTROL_PROTOCOL_STATUS_INVALID_HW_INFER_STATE_LENGTH)\
FIRMWARE_STATUS__X(CONTROL_PROTOCOL_STATUS_INVALID_CHANNELS_INFO_LENGTH)\
+ FIRMWARE_STATUS__X(CONTROL_PROTOCOL_STATUS_INVALID_BATCH_COUNT_LENGTH)\
\
FIRMWARE_MODULE__X(FIRMWARE_MODULE__POWER_MEASUREMENT)\
FIRMWARE_STATUS__X(HAILO_POWER_MEASUREMENT_STATUS_POWER_INIT_ERROR)\
FIRMWARE_STATUS__X(PCIE_SERVICE_STATUS_INVALID_H2D_CHANNEL_INDEX)\
FIRMWARE_STATUS__X(PCIE_SERVICE_STATUS_INVALID_D2H_CHANNEL_INDEX)\
FIRMWARE_STATUS__X(PCIE_SERVICE_INVALID_INITIAL_CREDIT_SIZE)\
+ FIRMWARE_STATUS__X(PCIE_SERVICE_ERROR_ADDING_CREDITS_TO_PCIE_CHANNEL)\
\
FIRMWARE_MODULE__X(FIRMWARE_MODULE__FIRMWARE_UPDATE)\
FIRMWARE_STATUS__X(FIRMWARE_UPDATE_STATUS_INVALID_PARAMETERS)\
FIRMWARE_STATUS__X(CONTEXT_SWITCH_STATUS_INVALID_DYNAMIC_CONTEXT_COUNT)\
FIRMWARE_STATUS__X(CONTEXT_SWITCH_STATUS_CONTEXT_INDEX_OUT_OF_RANGE)\
FIRMWARE_STATUS__X(CONTEXT_SWITCH_STATUS_TOTAL_PROVIDED_EDGE_LAYERS_LARGER_THEN_EXPECTED)\
+ FIRMWARE_STATUS__X(CONTEXT_SWITCH_STATUS_REACHED_TIMEOUT_WHILE_WAITING_FOR_NETWORK_IDLE)\
+ FIRMWARE_STATUS__X(CONTEXT_SWITCH_STATUS_WRITE_DATA_BY_TYPE_ACTION_INVALID_TYPE)\
+ FIRMWARE_STATUS__X(CONTEXT_SWITCH_STATUS_WRITE_DATA_BY_TYPE_ACTION_INVALID_MEMORY_SPACE)\
\
FIRMWARE_MODULE__X(FIRMWARE_MODULE__D2H_EVENT_MANAGER)\
FIRMWARE_STATUS__X(HAILO_D2H_EVENT_MANAGER_STATUS_MESSAGE_HIGH_PRIORITY_QUEUE_CREATE_FAILED)\
FIRMWARE_STATUS__X(VDMA_SERVICE_STATUS_INVALID_CONSTANTS)\
FIRMWARE_STATUS__X(VDMA_SERVICE_STATUS_INVALID_CHANNEL_INDEX)\
FIRMWARE_STATUS__X(VDMA_SERVICE_STATUS_INVALID_EDGE_LAYER_DIRECTION)\
+ FIRMWARE_STATUS__X(VDMA_SERVICE_INSUFFICIENT_DESCRIPTORS_COUNT)\
\
FIRMWARE_MODULE__X(FIRMWARE_MODULE__MEMORY_LOGGER)\
FIRMWARE_STATUS__X(MEMORY_LOGGER_STATUS_DEBUG_INSUFFICIENT_MEMORY)\
FIRMWARE_STATUS__X(NMS_MANAGER_STATUS_INVALID_NETWORK_INDEX)\
FIRMWARE_STATUS__X(NMS_MANAGER_STATUS_INVALID_NMS_UNIT_INDEX)\
FIRMWARE_STATUS__X(NMS_MANAGER_STATUS_INVALID_BATCH_SIZE)\
+ FIRMWARE_STATUS__X(NMS_MANAGER_STATUS_INVALID_NUM_CLASSES_SIZE)\
+ FIRMWARE_STATUS__X(NMS_MANAGER_STATUS_INVALID_BURST_SIZE)\
+ FIRMWARE_STATUS__X(NMS_MANAGER_STATUS_INVALID_LAST_FRAME_IN_BATCH_SIZE)\
\
FIRMWARE_MODULE__X(FIRMWARE_MODULE__CLUSTER_MANAGER)\
FIRMWARE_STATUS__X(CLUSTER_MANAGER_STATUS_INVALID_CLUSTER_INDEX)\
FIRMWARE_STATUS__X(CLUSTER_MANAGER_STATUS_INVALID_LCU_INDEX)\
FIRMWARE_STATUS__X(CLUSTER_MANAGER_STATUS_INVALID_KERNEL_DONE_ADDRESS)\
FIRMWARE_STATUS__X(CLUSTER_MANAGER_STATUS_RECEIVED_UNEXPECTED_INTERRUPT)\
+ FIRMWARE_STATUS__X(CLUSTER_MANAGER_STATUS_INVALID_NETWORK_INDEX)\
\
FIRMWARE_MODULE__X(FIRMWARE_MODULE__HW_INFER_MANAGER)\
FIRMWARE_STATUS__X(HW_INFER_MANAGER_STATUS_NETWORK_GROUP_NOT_CONFIGURED_BEFORE_INFER_START)\
#ifndef __UTILS_H__
#define __UTILS_H__
+#include <stdint.h>
+
/** A compile time assertion check.
*
* Validate at compile time that the predicate is true without
#define MICROSECONDS_IN_MILLISECOND (1000)
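+/* Returns ceil(log2(n)), e.g. ceil_log2(5) == 3 and ceil_log2(8) == 3; defined as 0 for n <= 1. */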
+static inline uint8_t ceil_log2(uint32_t n)
+{
+ uint8_t result = 0;
+
+ if (n <= 1) {
+ return 0;
+ }
+
+ while (n > 1) {
+ result++;
+ n = (n + 1) >> 1;
+ }
+
+ return result;
+}
+
#endif /* __UTILS_H__ */
option(HAILO_OFFLINE_COMPILATION "Don't download external dependencies" OFF)
option(HAILO_BUILD_SERVICE "Build hailort service" OFF)
option(HAILO_BUILD_PROFILER "Build hailort profiler" ON)
+option(HAILO_COMPILE_WARNING_AS_ERROR "Add compilation flag for treating compilation warnings as errors" OFF)
+option(HAILO_SUPPORT_PACKAGING "Create HailoRT package (internal)" OFF)
+
+if (HAILO_COMPILE_WARNING_AS_ERROR)
+ if(WIN32)
+ set(HAILORT_COMPILE_OPTIONS ${HAILORT_COMPILE_OPTIONS} /WX)
+ elseif(UNIX)
+ set(HAILORT_COMPILE_OPTIONS ${HAILORT_COMPILE_OPTIONS} -Werror)
+ else()
+ message(FATAL_ERROR "Unexpected host, stopping build")
+ endif()
+endif()
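+
+# For example, warnings-as-errors can be enabled at configure time (a usage sketch; the
+# build directory is illustrative):
+#   cmake .. -DHAILO_COMPILE_WARNING_AS_ERROR=ON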
# Flag for emulator (FPGA/Veloce)
if(HAILO_BUILD_EMULATOR)
# Set firmware version
add_definitions( -DFIRMWARE_VERSION_MAJOR=4 )
-add_definitions( -DFIRMWARE_VERSION_MINOR=13 )
+add_definitions( -DFIRMWARE_VERSION_MINOR=14 )
add_definitions( -DFIRMWARE_VERSION_REVISION=0 )
if(HAILO_BUILD_SERVICE)
add_definitions( -DHAILO_SUPPORT_MULTI_PROCESS )
set(DRIVER_INC_DIR ${PROJECT_SOURCE_DIR}/hailort/drivers/common)
set(RPC_DIR ${PROJECT_SOURCE_DIR}/hailort/rpc)
-if(HAILO_BUILD_PYBIND)
- if(NOT PYTHON_EXECUTABLE AND PYBIND11_PYTHON_VERSION)
- # PYBIND11_PYTHON_VERSION is prioritized (not virtual environment) if PYTHON_EXECUTABLE is not set.
- # See https://pybind11.readthedocs.io/en/stable/changelog.html#v2-6-0-oct-21-2020
- if((${CMAKE_VERSION} VERSION_LESS "3.22.0") AND (NOT WIN32))
- find_package(PythonInterp ${PYBIND11_PYTHON_VERSION} REQUIRED)
- set(PYTHON_EXECUTABLE ${Python_EXECUTABLE})
- else()
- find_package(Python3 ${PYBIND11_PYTHON_VERSION} REQUIRED EXACT COMPONENTS Interpreter Development)
- set(PYTHON_EXECUTABLE ${Python3_EXECUTABLE})
- endif()
- endif()
- add_subdirectory(external/pybind11 EXCLUDE_FROM_ALL)
-endif()
add_subdirectory(external/Catch2 EXCLUDE_FROM_ALL)
add_subdirectory(external/CLI11 EXCLUDE_FROM_ALL)
add_subdirectory(external/json EXCLUDE_FROM_ALL)
if(HAILO_WIN_DRIVER)
add_subdirectory(drivers/win)
+endif()
+
+if(HAILO_SUPPORT_PACKAGING)
add_subdirectory(packaging)
endif()
${CMAKE_CURRENT_SOURCE_DIR}/barrier.cpp
${CMAKE_CURRENT_SOURCE_DIR}/file_utils.cpp
${CMAKE_CURRENT_SOURCE_DIR}/string_utils.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/event_internal.cpp
${CMAKE_CURRENT_SOURCE_DIR}/device_measurements.cpp
)
namespace hailort
{
+class Barrier;
+using BarrierPtr = std::shared_ptr<Barrier>;
+
/**
* A barrier is a synchronization object that allows an expected number of threads to block until all of them
* arrive at the barrier.
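*
* For example (a usage sketch; assumes the constructor takes the expected thread count
* and that the class exposes arrive_and_wait()):
*   Barrier barrier(2);
*   std::thread worker([&barrier] () { barrier.arrive_and_wait(); });
*   barrier.arrive_and_wait();
*   worker.join();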
return ptr;
}
-
TemperatureMeasurement::TemperatureMeasurement(Device &device, hailo_status &status) : BaseMeasurement(device, status)
-{}
+{
+ /* Executing the check only if BaseMeasurement constructor has succeeded */
+ if (HAILO_SUCCESS == status) {
+ status = sanity_check();
+ }
+}
-hailo_status TemperatureMeasurement::start_measurement()
+hailo_status TemperatureMeasurement::sanity_check()
{
- // Checking sensor before starting thread
- auto temp_info = m_device.get_chip_temperature();
- CHECK_EXPECTED_AS_STATUS(temp_info);
+ auto temp_measurement = m_device.get_chip_temperature();
+ return temp_measurement.status();
+}
+hailo_status TemperatureMeasurement::start_measurement()
+{
m_is_thread_running = true;
m_thread = std::thread([this] () {
while (m_is_thread_running.load()) {
PowerMeasurement::PowerMeasurement(Device &device, hailo_power_measurement_types_t measurement_type, hailo_status &status)
: BaseMeasurement(device, status), m_measurement_type(measurement_type)
-{}
+{
+ /* Executing the check only if BaseMeasurement constructor has succeeded */
+ if (HAILO_SUCCESS == status) {
+ status = sanity_check();
+ }
+}
-hailo_status PowerMeasurement::start_measurement()
+hailo_status PowerMeasurement::sanity_check()
{
- // Checking sensor before starting thread
- auto power_info = m_device.power_measurement(HAILO_DVM_OPTIONS_AUTO, m_measurement_type);
- CHECK_EXPECTED_AS_STATUS(power_info);
+ auto power_measurement = m_device.power_measurement(HAILO_DVM_OPTIONS_AUTO, m_measurement_type);
+ return power_measurement.status();
+}
+hailo_status PowerMeasurement::start_measurement()
+{
m_is_thread_running = true;
m_thread = std::thread([this] () {
while (m_is_thread_running.load()) {
std::atomic_bool m_is_thread_running;
std::mutex m_mutex;
hailort::AccumulatorPtr m_acc;
+
+private:
+ virtual hailo_status sanity_check() = 0;
};
}
TemperatureMeasurement(hailort::Device &device, hailo_status &status);
+
+private:
+ virtual hailo_status sanity_check() override;
};
private:
hailo_power_measurement_types_t m_measurement_type;
+ virtual hailo_status sanity_check() override;
};
#endif /* _HAILO_DEVICE_MEASUREMENTS_HPP_ */
static const uint32_t MAX_INTERFACE_SIZE = IFNAMSIZ;
#endif
- static hailo_status get_interface_from_board_ip(const char *board_ip, char *interface_name, size_t interface_name_length);
- static hailo_status get_ip_from_interface(const char *interface_name, char *ip, size_t ip_length);
+ static Expected<std::string> get_interface_from_board_ip(const std::string &board_ip);
+ static Expected<std::string> get_ip_from_interface(const std::string &interface_name);
private:
#if defined(__GNUG__)
- static hailo_status get_interface_from_arp_entry(char *arp_entry, char *interface_name,
- size_t max_interface_name_length);
+ static Expected<std::string> get_interface_from_arp_entry(char *arp_entry);
#endif
};
--- /dev/null
+/**
+ * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+**/
+/**
+ * @file event_internal.cpp
+ * @brief Internal implementation for events, shared between all OSs.
+ **/
+
+#include "common/event_internal.hpp"
+#include "common/logger_macros.hpp"
+#include "common/utils.hpp"
+
+namespace hailort
+{
+
+Waitable::Waitable(underlying_waitable_handle_t handle) :
+ m_handle(handle)
+{}
+
+hailo_status Waitable::wait(std::chrono::milliseconds timeout)
+{
+ auto status = wait_for_single_object(m_handle, timeout);
+ if (HAILO_TIMEOUT == status) {
+ LOGGER__TRACE("wait_for_single_object failed with timeout (timeout={}ms)", timeout.count());
+ return status;
+ }
+ CHECK_SUCCESS(status);
+
+ status = post_wait();
+ CHECK_SUCCESS(status);
+
+ return HAILO_SUCCESS;
+}
+
+underlying_waitable_handle_t Waitable::get_underlying_handle()
+{
+ return m_handle;
+}
+
+WaitOrShutdown::WaitOrShutdown(WaitablePtr waitable, EventPtr shutdown_event) :
+ m_waitable(waitable),
+ m_shutdown_event(shutdown_event),
+ m_waitable_group(create_waitable_group(m_waitable, m_shutdown_event))
+{}
+
+hailo_status WaitOrShutdown::wait(std::chrono::milliseconds timeout)
+{
+ auto index = m_waitable_group.wait_any(timeout);
+ if (index.status() == HAILO_TIMEOUT) {
+ return index.status();
+ }
+ CHECK_EXPECTED_AS_STATUS(index);
+
+ assert(index.value() <= WAITABLE_INDEX);
+ return (index.value() == SHUTDOWN_INDEX) ? HAILO_SHUTDOWN_EVENT_SIGNALED : HAILO_SUCCESS;
+}
+
+hailo_status WaitOrShutdown::signal()
+{
+ return m_waitable->signal();
+}
+
+WaitableGroup WaitOrShutdown::create_waitable_group(WaitablePtr waitable, EventPtr shutdown_event)
+{
+ // Note the order - consistent with SHUTDOWN_INDEX, WAITABLE_INDEX.
+ std::vector<std::reference_wrapper<Waitable>> waitables;
+ waitables.emplace_back(std::ref(*shutdown_event));
+ waitables.emplace_back(std::ref(*waitable));
+ return waitables;
+}
+
+} /* namespace hailort */
--- /dev/null
+/**
+ * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+ **/
+/**
+ * @file event_internal.hpp
+ * @brief Internal event-related objects (WaitableGroup, WaitOrShutdown), shared between all OSs
+ **/
+
+#ifndef _EVENT_INTERNAL_HPP_
+#define _EVENT_INTERNAL_HPP_
+
+#include "hailo/event.hpp"
+
+#include <memory>
+#include <vector>
+#include <array>
+#include <chrono>
+#if defined(__GNUC__)
+#include <poll.h>
+#endif
+
+namespace hailort
+{
+
+// Group of Waitable objects that can be waited for together
+class WaitableGroup final
+{
+public:
+ WaitableGroup(std::vector<std::reference_wrapper<Waitable>> &&waitables) :
+ m_waitables(std::move(waitables)),
+ m_waitable_handles(create_waitable_handle_vector(m_waitables))
+ {}
+
+ /**
+ * Waits until any of the given waitables are signaled. Returns the index in the waitables vector
+ * of the signaled waitable with the smallest index value.
+ */
+ Expected<size_t> wait_any(std::chrono::milliseconds timeout);
+
+private:
+
+#if defined(__linux__)
+ using WaitableHandle = pollfd;
+#else
+ using WaitableHandle = underlying_waitable_handle_t;
+#endif
+
+ static std::vector<WaitableHandle> create_waitable_handle_vector(
+ const std::vector<std::reference_wrapper<Waitable>> &waitables)
+ {
+ std::vector<WaitableHandle> waitable_handles;
+ waitable_handles.reserve(waitables.size());
+ for (auto &waitable : waitables) {
+#if defined(__linux__)
+ waitable_handles.emplace_back(pollfd{waitable.get().get_underlying_handle(), POLLIN, 0});
+#else
+ waitable_handles.emplace_back(waitable.get().get_underlying_handle());
+#endif
+ }
+ return waitable_handles;
+ }
+
+ // Initialization dependency
+ std::vector<std::reference_wrapper<Waitable>> m_waitables;
+ // Store this vector here to avoid runtime allocations.
+ std::vector<WaitableHandle> m_waitable_handles;
+};
+
+class WaitOrShutdown final
+{
+public:
+ WaitOrShutdown(WaitablePtr waitable, EventPtr shutdown_event);
+ ~WaitOrShutdown() = default;
+
+ WaitOrShutdown(const WaitOrShutdown &other) = delete;
+ WaitOrShutdown &operator=(const WaitOrShutdown &other) = delete;
+ WaitOrShutdown(WaitOrShutdown &&other) noexcept = default;
+ WaitOrShutdown &operator=(WaitOrShutdown &&other) = delete;
+
+ // Waits on waitable or shutdown_event to be signaled:
+ // * If shutdown_event is signaled:
+ // - shutdown_event is not reset
+ // - HAILO_SHUTDOWN_EVENT_SIGNALED is returned
+ // * If waitable is signaled:
+ // - waitable is reset if waitable->is_auto_reset()
+ // - HAILO_SUCCESS is returned
+ // * If both waitable and shutdown_event are signaled:
+ // - shutdown_event is not reset
+ // - waitable is not reset
+ // - HAILO_SHUTDOWN_EVENT_SIGNALED is returned
+ // * If neither are signaled, then HAILO_TIMEOUT is returned
+ // * On any failure an appropriate status shall be returned
+ hailo_status wait(std::chrono::milliseconds timeout);
+ hailo_status signal();
+
+private:
+ static WaitableGroup create_waitable_group(WaitablePtr waitable, EventPtr shutdown_event);
+
+ // Note: We want to guarantee that if the shutdown event is signaled, HAILO_SHUTDOWN_EVENT_SIGNALED will be
+ // returned.
+ // Waitable::wait_any returns the smallest index value of all the signaled objects.
+ // Hence, SHUTDOWN_INDEX must come before WAITABLE_INDEX!
+ static const size_t SHUTDOWN_INDEX = 0;
+ static const size_t WAITABLE_INDEX = 1;
+
+ const WaitablePtr m_waitable;
+ const EventPtr m_shutdown_event;
+
+ WaitableGroup m_waitable_group;
+};
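+
+// A minimal usage sketch (hypothetical names; assumes a signalable WaitablePtr such as a
+// semaphore together with a shutdown EventPtr):
+//   WaitOrShutdown wait_or_shutdown(semaphore, shutdown_event);
+//   while (HAILO_SUCCESS == wait_or_shutdown.wait(std::chrono::milliseconds(100))) {
+//       process_item(); // hypothetical work function
+//   }
+//   // The loop exits on HAILO_SHUTDOWN_EVENT_SIGNALED, HAILO_TIMEOUT or another failure.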
+
+} /* namespace hailort */
+
+#endif /* _EVENT_INTERNAL_HPP_ */
static Expected<time_t> get_file_modified_time(const std::string &file_path);
static Expected<bool> is_directory(const std::string &path);
static hailo_status create_directory(const std::string &dir_path);
+ static hailo_status remove_directory(const std::string &dir_path);
static Expected<std::string> get_current_dir();
static std::string get_home_directory();
static bool is_path_accesible(const std::string &path);
using duration = std::chrono::nanoseconds;
using TimestampsArray = CircularArray<duration>;
- explicit LatencyMeter(const std::set<std::string> &output_names, size_t timestamps_list_length) :
+ LatencyMeter(const std::set<std::string> &output_names, size_t timestamps_list_length) :
m_start_timestamps(timestamps_list_length),
m_latency_count(0),
m_latency_sum(0)
#include "common/utils.hpp"
#include "common/logger_macros.hpp"
#include "common/ethernet_utils.hpp"
+#include "common/socket.hpp"
+
+#include <fstream>
namespace hailort
{
#define ETHERNET_UTILS__ARP_DEVICE_NAME_INDEX (4)
-hailo_status EthernetUtils::get_interface_from_arp_entry(char *arp_entry, char *interface_name,
- size_t max_interface_name_length)
+Expected<std::string> EthernetUtils::get_interface_from_arp_entry(char *arp_entry)
{
/* This function parses the interface name out from the arp entry
* Each entry is built as follows:
* For example:
* 10.0.0.163 0x1 0x2 80:00:de:ad:be:3f * enp1s0
* */
- hailo_status status = HAILO_UNINITIALIZED;
size_t token_counter = 0;
char* token = NULL;
/* Start splitting the arp entry into tokens according to the delimiter */
token = strtok(arp_entry, ETHERNET_UTILS__ARP_ENTRY_DELIMIETERS);
- if (NULL == token) {
- LOGGER__ERROR("Invalid arp entry, could not split it to tokens");
- status = HAILO_ETH_FAILURE;
- goto l_exit;
- }
+ CHECK_AS_EXPECTED(nullptr != token, HAILO_ETH_FAILURE, "Invalid arp entry, could not split it to tokens");
/* Iterate over the tokens until the device name is found */
while (NULL != token) {
token = strtok(NULL, ETHERNET_UTILS__ARP_ENTRY_DELIMIETERS);
if (ETHERNET_UTILS__ARP_DEVICE_NAME_INDEX == token_counter) {
LOGGER__DEBUG("Interface name: {}", token);
- strncpy(interface_name, token, max_interface_name_length);
- break;
+ return std::string(token);
}
token_counter++;
}
- status = HAILO_SUCCESS;
-l_exit:
- return status;
+ return make_unexpected(HAILO_ETH_FAILURE);
}
-hailo_status EthernetUtils::get_interface_from_board_ip(const char *board_ip, char *interface_name, size_t interface_name_length)
+Expected<std::string> EthernetUtils::get_interface_from_board_ip(const std::string &board_ip)
{
- hailo_status status = HAILO_UNINITIALIZED;
- FILE* arp_file = NULL;
- int fclose_rc = -1;
- char buffer[ETHERNET_UTILS__ARP_MAX_ENTRY_LENGTH] = {};
-
- CHECK_ARG_NOT_NULL(interface_name);
- CHECK_ARG_NOT_NULL(board_ip);
+ std::ifstream arp_file(ETHERNET_UTILS__ARP_FILE, std::ios::in);
+ CHECK_AS_EXPECTED(arp_file, HAILO_OPEN_FILE_FAILURE, "Cannot open file {}. errno: {:#x}", ETHERNET_UTILS__ARP_FILE, errno);
- /* Open arp file */
- arp_file = fopen(ETHERNET_UTILS__ARP_FILE, "r");
- if (NULL == arp_file) {
- LOGGER__ERROR("Cannot open file {}. Errno: {:#x}", ETHERNET_UTILS__ARP_FILE, errno);
- status = HAILO_OPEN_FILE_FAILURE;
- goto l_exit;
- }
+ char buffer[ETHERNET_UTILS__ARP_MAX_ENTRY_LENGTH] = {};
/* Go over all of the lines at the file */
- while(fgets(buffer, ARRAY_LENGTH(buffer), arp_file)) {
- /* Check if the arp line contains the board_ip */
- if (strstr(buffer, board_ip)) {
- status = get_interface_from_arp_entry(buffer, interface_name, interface_name_length);
- if (HAILO_SUCCESS != status) {
- goto l_exit;
- }
- break;
+ while (arp_file.getline(buffer, sizeof(buffer))) {
+ if (strstr(buffer, board_ip.c_str())) {
+ return get_interface_from_arp_entry(buffer);
}
}
- status = HAILO_SUCCESS;
-l_exit:
- if (NULL != arp_file) {
- fclose_rc = fclose(arp_file);
- if (0 != fclose_rc) {
- LOGGER__ERROR("Cannot close arp file {} ", ETHERNET_UTILS__ARP_FILE);
- if (HAILO_SUCCESS == status) {
- status = HAILO_CLOSE_FAILURE;
- } else {
- LOGGER__ERROR("Did not override status. Left status value at: {} (not assigned {}",
- status,
- HAILO_CLOSE_FAILURE);
- }
- }
- }
-
- return status;
+ LOGGER__ERROR("Failed to find interface name for ip {}", board_ip);
+ return make_unexpected(HAILO_ETH_FAILURE);
}
-hailo_status EthernetUtils::get_ip_from_interface(const char *interface_name, char *ip, size_t ip_length)
+Expected<std::string> EthernetUtils::get_ip_from_interface(const std::string &interface_name)
{
- hailo_status status = HAILO_UNINITIALIZED;
struct ifreq ifr = {};
- int fd = 0;
- int posix_rc = 0;
-
- CHECK_ARG_NOT_NULL(interface_name);
- CHECK_ARG_NOT_NULL(ip);
/* Create socket */
- fd = socket(AF_INET, SOCK_DGRAM, 0);
- if (fd < 0) {
- LOGGER__ERROR("Failed to create socket. Errno: {:#x}", errno);
- status = HAILO_ETH_FAILURE;
- goto l_exit;
- }
+ auto socket = Socket::create(AF_INET, SOCK_DGRAM, 0);
+ CHECK_EXPECTED(socket);
/* Convert interface name to ip address */
ifr.ifr_addr.sa_family = AF_INET;
- (void)strncpy(ifr.ifr_name, interface_name, IFNAMSIZ-1);
- posix_rc = ioctl(fd, SIOCGIFADDR, &ifr);
- if (0 > posix_rc) {
- LOGGER__ERROR("Interface was not found. ioctl with SIOCGIFADDR has failed. Errno: {:#x}", errno);
- status = HAILO_ETH_INTERFACE_NOT_FOUND;
- goto l_exit;
- }
- (void)strncpy(ip, inet_ntoa(((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr), ip_length);
- LOGGER__DEBUG("Interface {} | IP: {}", interface_name, ip);
-
- status = HAILO_SUCCESS;
-l_exit:
- /* Close the socket if it was created */
- if (0 < fd) {
- posix_rc = close(fd);
- if (0 != posix_rc) {
- LOGGER__ERROR("Failed closing socket. Errno: {:#x}", errno);
- /* Update status if only in case there was not previous error */
- if (HAILO_SUCCESS == status) {
- status = HAILO_CLOSE_FAILURE;
- } else {
- LOGGER__ERROR("Did not override status. Left status value at: {} (not assigned {}",
- status,
- HAILO_CLOSE_FAILURE);
- }
- }
- }
-
- return status;
+ (void)strncpy(ifr.ifr_name, interface_name.c_str(), IFNAMSIZ-1);
+ auto posix_rc = ioctl(socket->get_fd(), SIOCGIFADDR, &ifr);
+ CHECK_AS_EXPECTED(posix_rc >= 0, HAILO_ETH_INTERFACE_NOT_FOUND,
+ "Interface was not found. ioctl with SIOCGIFADDR has failed. errno: {:#x}", errno);
+
+ std::string res = inet_ntoa(((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr);
+ LOGGER__DEBUG("Interface {} | IP: {}", interface_name, res);
+ return res;
}
} /* namespace hailort */
return HAILO_SUCCESS;
}
+hailo_status Filesystem::remove_directory(const std::string &dir_path)
+{
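+ // Note: rmdir() only removes empty directories.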
+ auto ret_val = rmdir(dir_path.c_str());
+ CHECK(0 == ret_val, HAILO_FILE_OPERATION_FAILURE, "Failed to remove directory {}", dir_path);
+ return HAILO_SUCCESS;
+}
+
Expected<std::string> Filesystem::get_current_dir()
{
char cwd[PATH_MAX];
**/
#include "hailo/hailort.h"
-
#include "common/os_utils.hpp"
-
+#include "common/utils.hpp"
#include "spdlog/sinks/syslog_sink.h"
+#include <unistd.h>
+#include <signal.h>
+#include <sched.h>
+
namespace hailort
{
+#define EXISTENCE_CHECK_SIGNAL (0)
+
HailoRTOSLogger::HailoRTOSLogger()
{
m_hailort_os_logger = spdlog::syslog_logger_mt("syslog", "hailort_service", LOG_PID);
return getpid();
}
+bool OsUtils::is_pid_alive(uint32_t pid)
+{
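+ // kill() with signal 0 performs error checking only - no signal is actually sent; success means the pid exists.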
+ return (0 == kill(pid, EXISTENCE_CHECK_SIGNAL));
+}
+
+void OsUtils::set_current_thread_name(const std::string &name)
+{
+ (void)name;
+#ifndef NDEBUG
+ // pthread_setname_np name size is limited to 16 chars (including null terminator)
+ assert(name.size() < 16);
+ pthread_setname_np(pthread_self(), name.c_str());
+#endif /* NDEBUG */
+}
+
+hailo_status OsUtils::set_current_thread_affinity(uint8_t cpu_index)
+{
+#if defined(__linux__)
+ cpu_set_t cpuset;
+ CPU_ZERO(&cpuset);
+ CPU_SET(cpu_index, &cpuset);
+
+ static const pid_t CURRENT_THREAD = 0;
+ int rc = sched_setaffinity(CURRENT_THREAD, sizeof(cpu_set_t), &cpuset);
+ CHECK(rc == 0, HAILO_INTERNAL_FAILURE, "sched_setaffinity failed with status {}", rc);
+
+ return HAILO_SUCCESS;
+#elif defined(__QNX__)
+ (void)cpu_index;
+ // TODO: impl on qnx (HRT-10889)
+ return HAILO_NOT_IMPLEMENTED;
+#endif
+}
+
+size_t OsUtils::get_page_size()
+{
+ static const auto page_size = sysconf(_SC_PAGESIZE);
+ return page_size;
+}
+
CursorAdjustment::CursorAdjustment(){}
CursorAdjustment::~CursorAdjustment(){}
Expected<TrafficControlUtil> TrafficControlUtil::create(const std::string &ip, uint16_t port, uint32_t rate_bytes_per_sec)
{
- auto interface_name = get_interface_name(ip);
+ auto interface_name = EthernetUtils::get_interface_from_board_ip(ip);
- CHECK_EXPECTED(interface_name, "get_interface_name failed with status {}", interface_name.status());
+ CHECK_EXPECTED(interface_name, "get_interface_from_board_ip failed with status {}", interface_name.status());
auto board_id = ip_to_board_id(ip);
return run_command(cmd.str(), m_is_sudo_needed, {}, true);
}
-Expected<std::string> TrafficControlUtil::get_interface_name(const std::string &ip)
-{
- auto interface_name = Buffer::create(EthernetUtils::MAX_INTERFACE_SIZE, 0);
- CHECK_EXPECTED(interface_name);
-
- CHECK_SUCCESS_AS_EXPECTED(EthernetUtils::get_interface_from_board_ip(ip.c_str(),
- interface_name->as_pointer<char>(), interface_name->size()));
-
- return interface_name->to_string();
-}
-
Expected<uint32_t> TrafficControlUtil::ip_to_board_id(const std::string &ip)
{
// Takes last digit from 3 octet + the whole 4th octet
{
public:
static Expected<TrafficControlUtil> create(const std::string &ip, uint16_t port, uint32_t rate_bytes_per_sec);
- static Expected<std::string> get_interface_name(const std::string &ip);
~TrafficControlUtil() = default;
TrafficControlUtil(TrafficControlUtil&) = delete;
TrafficControlUtil &operator=(const TrafficControlUtil &) = delete;
return result;
}
-hailo_status EthernetUtils::get_interface_from_board_ip(const char *board_ip, char *interface_name, size_t interface_name_length)
+Expected<std::string> EthernetUtils::get_interface_from_board_ip(const std::string &board_ip)
{
- CHECK_ARG_NOT_NULL(interface_name);
- CHECK_ARG_NOT_NULL(board_ip);
-
auto network_interfaces = NetworkInterface::get_all_interfaces();
- CHECK_EXPECTED_AS_STATUS(network_interfaces);
+ CHECK_EXPECTED(network_interfaces);
struct in_addr board_ip_struct{};
- auto status = Socket::pton(AF_INET, board_ip, &board_ip_struct);
- CHECK_SUCCESS(status, "Invalid board ip address {}", board_ip);
+ auto status = Socket::pton(AF_INET, board_ip.c_str(), &board_ip_struct);
+ CHECK_SUCCESS_AS_EXPECTED(status, "Invalid board ip address {}", board_ip);
for (const auto& network_interface : network_interfaces.value()) {
auto arp_table = ArpTable::create(network_interface.index());
- CHECK_EXPECTED_AS_STATUS(arp_table);
+ CHECK_EXPECTED(arp_table);
const auto mac_address = arp_table->get_mac_address(static_cast<uint32_t>(board_ip_struct.S_un.S_addr));
if (mac_address) {
- (void)strncpy(interface_name, network_interface.friendly_name().c_str(), interface_name_length);
- return HAILO_SUCCESS;
+ return network_interface.friendly_name();
}
}
- return HAILO_ETH_INTERFACE_NOT_FOUND;
+ return make_unexpected(HAILO_ETH_INTERFACE_NOT_FOUND);
}
-hailo_status EthernetUtils::get_ip_from_interface(const char *interface_name, char *ip, size_t ip_length)
+Expected<std::string> EthernetUtils::get_ip_from_interface(const std::string &interface_name)
{
- CHECK_ARG_NOT_NULL(interface_name);
- CHECK_ARG_NOT_NULL(ip);
-
auto network_interfaces = NetworkInterface::get_all_interfaces();
- CHECK_EXPECTED_AS_STATUS(network_interfaces);
+ CHECK_EXPECTED(network_interfaces);
for (const auto& network_interface : network_interfaces.value()) {
if (network_interface.friendly_name() == interface_name) {
- (void)strncpy(ip, network_interface.ip().c_str(), ip_length);
- return HAILO_SUCCESS;
+ return network_interface.ip();
}
}
- return HAILO_ETH_INTERFACE_NOT_FOUND;
+ return make_unexpected(HAILO_ETH_INTERFACE_NOT_FOUND);
}
} /* namespace hailort */
return HAILO_SUCCESS;
}
+hailo_status Filesystem::remove_directory(const std::string &dir_path)
+{
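+ // Note: RemoveDirectoryA only deletes existing empty directories.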
+ bool was_removed = RemoveDirectoryA(dir_path.c_str());
+ CHECK(was_removed, HAILO_FILE_OPERATION_FAILURE, "Failed to remove directory {}", dir_path);
+ return HAILO_SUCCESS;
+}
+
bool Filesystem::is_path_accesible(const std::string &path)
{
// The code is based on examples from: https://cpp.hotexamples.com/examples/-/-/AccessCheck/cpp-accesscheck-function-examples.html
**/
#include "common/os_utils.hpp"
+#include "common/utils.hpp"
#include "hailo/hailort.h"
#include <windows.h>
return static_cast<uint32_t>(GetCurrentProcessId());
}
+bool OsUtils::is_pid_alive(uint32_t pid)
+{
+ HANDLE hProcess = OpenProcess(PROCESS_QUERY_INFORMATION, FALSE, pid);
+ if (hProcess == NULL) {
+ // Process is not running
+ return false;
+ }
+
+ DWORD exitCode;
+ BOOL result = GetExitCodeProcess(hProcess, &exitCode);
+
+ CloseHandle(hProcess);
+
+ return (result && (STILL_ACTIVE == exitCode));
+}
+
+void OsUtils::set_current_thread_name(const std::string &name)
+{
+ (void)name;
+}
+
+hailo_status OsUtils::set_current_thread_affinity(uint8_t cpu_index)
+{
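+ // Each bit in the affinity mask selects one logical processor; this assumes cpu_index is
+ // below the mask width (64 on x64).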
+ const DWORD_PTR affinity_mask = static_cast<DWORD_PTR>(1ULL << cpu_index);
+ CHECK(0 != SetThreadAffinityMask(GetCurrentThread(), affinity_mask), HAILO_INTERNAL_FAILURE,
+ "SetThreadAffinityMask failed. LE={}", GetLastError());
+
+ return HAILO_SUCCESS;
+}
+
+static size_t get_page_size_impl()
+{
+ SYSTEM_INFO system_info{};
+ GetSystemInfo(&system_info);
+ return system_info.dwPageSize;
+}
+
+size_t OsUtils::get_page_size()
+{
+ static const auto page_size = get_page_size_impl();
+ return page_size;
+}
+
CursorAdjustment::CursorAdjustment()
{
// Enables Virtual Terminal Processing - enables ANSI escape sequences on Windows
{
public:
OsUtils() = delete;
- static uint32_t get_curr_pid();
-
- static void set_current_thread_name(const std::string &name)
- {
- (void)name;
-#ifndef NDEBUG
-#ifndef _WIN32
- // pthread_setname_np name size is limited to 16 chars (including null terminator)
- assert(name.size() < 16);
- pthread_setname_np(pthread_self(), name.c_str());
-#else
-// TODO: implement for windows
-#endif /* _WIN32 */
-#endif /* NDEBUG */
- }
+ static uint32_t get_curr_pid();
+ static bool is_pid_alive(uint32_t pid);
+ static void set_current_thread_name(const std::string &name);
+ static hailo_status set_current_thread_affinity(uint8_t cpu_index);
+ static size_t get_page_size();
};
} /* namespace hailort */
m_module_wrapper(std::move(other.m_module_wrapper)), m_socket_fd(std::exchange(other.m_socket_fd, INVALID_SOCKET))
{};
+ socket_t get_fd() { return m_socket_fd; }
+
static hailo_status ntop(int af, const void *src, char *dst, socklen_t size);
static hailo_status pton(int af, const char *src, void *dst);
#include <spdlog/fmt/bundled/core.h>
#include <map>
#include <set>
+#include <unordered_set>
namespace hailort
#define IS_FIT_IN_UINT8(number) ((std::numeric_limits<uint8_t>::max() >= ((int32_t)(number))) && (std::numeric_limits<uint8_t>::min() <= ((int32_t)(number))))
#define IS_FIT_IN_UINT16(number) ((std::numeric_limits<uint16_t>::max() >= ((int32_t)(number))) && (std::numeric_limits<uint16_t>::min() <= ((int32_t)(number))))
-
+#define IS_FIT_IN_UINT32(number) ((std::numeric_limits<uint32_t>::max() >= ((int64_t)(number))) && (std::numeric_limits<uint32_t>::min() <= ((int64_t)(number))))
template <typename T>
static inline bool contains(const std::vector<T> &container, const T &value)
return (container.find(value) != container.end());
}
+template <typename T>
+static inline bool contains(const std::unordered_set<T> &container, const T &value)
+{
+ return (container.find(value) != container.end());
+}
+
// From https://stackoverflow.com/questions/57092289/do-stdmake-shared-and-stdmake-unique-have-a-nothrow-version
template <class T, class... Args>
static inline std::unique_ptr<T> make_unique_nothrow(Args&&... args)
} while(0)
#define CHECK_SUCCESS_AS_EXPECTED(status, ...) _CHECK_SUCCESS_AS_EXPECTED(status, ISEMPTY(__VA_ARGS__), "" __VA_ARGS__)
+// CHECK_IN_DEBUG - checks cond with the CHECK macro in debug builds; in release builds it expands to nothing and is optimized out
+#ifdef NDEBUG
+// In release builds the macro expands to nothing
+#define CHECK_IN_DEBUG(cond, ret_val, ...)
+#else // NDEBUG
+#define CHECK_IN_DEBUG(cond, ret_val, ...) CHECK(cond, ret_val, __VA_ARGS__)
+#endif // NDEBUG
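+// For example (a usage sketch; the condition and message are illustrative):
+//   CHECK_IN_DEBUG(0 == (size % 8), HAILO_INVALID_ARGUMENT, "size {} is not 8-byte aligned", size);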
+
#ifdef HAILO_SUPPORT_MULTI_PROCESS
#define _CHECK_SUCCESS_AS_RPC_STATUS(status, reply, is_default, fmt, ...) \
do { \
return min_count;
}
+static inline bool is_env_variable_on(const char* env_var_name)
+{
+ auto env_var = std::getenv(env_var_name);
+ return ((nullptr != env_var) && (strnlen(env_var, 2) == 1) && (strncmp(env_var, "1", 1) == 0));
+}
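+// For example (a sketch; the variable name is illustrative):
+//   if (is_env_variable_on("HAILORT_EXTRA_TRACE")) { /* enable extra tracing */ }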
+
} /* namespace hailort */
#endif /* HAILO_UTILS_H_ */
\ No newline at end of file
#define INVALID_VDMA_CHANNEL (0xff)
+#if !defined(__cplusplus) && defined(NTDDI_VERSION)
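+// Kernel-mode C builds don't provide <stdint.h>, so map the fixed-width types to the WDM types.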
+#include <wdm.h>
+typedef ULONG uint32_t;
+typedef UCHAR uint8_t;
+typedef USHORT uint16_t;
+typedef ULONGLONG uint64_t;
+typedef uint64_t u64;
+typedef uint32_t u32;
+typedef uint16_t u16;
+typedef uint8_t u8;
+#endif /* !defined(__cplusplus) && defined(NTDDI_VERSION) */
+
+
#ifdef _MSC_VER
#if !defined(bool) && !defined(__cplusplus)
typedef uint8_t bool;
#include <stdint.h>
#include <sys/types.h>
#include <sys/mman.h>
+#include <stdbool.h>
+
// defines for devctl
#define _IOW_ __DIOF
#define _IOR_ __DIOT
/* structure used in ioctl HAILO_DESC_LIST_CREATE */
struct hailo_desc_list_create_params {
size_t desc_count; // in
+ bool is_circular; // in
uintptr_t desc_handle; // out
- // Note: The dma address is required for CONTEXT_SWITCH firmware controls
- uint64_t dma_address; // out
+ uint64_t dma_address; // out
};
/* structure used in ioctl HAILO_NON_LINUX_DESC_LIST_MMAP */
/* structure used in ioctl HAILO_VDMA_BUFFER_SYNC */
enum hailo_vdma_buffer_sync_type {
- HAILO_SYNC_FOR_HOST,
+ HAILO_SYNC_FOR_CPU,
HAILO_SYNC_FOR_DEVICE,
/** Max enum value to maintain ABI Integrity */
service_resource_manager.hpp
${HAILORT_SERVICE_OS_DIR}/hailort_service.cpp
${HAILORT_COMMON_CPP_SOURCES}
+ ${HAILO_FULL_OS_DIR}/event.cpp # TODO HRT-10681: move event.cpp to common
)
target_compile_options(hailort_service PRIVATE ${HAILORT_COMPILE_OPTIONS})
set_property(TARGET hailort_service PROPERTY CXX_STANDARD 14)
});
}
-void HailoRtRpcService::keep_alive()
+hailo_status HailoRtRpcService::abort_input_vstream(uint32_t handle)
{
- while (true) {
- std::this_thread::sleep_for(hailort::HAILO_KEEPALIVE_INTERVAL / 2);
- auto now = std::chrono::high_resolution_clock::now();
+ if (is_input_vstream_aborted(handle)) {
+ return HAILO_SUCCESS;
+ }
+
+ auto lambda = [](std::shared_ptr<InputVStream> input_vstream) {
+ return input_vstream->abort();
+ };
+ auto &manager = ServiceResourceManager<InputVStream>::get_instance();
+ auto status = manager.execute<hailo_status>(handle, lambda);
+ if (HAILO_SUCCESS != status) {
+ LOGGER__ERROR("Failed to abort input vstream with status {}", status);
+ }
+ return status;
+}
+
+hailo_status HailoRtRpcService::abort_output_vstream(uint32_t handle)
+{
+ if (is_output_vstream_aborted(handle)) {
+ return HAILO_SUCCESS;
+ }
+
+ auto lambda = [](std::shared_ptr<OutputVStream> output_vstream) {
+ return output_vstream->abort();
+ };
+ auto &manager = ServiceResourceManager<OutputVStream>::get_instance();
+ auto status = manager.execute<hailo_status>(handle, lambda);
+ if (HAILO_SUCCESS != status) {
+ LOGGER__ERROR("Failed to abort output vstream with status {}", status);
+ }
+ return status;
+}
+
+bool HailoRtRpcService::is_input_vstream_aborted(uint32_t handle)
+{
+ auto lambda = [](std::shared_ptr<InputVStream> input_vstream) {
+ return input_vstream->is_aborted();
+ };
+ auto &manager = ServiceResourceManager<InputVStream>::get_instance();
+ return manager.execute<bool>(handle, lambda);
+}
+
+bool HailoRtRpcService::is_output_vstream_aborted(uint32_t handle)
+{
+ auto lambda = [](std::shared_ptr<OutputVStream> output_vstream) {
+ return output_vstream->is_aborted();
+ };
+ auto &manager = ServiceResourceManager<OutputVStream>::get_instance();
+ return manager.execute<bool>(handle, lambda);
+}
+
+hailo_status HailoRtRpcService::resume_input_vstream(uint32_t handle)
+{
+ if (!is_input_vstream_aborted(handle)) {
+ return HAILO_SUCCESS;
+ }
+
+ auto lambda = [](std::shared_ptr<InputVStream> input_vstream) {
+ return input_vstream->resume();
+ };
+ auto &manager = ServiceResourceManager<InputVStream>::get_instance();
+ auto status = manager.execute<hailo_status>(handle, lambda);
+ if (HAILO_SUCCESS != status) {
+ LOGGER__ERROR("Failed to resume input vstream with status {}", status);
+ }
+ return status;
+}
+
+hailo_status HailoRtRpcService::resume_output_vstream(uint32_t handle)
+{
+ if (!is_output_vstream_aborted(handle)) {
+ return HAILO_SUCCESS;
+ }
+
+ auto lambda = [](std::shared_ptr<OutputVStream> output_vstream) {
+ return output_vstream->resume();
+ };
+ auto &manager = ServiceResourceManager<OutputVStream>::get_instance();
+ auto status = manager.execute<hailo_status>(handle, lambda);
+ if (HAILO_SUCCESS != status) {
+ LOGGER__ERROR("Failed to resume output vstream with status {}", status);
+ }
+ return status;
+}
+
+// TODO: Add named, templated release functions for InputVStream and OutputVStream that call abort before release.
+void HailoRtRpcService::abort_vstreams_by_pids(std::set<uint32_t> &pids)
+{
+ auto inputs_handles = ServiceResourceManager<InputVStream>::get_instance().resources_handles_by_pids(pids);
+ auto outputs_handles = ServiceResourceManager<OutputVStream>::get_instance().resources_handles_by_pids(pids);
+ for (auto &input_handle : inputs_handles) {
+ abort_input_vstream(input_handle);
+ }
+ for (auto &output_handle : outputs_handles) {
+ abort_output_vstream(output_handle);
+ }
+}
+
+
+void HailoRtRpcService::remove_disconnected_clients()
+{
+ std::this_thread::sleep_for(hailort::HAILO_KEEPALIVE_INTERVAL / 2);
+ auto now = std::chrono::high_resolution_clock::now();
+ std::set<uint32_t> pids_to_remove;
+ {
std::unique_lock<std::mutex> lock(m_mutex);
- std::set<uint32_t> pids_to_remove;
for (auto pid_to_last_alive : m_clients_pids) {
auto duration = std::chrono::duration_cast<std::chrono::seconds>(now - pid_to_last_alive.second);
if (duration > hailort::HAILO_KEEPALIVE_INTERVAL) {
- auto client_id = pid_to_last_alive.first;
- pids_to_remove.insert(client_id);
- LOGGER__INFO("Client disconnected, pid: {}", client_id);
- HAILORT_OS_LOG_INFO("Client disconnected, pid: {}", client_id);
- ServiceResourceManager<OutputVStream>::get_instance().release_by_pid(client_id);
- ServiceResourceManager<InputVStream>::get_instance().release_by_pid(client_id);
- ServiceResourceManager<ConfiguredNetworkGroup>::get_instance().release_by_pid(client_id);
- ServiceResourceManager<VDevice>::get_instance().release_by_pid(client_id);
+ auto client_pid = pid_to_last_alive.first;
+ pids_to_remove.insert(client_pid);
}
}
- for (auto &pid : pids_to_remove) {
- m_clients_pids.erase(pid);
+
+ // Abort vstreams before releasing them, to avoid cases where a vstream is stuck in
+ // execute() on a blocking operation (which would otherwise finish only by timeout).
+ // Releasing a vstream waits on the ServiceResourceManager's resource_mutex, which is
+ // also locked during execute().
+ abort_vstreams_by_pids(pids_to_remove);
+ for (auto &client_pid : pids_to_remove) {
+ ServiceResourceManager<OutputVStream>::get_instance().release_by_pid(client_pid);
+ ServiceResourceManager<InputVStream>::get_instance().release_by_pid(client_pid);
+ ServiceResourceManager<ConfiguredNetworkGroup>::get_instance().release_by_pid(client_pid);
+ ServiceResourceManager<VDevice>::get_instance().release_by_pid(client_pid);
+
+ LOGGER__INFO("Client disconnected, pid: {}", client_pid);
+ HAILORT_OS_LOG_INFO("Client disconnected, pid: {}", client_pid);
+ m_clients_pids.erase(client_pid);
}
}
}
+
+void HailoRtRpcService::keep_alive()
+{
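+ // remove_disconnected_clients() sleeps for half the keepalive interval on each iteration,
+ // so this loop does not busy-wait.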
+ while (true) {
+ remove_disconnected_clients();
+ }
+}
+
grpc::Status HailoRtRpcService::client_keep_alive(grpc::ServerContext*, const keepalive_Request *request,
empty*)
{
grpc::Status HailoRtRpcService::VDevice_create(grpc::ServerContext *, const VDevice_create_Request *request,
VDevice_create_Reply *reply)
{
+ remove_disconnected_clients();
+
// Deserialization
const auto params_proto = request->hailo_vdevice_params();
std::vector<hailo_device_id_t> device_ids;
Release_Reply *reply)
{
auto &manager = ServiceResourceManager<VDevice>::get_instance();
- auto status = manager.release_resource(request->handle());
- reply->set_status(static_cast<uint32_t>(status));
+ manager.release_resource(request->handle(), request->pid());
+ reply->set_status(static_cast<uint32_t>(HAILO_SUCCESS));
return grpc::Status::OK;
}
Release_Reply *reply)
{
auto &manager = ServiceResourceManager<ConfiguredNetworkGroup>::get_instance();
- auto status = manager.release_resource(request->handle());
- reply->set_status(static_cast<uint32_t>(status));
+ manager.release_resource(request->handle(), request->pid());
+ reply->set_status(static_cast<uint32_t>(HAILO_SUCCESS));
return grpc::Status::OK;
}
const ConfiguredNetworkGroup_set_scheduler_timeout_Request *request,
ConfiguredNetworkGroup_set_scheduler_timeout_Reply *reply)
{
- auto lambda = [](std::shared_ptr<ConfiguredNetworkGroup> cng, std::chrono::milliseconds timeout_ms) {
- return cng->set_scheduler_timeout(timeout_ms);
+ auto lambda = [](std::shared_ptr<ConfiguredNetworkGroup> cng, std::chrono::milliseconds timeout_ms, std::string network_name) {
+ return cng->set_scheduler_timeout(timeout_ms, network_name);
};
auto &net_group_manager = ServiceResourceManager<ConfiguredNetworkGroup>::get_instance();
- auto status = net_group_manager.execute<hailo_status>(request->handle(), lambda, static_cast<std::chrono::milliseconds>(request->timeout_ms()));
+ auto status = net_group_manager.execute<hailo_status>(request->handle(), lambda, static_cast<std::chrono::milliseconds>(request->timeout_ms()),
+ request->network_name());
reply->set_status(status);
return grpc::Status::OK;
}
};
inputs_params.emplace(param_proto.name(), std::move(params));
}
+ auto network_group_handle = request->net_group();
+ auto client_pid = request->pid();
auto lambda = [](std::shared_ptr<ConfiguredNetworkGroup> cng, const std::map<std::string, hailo_vstream_params_t> &inputs_params) {
return cng->create_input_vstreams(inputs_params);
};
auto &net_group_manager = ServiceResourceManager<ConfiguredNetworkGroup>::get_instance();
- auto vstreams_expected = net_group_manager.execute<Expected<std::vector<InputVStream>>>(request->net_group(), lambda, inputs_params);
+ auto vstreams_expected = net_group_manager.execute<Expected<std::vector<InputVStream>>>(network_group_handle, lambda, inputs_params);
CHECK_EXPECTED_AS_RPC_STATUS(vstreams_expected, reply);
auto vstreams = vstreams_expected.release();
- auto &manager = ServiceResourceManager<InputVStream>::get_instance();
- auto client_pid = request->pid();
+ auto &manager = ServiceResourceManager<InputVStream>::get_instance();
for (size_t i = 0; i < vstreams.size(); i++) {
auto handle = manager.register_resource(client_pid, make_shared_nothrow<InputVStream>(std::move(vstreams[i])));
reply->add_handles(handle);
}
+ net_group_manager.dup_handle(client_pid, network_group_handle);
+
reply->set_status(static_cast<uint32_t>(HAILO_SUCCESS));
return grpc::Status::OK;
}
Release_Reply *reply)
{
auto &manager = ServiceResourceManager<InputVStream>::get_instance();
- auto status = manager.release_resource(request->handle());
- reply->set_status(static_cast<uint32_t>(status));
+ manager.release_resource(request->handle(), request->pid());
+ reply->set_status(static_cast<uint32_t>(HAILO_SUCCESS));
return grpc::Status::OK;
}
output_params.emplace(param_proto.name(), std::move(params));
}
+ auto network_group_handle = request->net_group();
+ auto client_pid = request->pid();
+
auto lambda = [](std::shared_ptr<ConfiguredNetworkGroup> cng, const std::map<std::string, hailo_vstream_params_t> &output_params) {
return cng->create_output_vstreams(output_params);
};
auto &net_group_manager = ServiceResourceManager<ConfiguredNetworkGroup>::get_instance();
- auto vstreams_expected = net_group_manager.execute<Expected<std::vector<OutputVStream>>>(request->net_group(), lambda, output_params);
+ auto vstreams_expected = net_group_manager.execute<Expected<std::vector<OutputVStream>>>(network_group_handle, lambda, output_params);
CHECK_EXPECTED_AS_RPC_STATUS(vstreams_expected, reply);
auto vstreams = vstreams_expected.release();
- auto &manager = ServiceResourceManager<OutputVStream>::get_instance();
- auto client_pid = request->pid();
+ auto &manager = ServiceResourceManager<OutputVStream>::get_instance();
for (size_t i = 0; i < vstreams.size(); i++) {
auto handle = manager.register_resource(client_pid, make_shared_nothrow<OutputVStream>(std::move(vstreams[i])));
reply->add_handles(handle);
}
+ net_group_manager.dup_handle(client_pid, network_group_handle);
+
reply->set_status(static_cast<uint32_t>(HAILO_SUCCESS));
return grpc::Status::OK;
}
grpc::Status HailoRtRpcService::OutputVStream_release(grpc::ServerContext *, const Release_Request *request,
Release_Reply *reply)
{
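+ // Abort the vstream before releasing it so that a read blocked inside execute() can finish.
+ // If the client had not aborted the vstream itself, resume the underlying vstream after
+ // release, presumably so other holders of duplicated handles are not left aborted.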
+ auto was_aborted = is_output_vstream_aborted(request->handle());
+ abort_output_vstream(request->handle());
auto &manager = ServiceResourceManager<OutputVStream>::get_instance();
- auto status = manager.release_resource(request->handle());
+ auto resource = manager.release_resource(request->handle(), request->pid());
+ auto status = HAILO_SUCCESS;
+ if (resource && (!was_aborted)) {
+ status = resource->resume();
+ if (HAILO_SUCCESS != status) {
+ LOGGER__INFO("Failed to resume output vstream {} after destruction", resource->name());
+ }
+ }
reply->set_status(static_cast<uint32_t>(status));
return grpc::Status::OK;
}
auto proto_nms_info_defuse_info = proto_nms_info->mutable_defuse_info();
proto_nms_info_defuse_info->set_class_group_index(stream_info.nms_info.defuse_info.class_group_index);
proto_nms_info_defuse_info->set_original_name(std::string(stream_info.nms_info.defuse_info.original_name));
+ proto_nms_info->set_burst_size(stream_info.nms_info.burst_size);
+ proto_nms_info->set_burst_type(static_cast<ProtoNmsBurstType>(stream_info.nms_info.burst_type));
} else {
auto proto_stream_shape = proto_stream_info.mutable_stream_shape();
auto proto_stream_shape_shape = proto_stream_shape->mutable_shape();
};
auto &manager = ServiceResourceManager<ConfiguredNetworkGroup>::get_instance();
auto expected_latency_result = manager.execute<Expected<LatencyMeasurementResult>>(request->handle(), lambda, request->network_name());
- CHECK_EXPECTED_AS_RPC_STATUS(expected_latency_result, reply);
- reply->set_avg_hw_latency(static_cast<uint32_t>(expected_latency_result.value().avg_hw_latency.count()));
- reply->set_status(static_cast<uint32_t>(HAILO_SUCCESS));
+ if (HAILO_NOT_AVAILABLE == expected_latency_result.status()) {
+ reply->set_status(static_cast<uint32_t>(HAILO_NOT_AVAILABLE));
+ } else {
+ CHECK_EXPECTED_AS_RPC_STATUS(expected_latency_result, reply);
+ reply->set_avg_hw_latency(static_cast<uint32_t>(expected_latency_result.value().avg_hw_latency.count()));
+ reply->set_status(static_cast<uint32_t>(HAILO_SUCCESS));
+ }
return grpc::Status::OK;
}
return grpc::Status::OK;
}
+grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_get_sorted_output_names(grpc::ServerContext*,
+ const ConfiguredNetworkGroup_get_sorted_output_names_Request *request,
+ ConfiguredNetworkGroup_get_sorted_output_names_Reply *reply)
+{
+ auto lambda = [](std::shared_ptr<ConfiguredNetworkGroup> cng) {
+ return cng->get_sorted_output_names();
+ };
+ auto &manager = ServiceResourceManager<ConfiguredNetworkGroup>::get_instance();
+ auto sorted_output_names_expected = manager.execute<Expected<std::vector<std::string>>>(request->handle(), lambda);
+ CHECK_EXPECTED_AS_RPC_STATUS(sorted_output_names_expected, reply);
+ auto sorted_output_names_proto = reply->mutable_sorted_output_names();
+ for (auto &name : sorted_output_names_expected.value()) {
+ sorted_output_names_proto->Add(std::move(name));
+ }
+ reply->set_status(static_cast<uint32_t>(HAILO_SUCCESS));
+ return grpc::Status::OK;
+}
+
+grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_get_stream_names_from_vstream_name(grpc::ServerContext*,
+ const ConfiguredNetworkGroup_get_stream_names_from_vstream_name_Request *request,
+ ConfiguredNetworkGroup_get_stream_names_from_vstream_name_Reply *reply)
+{
+ auto lambda = [](std::shared_ptr<ConfiguredNetworkGroup> cng, const std::string &vstream_name) {
+ return cng->get_stream_names_from_vstream_name(vstream_name);
+ };
+ auto &manager = ServiceResourceManager<ConfiguredNetworkGroup>::get_instance();
+ auto streams_names_expected = manager.execute<Expected<std::vector<std::string>>>(request->handle(), lambda, request->vstream_name());
+ CHECK_EXPECTED_AS_RPC_STATUS(streams_names_expected, reply);
+ auto streams_names_proto = reply->mutable_streams_names();
+ for (auto &name : streams_names_expected.value()) {
+ streams_names_proto->Add(std::move(name));
+ }
+ reply->set_status(static_cast<uint32_t>(HAILO_SUCCESS));
+ return grpc::Status::OK;
+}
+
+grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_get_vstream_names_from_stream_name(grpc::ServerContext*,
+ const ConfiguredNetworkGroup_get_vstream_names_from_stream_name_Request *request,
+ ConfiguredNetworkGroup_get_vstream_names_from_stream_name_Reply *reply)
+{
+ auto lambda = [](std::shared_ptr<ConfiguredNetworkGroup> cng, const std::string &stream_name) {
+ return cng->get_vstream_names_from_stream_name(stream_name);
+ };
+ auto &manager = ServiceResourceManager<ConfiguredNetworkGroup>::get_instance();
+ auto vstreams_names_expected = manager.execute<Expected<std::vector<std::string>>>(request->handle(), lambda, request->stream_name());
+ CHECK_EXPECTED_AS_RPC_STATUS(vstreams_names_expected, reply);
+ auto vstreams_names_proto = reply->mutable_vstreams_names();
+ for (auto &name : vstreams_names_expected.value()) {
+ vstreams_names_proto->Add(std::move(name));
+ }
+ reply->set_status(static_cast<uint32_t>(HAILO_SUCCESS));
+ return grpc::Status::OK;
+}
+
grpc::Status HailoRtRpcService::InputVStream_get_frame_size(grpc::ServerContext*, const VStream_get_frame_size_Request *request,
VStream_get_frame_size_Reply *reply)
{
grpc::Status HailoRtRpcService::InputVStream_abort(grpc::ServerContext*, const VStream_abort_Request *request,
VStream_abort_Reply *reply)
{
- auto lambda = [](std::shared_ptr<InputVStream> input_vstream) {
- return input_vstream->abort();
- };
- auto &manager = ServiceResourceManager<InputVStream>::get_instance();
- auto status = manager.execute<hailo_status>(request->handle(), lambda);
+ auto status = abort_input_vstream(request->handle());
reply->set_status(status);
return grpc::Status::OK;
}
grpc::Status HailoRtRpcService::OutputVStream_abort(grpc::ServerContext*, const VStream_abort_Request *request,
VStream_abort_Reply *reply)
{
- auto lambda = [](std::shared_ptr<OutputVStream> output_vstream) {
- return output_vstream->abort();
- };
- auto &manager = ServiceResourceManager<OutputVStream>::get_instance();
- auto status = manager.execute<hailo_status>(request->handle(), lambda);
+ auto status = abort_output_vstream(request->handle());
reply->set_status(status);
return grpc::Status::OK;
}
return grpc::Status::OK;
}
+grpc::Status HailoRtRpcService::InputVStream_stop_and_clear(grpc::ServerContext*, const VStream_stop_and_clear_Request *request,
+ VStream_stop_and_clear_Reply *reply)
+{
+ auto lambda = [](std::shared_ptr<InputVStream> input_vstream) {
+ return input_vstream->stop_and_clear();
+ };
+ auto &manager = ServiceResourceManager<InputVStream>::get_instance();
+ auto status = manager.execute<hailo_status>(request->handle(), lambda);
+ reply->set_status(status);
+ return grpc::Status::OK;
+}
+
+grpc::Status HailoRtRpcService::OutputVStream_stop_and_clear(grpc::ServerContext*, const VStream_stop_and_clear_Request *request,
+ VStream_stop_and_clear_Reply *reply)
+{
+ auto lambda = [](std::shared_ptr<OutputVStream> output_vstream) {
+ return output_vstream->stop_and_clear();
+ };
+ auto &manager = ServiceResourceManager<OutputVStream>::get_instance();
+ auto status = manager.execute<hailo_status>(request->handle(), lambda);
+ reply->set_status(status);
+ return grpc::Status::OK;
+}
+
+grpc::Status HailoRtRpcService::InputVStream_start_vstream(grpc::ServerContext*, const VStream_start_vstream_Request *request,
+ VStream_start_vstream_Reply *reply)
+{
+ auto lambda = [](std::shared_ptr<InputVStream> input_vstream) {
+ return input_vstream->start_vstream();
+ };
+ auto &manager = ServiceResourceManager<InputVStream>::get_instance();
+ auto status = manager.execute<hailo_status>(request->handle(), lambda);
+ reply->set_status(status);
+ return grpc::Status::OK;
+}
+
+grpc::Status HailoRtRpcService::OutputVStream_start_vstream(grpc::ServerContext*, const VStream_start_vstream_Request *request,
+ VStream_start_vstream_Reply *reply)
+{
+ auto lambda = [](std::shared_ptr<OutputVStream> output_vstream) {
+ return output_vstream->start_vstream();
+ };
+ auto &manager = ServiceResourceManager<OutputVStream>::get_instance();
+ auto status = manager.execute<hailo_status>(request->handle(), lambda);
+ reply->set_status(status);
+ return grpc::Status::OK;
+}
+
grpc::Status HailoRtRpcService::InputVStream_get_user_buffer_format(grpc::ServerContext*, const VStream_get_user_buffer_format_Request *request,
VStream_get_user_buffer_format_Reply *reply)
{
return grpc::Status::OK;
}
+grpc::Status HailoRtRpcService::InputVStream_is_aborted(grpc::ServerContext*, const VStream_is_aborted_Request *request,
+ VStream_is_aborted_Reply *reply)
+{
+ auto lambda = [](std::shared_ptr<InputVStream> input_vstream) {
+ return input_vstream->is_aborted();
+ };
+ auto &manager = ServiceResourceManager<InputVStream>::get_instance();
+ auto is_aborted = manager.execute<bool>(request->handle(), lambda);
+ reply->set_is_aborted(is_aborted);
+ reply->set_status(static_cast<uint32_t>(HAILO_SUCCESS));
+ return grpc::Status::OK;
+}
+
+grpc::Status HailoRtRpcService::OutputVStream_is_aborted(grpc::ServerContext*, const VStream_is_aborted_Request *request,
+ VStream_is_aborted_Reply *reply)
+{
+ auto lambda = [](std::shared_ptr<OutputVStream> output_vstream) {
+ return output_vstream->is_aborted();
+ };
+ auto &manager = ServiceResourceManager<OutputVStream>::get_instance();
+ auto is_aborted = manager.execute<bool>(request->handle(), lambda);
+ reply->set_is_aborted(is_aborted);
+ reply->set_status(static_cast<uint32_t>(HAILO_SUCCESS));
+ return grpc::Status::OK;
+}
+
}
#endif
#include <thread>
+#include "hailo/hailort.h"
namespace hailort
{
dup_handle_Reply*) override;
virtual grpc::Status OutputVStream_dup_handle(grpc::ServerContext *ctx, const dup_handle_Request *request,
dup_handle_Reply*) override;
+ virtual grpc::Status InputVStream_stop_and_clear(grpc::ServerContext *ctx, const VStream_stop_and_clear_Request *request,
+ VStream_stop_and_clear_Reply*) override;
+ virtual grpc::Status OutputVStream_stop_and_clear(grpc::ServerContext *ctx, const VStream_stop_and_clear_Request *request,
+ VStream_stop_and_clear_Reply*) override;
+ virtual grpc::Status InputVStream_start_vstream(grpc::ServerContext *ctx, const VStream_start_vstream_Request *request,
+ VStream_start_vstream_Reply*) override;
+ virtual grpc::Status OutputVStream_start_vstream(grpc::ServerContext *ctx, const VStream_start_vstream_Request *request,
+ VStream_start_vstream_Reply*) override;
+ virtual grpc::Status InputVStream_is_aborted(grpc::ServerContext *ctx, const VStream_is_aborted_Request *request,
+ VStream_is_aborted_Reply*) override;
+ virtual grpc::Status OutputVStream_is_aborted(grpc::ServerContext *ctx, const VStream_is_aborted_Request *request,
+ VStream_is_aborted_Reply*) override;
virtual grpc::Status ConfiguredNetworkGroup_dup_handle(grpc::ServerContext *ctx, const dup_handle_Request *request,
dup_handle_Reply*) override;
virtual grpc::Status ConfiguredNetworkGroup_get_config_params(grpc::ServerContext*,
const ConfiguredNetworkGroup_get_config_params_Request *request,
ConfiguredNetworkGroup_get_config_params_Reply *reply) override;
+ virtual grpc::Status ConfiguredNetworkGroup_get_sorted_output_names(grpc::ServerContext*,
+ const ConfiguredNetworkGroup_get_sorted_output_names_Request *request,
+ ConfiguredNetworkGroup_get_sorted_output_names_Reply *reply) override;
+ virtual grpc::Status ConfiguredNetworkGroup_get_stream_names_from_vstream_name(grpc::ServerContext*,
+ const ConfiguredNetworkGroup_get_stream_names_from_vstream_name_Request *request,
+ ConfiguredNetworkGroup_get_stream_names_from_vstream_name_Reply *reply) override;
+ virtual grpc::Status ConfiguredNetworkGroup_get_vstream_names_from_stream_name(grpc::ServerContext*,
+ const ConfiguredNetworkGroup_get_vstream_names_from_stream_name_Request *request,
+ ConfiguredNetworkGroup_get_vstream_names_from_stream_name_Reply *reply) override;
private:
void keep_alive();
+ hailo_status abort_input_vstream(uint32_t handle);
+ hailo_status abort_output_vstream(uint32_t handle);
+ hailo_status resume_input_vstream(uint32_t handle);
+ hailo_status resume_output_vstream(uint32_t handle);
+ bool is_input_vstream_aborted(uint32_t handle);
+ bool is_output_vstream_aborted(uint32_t handle);
+ void abort_vstreams_by_pids(std::set<uint32_t> &pids);
+ void remove_disconnected_clients();
std::mutex m_mutex;
std::map<uint32_t, std::chrono::time_point<std::chrono::high_resolution_clock>> m_clients_pids;
[Service]
HAILORT_LOGGER_PATH="/var/log/hailo"
-HAILO_DISABLE_MULTIPLEXER=0
HAILO_MONITOR=0
#include "hailo/expected.hpp"
#include "common/utils.hpp"
+#include "common/os_utils.hpp"
#include <mutex>
#include <shared_mutex>
+#include <unordered_set>
namespace hailort
{
template<class T>
struct Resource {
Resource(uint32_t pid, std::shared_ptr<T> resource)
- : pid(pid), resource(std::move(resource))
- {}
+ : resource(std::move(resource))
+ {
+ pids.insert(pid);
+ }
- uint32_t pid;
std::shared_ptr<T> resource;
+ std::unordered_set<uint32_t> pids;
};
template<class T>
uint32_t dup_handle(uint32_t pid, uint32_t handle)
{
- // Keeping this function for future possible usage
- (void)pid;
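+ // Register the calling PID as an additional owner of this handle; the resource stays alive until all owners release it or exit.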
+ std::unique_lock<std::mutex> lock(m_mutex);
+ auto resource_expected = resource_lookup(handle);
+ assert(resource_expected);
+ auto resource = resource_expected.release();
+
+ assert(contains(m_resources_mutexes, handle));
+ std::unique_lock<std::shared_timed_mutex> resource_lock(m_resources_mutexes[handle]);
+ resource->pids.insert(pid);
+
return handle;
}
- hailo_status release_resource(uint32_t handle)
+ std::shared_ptr<T> release_resource(uint32_t handle, uint32_t pid)
{
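+ // Drop this PID's ownership. The underlying resource is returned to the caller (for teardown) only once no remaining owner process is alive.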
+ std::shared_ptr<T> res = nullptr;
std::unique_lock<std::mutex> lock(m_mutex);
auto found = m_resources.find(handle);
- CHECK(found != m_resources.end(), HAILO_NOT_FOUND, "Failed to release resource with handle {}, resource does not exist", handle);
+ if (found == m_resources.end()) {
+ LOGGER__INFO("Failed to release resource with handle {} and PID {}. The resource no longer exists or may have already been released",
+ handle, pid);
+ return res;
+ }
+
assert(contains(m_resources_mutexes, handle));
auto resource = m_resources[handle];
+ bool release_resource = false;
{
std::unique_lock<std::shared_timed_mutex> resource_lock(m_resources_mutexes[handle]);
- m_resources.erase(handle);
+ resource->pids.erase(pid);
+ if (all_pids_dead(resource)) {
+ release_resource = true;
+ res = resource->resource;
+ m_resources.erase(handle);
+ }
+ }
+ if (release_resource) {
+ m_resources_mutexes.erase(handle);
}
- m_resources_mutexes.erase(handle);
- return HAILO_SUCCESS;
+ return res;
}
- void release_by_pid(uint32_t pid)
+ std::vector<std::shared_ptr<T>> release_by_pid(uint32_t pid)
{
+ std::vector<std::shared_ptr<T>> res;
std::unique_lock<std::mutex> lock(m_mutex);
for (auto iter = m_resources.begin(); iter != m_resources.end(); ) {
auto handle = iter->first;
- if (iter->second->pid == pid) {
+ bool release_resource = false;
+ if (contains(iter->second->pids, pid)) {
assert(contains(m_resources_mutexes, handle));
{
std::unique_lock<std::shared_timed_mutex> resource_lock(m_resources_mutexes[handle]);
- iter = m_resources.erase(iter);
+ iter->second->pids.erase(pid);
+ if (iter->second->pids.empty()) {
+ release_resource = true;
+ res.push_back(iter->second->resource);
+ iter = m_resources.erase(iter);
+ }
}
+ }
+ if (release_resource) {
m_resources_mutexes.erase(handle);
} else {
++iter;
}
}
+
+ return res;
+ }
+
+ std::vector<uint32_t> resources_handles_by_pids(std::set<uint32_t> &pids)
+ {
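+ // Collect the handles of every resource owned by any of the given PIDs.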
+ std::unique_lock<std::mutex> lock(m_mutex);
+ std::vector<uint32_t> resources_handles;
+ for (auto &handle_resource_pair : m_resources) {
+ for (auto &pid : pids) {
+ if (contains(handle_resource_pair.second->pids, pid)) {
+ resources_handles.emplace_back(handle_resource_pair.first);
+ }
+ }
+ }
+ return resources_handles;
}
private:
return resource;
}
+ bool all_pids_dead(std::shared_ptr<Resource<T>> resource)
+ {
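+ // A resource may be released only after every registered owner process has exited.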
+ for (auto &pid : resource->pids) {
+ if (OsUtils::is_pid_alive(pid)) {
+ return false;
+ }
+ }
+ return true;
+ }
+
std::mutex m_mutex;
std::atomic<uint32_t> m_current_handle_index;
std::unordered_map<uint32_t, std::shared_ptr<Resource<T>>> m_resources;
@REM Running this script requires Administrator permissions.
reg ADD HKLM\SYSTEM\CurrentControlSet\Services\hailort_service /f /v Environment /t REG_MULTI_SZ /d ^
-HAILORT_LOGGER_PATH="%PROGRAMDATA%\HailoRT_Service\logs"\0^
-HAILO_DISABLE_MULTIPLEXER=0\0
\ No newline at end of file
+HAILORT_LOGGER_PATH="%PROGRAMDATA%\HailoRT_Service\logs"\0^
\ No newline at end of file
run2/run2_command.cpp
run2/network_runner.cpp
- run2/live_printer.cpp
+ run2/live_stats.cpp
run2/timer_live_track.cpp
run2/network_live_track.cpp
run2/measurement_live_track.cpp
+ run2/io_wrappers.cpp
)
-
+
if(UNIX)
# Unix only modules
set(HAILORTCLI_CPP_FILES ${HAILORTCLI_CPP_FILES}
udp_rate_limiter_command.cpp
# TODO: We dont compile download_action_list_command on windows, as it uses packed enums (HRT-5919)
download_action_list_command.cpp
- )
+ measure_nnc_performance_command.cpp
+ )
endif()
# 'config_definitions_json_file' is used in generate_definitions_json_str.in for configure_file()
scheduler_mon_proto)
if(WIN32)
- target_link_libraries(hailortcli Ws2_32 Iphlpapi Shlwapi)
+ target_link_libraries(hailortcli Ws2_32 Iphlpapi Shlwapi winmm.lib)
elseif(CMAKE_SYSTEM_NAME STREQUAL QNX)
target_link_libraries(hailortcli pevents)
endif()
data_json = *reinterpret_cast<CONTEXT_SWITCH_DEFS__enable_nms_action_t *>(action);
action_length_local = sizeof(CONTEXT_SWITCH_DEFS__enable_nms_action_t);
break;
+ case CONTEXT_SWITCH_DEFS__ACTION_TYPE_WRITE_DATA_BY_TYPE:
+ data_json = *reinterpret_cast<CONTEXT_SWITCH_DEFS__write_data_by_type_action_t *>(action);
+ action_length_local = sizeof(CONTEXT_SWITCH_DEFS__write_data_by_type_action_t);
+ break;
+ case CONTEXT_SWITCH_DEFS__ACTION_TYPE_SWITCH_LCU_BATCH:
+ data_json = *reinterpret_cast<CONTEXT_SWITCH_DEFS__switch_lcu_batch_action_data_t *>(action);
+ action_length_local = sizeof(CONTEXT_SWITCH_DEFS__switch_lcu_batch_action_data_t);
+ break;
case CONTEXT_SWITCH_DEFS__ACTION_TYPE_COUNT:
// Fallthrough
// Handling CONTEXT_SWITCH_DEFS__ACTION_TYPE_COUNT is needed because we compile this file with -Wswitch-enum
{
j = unpack_vdma_channel_id(data);
}
+
+void to_json(json& j, const CONTEXT_SWITCH_DEFS__switch_lcu_batch_action_data_t& data) {
+ const auto cluster_index = CONTEXT_SWITCH_DEFS__PACKED_LCU_ID_CLUSTER_INDEX_READ(data.packed_lcu_id);
+ const auto lcu_index = CONTEXT_SWITCH_DEFS__PACKED_LCU_ID_LCU_INDEX_READ(data.packed_lcu_id);
+ const auto network_index = data.network_index;
+ const auto kernel_done_count = data.kernel_done_count;
+ j = json{{"cluster_index", cluster_index}, {"lcu_index", lcu_index}, {"network_index", network_index},
+ {"kernel_done_count", kernel_done_count}};
+}
{CONTEXT_SWITCH_DEFS__ACTION_TYPE_OPEN_BOUNDARY_INPUT_CHANNEL, "open_boundary_input_channel"},
{CONTEXT_SWITCH_DEFS__ACTION_TYPE_OPEN_BOUNDARY_OUTPUT_CHANNEL, "open_boundary_output_channel"},
{CONTEXT_SWITCH_DEFS__ACTION_TYPE_ENABLE_NMS, "enable_nms"},
+ {CONTEXT_SWITCH_DEFS__ACTION_TYPE_WRITE_DATA_BY_TYPE, "write_data_by_type"},
+ {CONTEXT_SWITCH_DEFS__ACTION_TYPE_SWITCH_LCU_BATCH, "switch_lcu_batch"},
};
static_assert(ARRAY_ENTRIES(mapping) == CONTEXT_SWITCH_DEFS__ACTION_TYPE_COUNT,
"Missing a mapping from a CONTEXT_SWITCH_DEFS__ACTION_TYPE_t to it's string value");
NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE(CONTEXT_SWITCH_DEFS__sequencer_interrupt_data_t, sequencer_index);
NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE(CONTEXT_SWITCH_DEFS__wait_nms_data_t, aggregator_index);
NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE(CONTEXT_SWITCH_DEFS__module_config_done_interrupt_data_t, module_index);
-NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE(CONTEXT_SWITCH_DEFS__fetch_ccw_bursts_action_data_t, config_stream_index);
-NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE(CONTEXT_SWITCH_DEFS__enable_nms_action_t, nms_unit_index, network_index);
+NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE(CONTEXT_SWITCH_DEFS__fetch_ccw_bursts_action_data_t, config_stream_index, ccw_bursts);
+NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE(CONTEXT_SWITCH_DEFS__enable_nms_action_t, nms_unit_index, network_index, number_of_classes, burst_size);
+NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE(CONTEXT_SWITCH_DEFS__write_data_by_type_action_t, address, data_type, data, shift, mask, network_index);
// Non-default implementations
void to_json(json &j, const CONTEXT_SWITCH_DEFS__deactivate_vdma_channel_action_data_t &data);
void to_json(json &j, const CONTEXT_SWITCH_DEFS__add_ddr_pair_info_action_data_t &data);
void to_json(json &j, const CONTEXT_SWITCH_DEFS__open_boundary_input_channel_data_t &data);
void to_json(json &j, const CONTEXT_SWITCH_DEFS__open_boundary_output_channel_data_t &data);
+void to_json(json &j, const CONTEXT_SWITCH_DEFS__switch_lcu_batch_action_data_t &data);
#endif /* _HAILO_DOWNLOAD_ACTION_LIST_COMMAND_HPP_ */
#endif
#include "parse_hef_command.hpp"
#include "fw_control_command.hpp"
+#include "measure_nnc_performance_command.hpp"
#include "firmware_header_utils.h"
#include "hailo/hailort.h"
add_subcommand<MonCommand>();
#if defined(__GNUC__)
add_subcommand<UdpRateLimiterCommand>();
+ add_subcommand<HwInferEstimatorCommand>();
#endif
add_subcommand<ParseHefCommand>();
add_subcommand<FwControlCommand>();
Expected<std::vector<std::unique_ptr<Device>>> create_devices(const hailo_device_params &device_params);
Expected<std::vector<std::string>> get_device_ids(const hailo_device_params &device_params);
+
+enum class OptionVisibility {
+ VISIBLE,
+ HIDDEN
+};
+
/**
* CLI11 transformer object, converting enum argument from string.
* Use this object instead of CLI::CheckedTransformer in order
class HailoCheckedTransformer : public CLI::CheckedTransformer
{
public:
- HailoCheckedTransformer(std::vector<std::pair<std::string, EnumType>> values) :
- CLI::CheckedTransformer(values)
+
+ struct Enum
{
- desc_function_ = [values]() {
- return CLI::detail::generate_map(CLI::detail::smart_deref(values), true);
+ std::string name;
+ EnumType value;
+ OptionVisibility visibility = OptionVisibility::VISIBLE;
+
+ std::pair<std::string, EnumType> to_pair() const { return std::make_pair(name, value); }
+ };
+
+ HailoCheckedTransformer(std::vector<Enum> values) :
+ CLI::CheckedTransformer(to_values_vector(values, true)) // Include hidden values so the transformer still accepts them.
+ {
+ // Exclude hidden values from the help text and autocomplete suggestions.
+ const auto non_hidden_values = to_values_vector(values, false);
+
+ desc_function_ = [non_hidden_values]() {
+ return CLI::detail::generate_map(CLI::detail::smart_deref(non_hidden_values), true);
+ };
+
+ autocomplete_func_ = [non_hidden_values](const std::string &) {
+ std::vector<std::string> completions;
+ for (const auto &completion : non_hidden_values) {
+ completions.emplace_back(completion.first);
+ }
+ return completions;
};
}
+
+private:
+ static std::vector<std::pair<std::string, EnumType>> to_values_vector(const std::vector<Enum> &values,
+ bool get_hidden)
+ {
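+ // Build the (name, value) pairs handed to CLI11, optionally including hidden enum values.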
+ std::vector<std::pair<std::string, EnumType>> values_vector;
+ for (const auto &value : values) {
+ if (get_hidden || (value.visibility == OptionVisibility::VISIBLE)) {
+ values_vector.emplace_back(value.to_pair());
+ }
+ }
+ return values_vector;
+ }
};
class DeprecationAction
--- /dev/null
+/**
+ * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+ **/
+/**
+ * @file measure_nnc_performance_command.cpp
+ * @brief Measure neural network performance for a given network using only the HW components, without host SW
+ **/
+
+#include "measure_nnc_performance_command.hpp"
+#include "hailortcli.hpp"
+
+#include "hailo/hailort.h"
+#include "hailo/network_group.hpp"
+#include "hailo/hef.hpp"
+#include "hailo/vstream.hpp"
+#include "hailo/vdevice.hpp"
+
+#include <iostream>
+
+#define BYTES_TO_KILOBYTES (1024)
+
+HwInferEstimatorCommand::HwInferEstimatorCommand(CLI::App &parent_app) :
+ Command(parent_app.add_subcommand("measure-nnc-performance",
+ "measure nerual network performance for given network using only the HW components without host SW")),
+ m_params({})
+{
+ // Hide this command from the --help output in the command line.
+ m_app->group("");
+
+ add_vdevice_options(m_app, m_params.vdevice_params);
+ m_app->add_option("hef", m_params.hef_path, "Path of the HEF to load")
+ ->check(CLI::ExistingFile)
+ ->required();
+ m_app->add_option("--batch-size", m_params.batch_size,
+ "Inference batch.\n"
+ "This batch applies to the whole network_group.")
+ ->check(CLI::NonNegativeNumber)
+ ->default_val(HAILO_DEFAULT_BATCH_SIZE);
+}
+
+Expected<std::map<std::string, ConfigureNetworkParams>> get_configure_params(const hw_infer_runner_params &params,
+ hailort::Hef &hef, hailo_stream_interface_t interface)
+{
+ std::map<std::string, ConfigureNetworkParams> configure_params{};
+
+ hailo_configure_params_t config_params{};
+ hailo_status status = hailo_init_configure_params(reinterpret_cast<hailo_hef>(&hef), interface, &config_params);
+ CHECK_SUCCESS_AS_EXPECTED(status);
+
+ /* For default case overwrite batch to 1 */
+ uint16_t batch_size = (HAILO_DEFAULT_BATCH_SIZE == params.batch_size ? 1 : params.batch_size);
+
+ /* Fill all network and network group structs with batch size value */
+ for (size_t network_group_idx = 0; network_group_idx < config_params.network_group_params_count; network_group_idx++) {
+ config_params.network_group_params[network_group_idx].batch_size = batch_size;
+ }
+
+ for (size_t network_group_idx = 0; network_group_idx < config_params.network_group_params_count; network_group_idx++) {
+ config_params.network_group_params[network_group_idx].power_mode = params.power_mode;
+ configure_params.emplace(std::string(config_params.network_group_params[network_group_idx].name),
+ ConfigureNetworkParams(config_params.network_group_params[network_group_idx]));
+ }
+
+ return configure_params;
+}
+
+hailo_status HwInferEstimatorCommand::execute()
+{
+ auto devices = create_devices(m_params.vdevice_params.device_params);
+ CHECK_EXPECTED_AS_STATUS(devices, "Failed creating device");
+ /* create_devices() may return multiple devices.
+ We validate that exactly one was created, since HW inference runs on a single physical device */
+ CHECK(1 == devices->size(), HAILO_INTERNAL_FAILURE, "HW infer command supports only one physical device");
+ auto &device = devices.value()[0];
+
+ auto hef = Hef::create(m_params.hef_path.c_str());
+ CHECK_EXPECTED_AS_STATUS(hef, "Failed reading hef file {}", m_params.hef_path);
+
+ auto interface = device->get_default_streams_interface();
+ CHECK_EXPECTED_AS_STATUS(interface, "Failed to get default streams interface");
+
+ auto configure_params = get_configure_params(m_params, hef.value(), interface.value());
+ CHECK_EXPECTED_AS_STATUS(configure_params);
+
+ /* Use Env var to configure all desc list with max depth */
+ setenv("HAILO_CONFIGURE_FOR_HW_INFER","Y",1);
+ auto network_group_list = device->configure(hef.value(), configure_params.value());
+ CHECK_EXPECTED_AS_STATUS(network_group_list, "Failed to configure device from HEF");
+ unsetenv("HAILO_CONFIGURE_FOR_HW_INFER");
+
+ CHECK(1 == network_group_list->size(), HAILO_INVALID_OPERATION,
+ "HW Inference is not supported on HEFs with multiple network groups");
+
+ auto network_group_ptr = network_group_list.value()[0];
+
+ std::cout << "Starting HW infer Estimator..." << std::endl;
+
+ auto results = network_group_ptr->run_hw_infer_estimator();
+ CHECK_EXPECTED_AS_STATUS(results);
+
+ std::cout << std::endl;
+ std::cout << "======================" << std::endl;
+ std::cout << " Summary" << std::endl;
+ std::cout << "======================" << std::endl;
+
+ std::cout << "Batch count: " << results->batch_count << std::endl;
+ std::cout << "Total transfer size [KB]: " << (results->total_transfer_size / BYTES_TO_KILOBYTES) << std::endl;
+ std::cout << "Total frames passed: " << results->total_frames_passed << std::endl;
+ std::cout << "Total time [s]: " << results->time_sec << std::endl;
+ std::cout << "Total FPS [1/s]: " << results->fps << std::endl;
+ std::cout << "BW [Gbps]: " << results->BW_Gbps << std::endl;
+
+ std::cout << "======================" << std::endl;
+ std::cout << " End of report" << std::endl;
+ std::cout << "======================" << std::endl;
+ return HAILO_SUCCESS;
+}
--- /dev/null
+/**
+ * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+ **/
+/**
+ * @file measure_nnc_performance_command.hpp
+ * @brief Measure neural network performance for a given network using only the HW components, without host SW
+ **/
+
+#ifndef _HAILO_HW_INFER_ESTIMATOR_COMMAND_HPP_
+#define _HAILO_HW_INFER_ESTIMATOR_COMMAND_HPP_
+
+#include "hailortcli.hpp"
+#include "command.hpp"
+#include "CLI/CLI.hpp"
+
+struct hw_infer_runner_params {
+ hailo_vdevice_params vdevice_params;
+ std::string hef_path;
+ uint16_t batch_size;
+ hailo_power_mode_t power_mode;
+};
+
+class HwInferEstimatorCommand : public Command {
+public:
+ explicit HwInferEstimatorCommand(CLI::App &parent_app);
+ hailo_status execute() override;
+
+private:
+ hw_infer_runner_params m_params;
+};
+
+#endif /*_HAILO_HW_INFER_ESTIMATOR_COMMAND_HPP_*/
\ No newline at end of file
#include "hailortcli.hpp"
#include "command.hpp"
-#include "vdevice/scheduler/scheduler_mon.hpp"
+#include "utils/profiler/monitor_handler.hpp"
#include "CLI/CLI.hpp"
CHECK_EXPECTED_AS_STATUS(hef_exp, "Failed to parse HEF");
auto hef = hef_exp.release();
- auto hef_info = hef.get_hef_description(stream_infos, vstream_infos);
+ auto hef_info = hef.get_description(stream_infos, vstream_infos);
CHECK_EXPECTED_AS_STATUS(hef_info, "Failed to parse HEF");
std::cout << hef_info.release();
return HAILO_SUCCESS;
--- /dev/null
+/**
+ * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+**/
+/**
+ * @file io_wrappers.cpp
+ **/
+
+#include "io_wrappers.hpp"
+
+FramerateThrottle::FramerateThrottle(uint32_t framerate) :
+ m_framerate(framerate),
+ m_framerate_interval(std::chrono::duration<double>(1) / framerate),
+ m_last_write_time(std::chrono::steady_clock::now())
+{}
+
+void FramerateThrottle::throttle()
+{
+ if (m_framerate == UNLIMITED_FRAMERATE) {
+ return;
+ }
+
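+ // Sleep for the remainder of the frame interval, measured from the previous write.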
+ const auto elapsed_time = std::chrono::steady_clock::now() - m_last_write_time;
+ std::this_thread::sleep_for(m_framerate_interval - elapsed_time);
+ m_last_write_time = std::chrono::steady_clock::now();
+}
--- /dev/null
+/**
+ * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+**/
+/**
+ * @file io_wrappers.hpp
+ * @brief Wrappers for Input/Output Stream/VStream. Manages buffer allocation, framerate throttle, latency meter and
+ * more.
+ **/
+
+#ifndef _HAILO_IO_WRAPPERS_HPP_
+#define _HAILO_IO_WRAPPERS_HPP_
+
+#include "network_live_track.hpp"
+
+#include "common/file_utils.hpp"
+#include "common/latency_meter.hpp"
+
+#include <chrono>
+#include <string>
+
+using namespace hailort;
+
+constexpr uint32_t UNLIMITED_FRAMERATE = 0;
+
+#ifndef HAILO_EMULATOR
+constexpr std::chrono::milliseconds HAILORTCLI_DEFAULT_TIMEOUT(HAILO_DEFAULT_VSTREAM_TIMEOUT_MS);
+#else /* ifndef HAILO_EMULATOR */
+constexpr std::chrono::milliseconds HAILORTCLI_DEFAULT_TIMEOUT(HAILO_DEFAULT_VSTREAM_TIMEOUT_MS * 100);
+#endif /* ifndef HAILO_EMULATOR */
+
+
+class FramerateThrottle final
+{
+public:
+ FramerateThrottle(uint32_t framerate);
+ ~FramerateThrottle() = default;
+ void throttle();
+
+private:
+ const uint32_t m_framerate;
+ const std::chrono::duration<double> m_framerate_interval;
+ decltype(std::chrono::steady_clock::now()) m_last_write_time;
+};
+
+// Wrapper for InputStream or InputVStream objects.
+template<typename Writer>
+class WriterWrapper final
+{
+public:
+ template<typename WriterParams>
+ static Expected<std::shared_ptr<WriterWrapper>> create(Writer &writer, const WriterParams &params,
+ const LatencyMeterPtr &overall_latency_meter, uint32_t framerate)
+ {
+ auto dataset = create_dataset(writer, params);
+ CHECK_EXPECTED(dataset);
+
+ std::shared_ptr<WriterWrapper> wrapper(
+ new (std::nothrow) WriterWrapper(writer, dataset.release(), overall_latency_meter, framerate));
+ CHECK_NOT_NULL_AS_EXPECTED(wrapper, HAILO_OUT_OF_HOST_MEMORY);
+
+ return wrapper;
+ }
+
+ Writer &get() { return m_writer.get(); }
+ Writer &get() const { return m_writer.get(); }
+
+ hailo_status write()
+ {
+ before_write_start();
+ auto status = get().write(MemoryView(*next_buffer()));
+ if (HAILO_SUCCESS != status) {
+ return status;
+ }
+
+ m_framerate_throttle.throttle();
+ return HAILO_SUCCESS;
+ }
+
+ hailo_status wait_for_async_ready()
+ {
+ return get().wait_for_async_ready(m_dataset[0]->size(), HAILORTCLI_DEFAULT_TIMEOUT);
+ }
+
+ hailo_status write_async(typename Writer::TransferDoneCallback callback)
+ {
+ before_write_start();
+ // We can use the same buffer for multiple writes simultaneously. That is OK since we don't modify the buffers.
+ auto status = get().write_async(MemoryView(*next_buffer()), callback);
+ if (HAILO_SUCCESS != status) {
+ return status;
+ }
+
+ m_framerate_throttle.throttle();
+ return HAILO_SUCCESS;
+ }
+
+private:
+ WriterWrapper(Writer &writer, std::vector<BufferPtr> &&dataset, const LatencyMeterPtr &overall_latency_meter,
+ uint32_t framerate) :
+ m_writer(std::ref(writer)),
+ m_dataset(std::move(dataset)),
+ m_overall_latency_meter(overall_latency_meter),
+ m_framerate_throttle(framerate)
+ {}
+
+ void before_write_start()
+ {
+ if (m_overall_latency_meter) {
+ m_overall_latency_meter->add_start_sample(std::chrono::steady_clock::now().time_since_epoch());
+ }
+ }
+
+ size_t next_buffer_index()
+ {
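+ // Advance over the dataset round-robin so long runs reuse the same preallocated frames.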
+ const auto index = m_current_buffer_index;
+ m_current_buffer_index = (m_current_buffer_index + 1) % m_dataset.size();
+ return index;
+ }
+
+ BufferPtr next_buffer()
+ {
+ return m_dataset[next_buffer_index()];
+ }
+
+ template<typename WriterParams>
+ static Expected<std::vector<BufferPtr>> create_dataset(Writer &writer, const WriterParams &params)
+ {
+ if (params.input_file_path.empty()) {
+ return create_constant_dataset(writer.get_frame_size());
+ } else {
+ return create_dataset_from_input_file(params.input_file_path, writer.get_frame_size());
+ }
+ }
+
+ static Expected<std::vector<BufferPtr>> create_constant_dataset(size_t frame_size)
+ {
+ const uint8_t const_byte = 0xAB;
+ auto constant_buffer = Buffer::create_shared(frame_size, const_byte, BufferStorageParams::create_dma());
+ CHECK_EXPECTED(constant_buffer);
+
+ return std::vector<BufferPtr>{constant_buffer.release()};
+ }
+
+ static Expected<std::vector<BufferPtr>> create_dataset_from_input_file(const std::string &file_path, size_t frame_size)
+ {
+ auto buffer = read_binary_file(file_path);
+ CHECK_EXPECTED(buffer);
+ CHECK_AS_EXPECTED(0 == (buffer->size() % frame_size), HAILO_INVALID_ARGUMENT,
+ "Input file ({}) size {} must be a multiple of the frame size {}",
+ file_path, buffer->size(), frame_size);
+
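+ // Split the input file into frame-sized, DMA-capable buffers.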
+ std::vector<BufferPtr> dataset;
+ const size_t frames_count = buffer->size() / frame_size;
+ dataset.reserve(frames_count);
+ for (size_t i = 0; i < frames_count; i++) {
+ const auto offset = frame_size * i;
+ auto frame_buffer = Buffer::create_shared(buffer->data() + offset, frame_size, BufferStorageParams::create_dma());
+ CHECK_EXPECTED(frame_buffer);
+ dataset.emplace_back(frame_buffer.release());
+ }
+
+ return dataset;
+ }
+
+ std::reference_wrapper<Writer> m_writer;
+
+ std::vector<BufferPtr> m_dataset;
+ size_t m_current_buffer_index = 0;
+
+ LatencyMeterPtr m_overall_latency_meter;
+ FramerateThrottle m_framerate_throttle;
+};
+
+template<typename Writer>
+using WriterWrapperPtr = std::shared_ptr<WriterWrapper<Writer>>;
+
+// Wrapper for OutputStream or OutputVStream objects.
+// We use std::enable_shared_from_this because in the async API the callback uses `this`; we want to keep the
+// reference count up until the callback completes.
+template<typename Reader>
+class ReaderWrapper final : public std::enable_shared_from_this<ReaderWrapper<Reader>>
+{
+public:
+ static Expected<std::shared_ptr<ReaderWrapper>> create(Reader &reader, const LatencyMeterPtr &overall_latency_meter,
+ std::shared_ptr<NetworkLiveTrack> net_live_track)
+ {
+ auto buffer = Buffer::create_shared(reader.get_frame_size(), BufferStorageParams::create_dma());
+ CHECK_EXPECTED(buffer);
+
+ std::shared_ptr<ReaderWrapper> wrapper(
+ new (std::nothrow) ReaderWrapper(reader, buffer.release(), overall_latency_meter, net_live_track));
+ CHECK_NOT_NULL_AS_EXPECTED(wrapper, HAILO_OUT_OF_HOST_MEMORY);
+
+ return wrapper;
+ }
+
+ Reader &get() { return m_reader.get(); }
+ Reader &get() const { return m_reader.get(); }
+
+ hailo_status read()
+ {
+ auto status = get().read(MemoryView(*m_buffer));
+ if (HAILO_SUCCESS != status) {
+ return status;
+ }
+
+ on_read_done();
+ return HAILO_SUCCESS;
+ }
+
+ hailo_status wait_for_async_ready()
+ {
+ return get().wait_for_async_ready(m_buffer->size(), HAILORTCLI_DEFAULT_TIMEOUT);
+ }
+
+ hailo_status read_async(typename Reader::TransferDoneCallback callback)
+ {
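+ // Capture shared_from_this so this wrapper outlives the async transfer; latency/progress are recorded on success.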
+ auto self = std::enable_shared_from_this<ReaderWrapper<Reader>>::shared_from_this();
+ return get().read_async(MemoryView(*m_buffer),
+ [self, original=callback](const typename Reader::CompletionInfo &completion_info) {
+ original(completion_info);
+ if (completion_info.status == HAILO_SUCCESS) {
+ self->on_read_done();
+ }
+ });
+ }
+
+private:
+ ReaderWrapper(Reader &reader, BufferPtr &&buffer, const LatencyMeterPtr &overall_latency_meter,
+ std::shared_ptr<NetworkLiveTrack> net_live_track) :
+ m_reader(std::ref(reader)),
+ m_buffer(std::move(buffer)),
+ m_overall_latency_meter(overall_latency_meter),
+ m_net_live_track(net_live_track)
+ {}
+
+ void on_read_done()
+ {
+ if (m_overall_latency_meter) {
+ m_overall_latency_meter->add_end_sample(get().name(), std::chrono::steady_clock::now().time_since_epoch());
+ }
+
+ if (m_net_live_track) {
+ m_net_live_track->progress();
+ }
+ }
+
+ std::reference_wrapper<Reader> m_reader;
+ BufferPtr m_buffer;
+ LatencyMeterPtr m_overall_latency_meter;
+ std::shared_ptr<NetworkLiveTrack> m_net_live_track;
+};
+
+template<typename Reader>
+using ReaderWrapperPtr = std::shared_ptr<ReaderWrapper<Reader>>;
+
+#endif /* _HAILO_IO_WRAPPERS_HPP_ */
+++ /dev/null
-/**
- * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
- * Distributed under the MIT license (https://opensource.org/licenses/MIT)
- **/
-/**
- * @file live_printer.cpp
- * @brief Live printer
- **/
-
-#include "live_printer.hpp"
-#include "../common.hpp"
-#include "common/os_utils.hpp"
-#include "common/utils.hpp"
-#include <sstream>
-#include <iostream>
-
-using namespace hailort;
-
-LivePrinter::LivePrinter(std::chrono::milliseconds interval) :
- m_interval(interval),
- m_stop_event(Event::create_shared(Event::State::not_signalled)),
- m_tracks(),
- m_mutex(),
- m_prev_count(0),
- m_enable_ansi_escape_sequences(CursorAdjustment())
-{
-}
-
-LivePrinter::~LivePrinter()
-{
- (void)m_stop_event->signal();
- if (m_thread.joinable()) {
- m_thread.join();
- }
- print();
-}
-
-void LivePrinter::add(std::shared_ptr<Track> track, uint8_t level)
-{
- std::unique_lock<std::mutex> lock(m_mutex);
- if (!contains(m_tracks, level)) {
- m_tracks[level] = {};
- }
- m_tracks[level].emplace_back(track);
-}
-
-void LivePrinter::print()
-{
- std::stringstream ss;
- uint32_t count = 0;
-
- {
- std::unique_lock<std::mutex> lock(m_mutex);
- for (auto &level_pair : m_tracks) {
- for (auto &track : level_pair.second) {
- count += track->get_text(ss);
- }
- }
- }
- CliCommon::reset_cursor(m_prev_count);
- // On the first print m_prev_count = 0, so no lines will be deleted
- std::cout << ss.str() << std::flush;
- m_prev_count = count;
-}
-
-hailo_status LivePrinter::start()
-{
- for (auto &level_pair : m_tracks) {
- for (auto &track : level_pair.second) {
- CHECK_SUCCESS(track->start());
- }
- }
-
- m_thread = std::thread([this] () {
- OsUtils::set_current_thread_name("LIVE_PRINTER");
- while (true) {
- print();
- auto status = m_stop_event->wait(m_interval);
- if (HAILO_TIMEOUT != status) {
- break;
- }
- }
- });
-
- return HAILO_SUCCESS;
-}
+++ /dev/null
-/**
- * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
- * Distributed under the MIT license (https://opensource.org/licenses/MIT)
- **/
-/**
- * @file live_printer.hpp
- * @brief Live printer
- **/
-
-#ifndef _HAILO_HAILORTCLI_RUN2_LIVE_PRINTER_HPP_
-#define _HAILO_HAILORTCLI_RUN2_LIVE_PRINTER_HPP_
-
-#include "common/os_utils.hpp"
-#include "hailo/event.hpp"
-#include <stdint.h>
-#include <chrono>
-#include <mutex>
-#include <thread>
-#include <atomic>
-#include <map>
-
-class LivePrinter final
-{
-public:
- class Track
- {
- public:
- Track() : m_started(false)
- {}
-
- virtual hailo_status start() = 0;
- virtual uint32_t get_text(std::stringstream &ss) = 0;
-
- protected:
- bool m_started;
- };
-
- LivePrinter(std::chrono::milliseconds interval);
- ~LivePrinter();
- void add(std::shared_ptr<Track> track, uint8_t level); // prints tracks in consecutive order from low-to-high levels
- void print();
- hailo_status start();
-
-private:
- std::chrono::milliseconds m_interval;
- hailort::EventPtr m_stop_event;
- std::map<uint8_t, std::vector<std::shared_ptr<Track>>> m_tracks;
- std::thread m_thread;
- std::mutex m_mutex;
- uint32_t m_prev_count;
- hailort::CursorAdjustment m_enable_ansi_escape_sequences;
-};
-
-#endif /* _HAILO_HAILORTCLI_RUN2_LIVE_PRINTER_HPP_ */
\ No newline at end of file
--- /dev/null
+/**
+ * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+ **/
+/**
+ * @file live_stats.cpp
+ * @brief Live stats
+ **/
+
+#include "live_stats.hpp"
+#include "../common.hpp"
+#include "common/os_utils.hpp"
+#include "common/utils.hpp"
+#include <nlohmann/json.hpp>
+#include <sstream>
+#include <iostream>
+
+using namespace hailort;
+
+hailo_status LiveStats::Track::start()
+{
+ CHECK_SUCCESS(start_impl());
+ m_started = true;
+ return HAILO_SUCCESS;
+}
+
+uint32_t LiveStats::Track::push_text(std::stringstream &ss)
+{
+ if (!m_started) {
+ return 0;
+ }
+ return push_text_impl(ss);
+}
+
+void LiveStats::Track::push_json(nlohmann::ordered_json &json)
+{
+ if (!m_started) {
+ return;
+ }
+ push_json_impl(json);
+}
+
+LiveStats::LiveStats(std::chrono::milliseconds interval) :
+ m_running(false),
+ m_interval(interval),
+ m_stop_event(Event::create_shared(Event::State::not_signalled)),
+ m_tracks(),
+ m_mutex(),
+ m_prev_count(0),
+ m_enable_ansi_escape_sequences(CursorAdjustment())
+{
+}
+
+LiveStats::~LiveStats()
+{
+ stop();
+ print();
+}
+
+void LiveStats::add(std::shared_ptr<Track> track, uint8_t level)
+{
+ std::unique_lock<std::mutex> lock(m_mutex);
+ m_tracks[level].emplace_back(track);
+}
+
+void LiveStats::print()
+{
+ std::stringstream ss;
+ uint32_t count = 0;
+
+ {
+ std::unique_lock<std::mutex> lock(m_mutex);
+ for (auto &level_pair : m_tracks) {
+ for (auto &track : level_pair.second) {
+ count += track->push_text(ss);
+ }
+ }
+ }
+ CliCommon::reset_cursor(m_prev_count);
+ // On the first print m_prev_count = 0, so no lines will be deleted
+ std::cout << ss.str() << std::flush;
+ m_prev_count = count;
+}
+
+hailo_status LiveStats::dump_stats(const std::string &json_path, const std::string &inference_mode)
+{
+ stop(); // stop measuring before creating json because we want the json to hold the last measurements
+ nlohmann::ordered_json json;
+
+ auto time = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now());
+ auto str_time = std::string(std::ctime(&time));
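+ // std::ctime appends a trailing newline; drop it before storing the timestamp.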
+ if (str_time.length()){
+ str_time.pop_back();
+ }
+
+ json["time"] = str_time;
+ json["inference_mode"] = inference_mode;
+ json["network_groups"] = nlohmann::ordered_json::array();
+
+ std::unique_lock<std::mutex> lock(m_mutex);
+ for (auto &level_pair : m_tracks) {
+ for (auto &track : level_pair.second) {
+ track->push_json(json);
+ }
+ }
+
+ std::ofstream output_json(json_path);
+ CHECK(output_json, HAILO_FILE_OPERATION_FAILURE, "Failed opening file '{}'", json_path);
+
+ output_json << std::setw(4) << json << std::endl; // 4: amount of spaces to indent (for pretty printing)
+ CHECK(!output_json.bad() && !output_json.fail(), HAILO_FILE_OPERATION_FAILURE,
+ "Failed writing to file '{}'", json_path);
+
+ return HAILO_SUCCESS;
+}
+
+hailo_status LiveStats::start()
+{
+ // In order to re-start LiveStats, we should add m_stop_event->reset() here
+ m_running = true;
+ for (auto &level_pair : m_tracks) {
+ for (auto &track : level_pair.second) {
+ CHECK_SUCCESS(track->start());
+ }
+ }
+
+ m_thread = std::thread([this] () {
+ OsUtils::set_current_thread_name("LIVE_PRINTER");
+ while (true) {
+ print();
+ auto status = m_stop_event->wait(m_interval);
+ if (HAILO_TIMEOUT != status) {
+ break;
+ }
+ }
+ });
+ return HAILO_SUCCESS;
+}
+
+void LiveStats::stop()
+{
+ if (m_running){
+ (void)m_stop_event->signal();
+ if (m_thread.joinable()) {
+ m_thread.join();
+ }
+ m_running = false;
+ }
+}
--- /dev/null
+/**
+ * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+ **/
+/**
+ * @file live_stats.hpp
+ * @brief Live stats
+ **/
+
+#ifndef _HAILO_HAILORTCLI_RUN2_LIVE_STATS_HPP_
+#define _HAILO_HAILORTCLI_RUN2_LIVE_STATS_HPP_
+
+#include "common/os_utils.hpp"
+#include "hailo/event.hpp"
+#include <nlohmann/json.hpp>
+#include <stdint.h>
+#include <chrono>
+#include <mutex>
+#include <thread>
+#include <atomic>
+#include <map>
+
+class LiveStats final
+{
+public:
+ class Track
+ {
+ public:
+ Track() : m_started(false)
+ {}
+
+ hailo_status start();
+ uint32_t push_text(std::stringstream &ss);
+ void push_json(nlohmann::ordered_json &json);
+
+ protected:
+ virtual hailo_status start_impl() = 0;
+ virtual uint32_t push_text_impl(std::stringstream &ss) = 0;
+ virtual void push_json_impl(nlohmann::ordered_json &json) = 0;
+
+ bool m_started;
+ };
+
+ LiveStats(std::chrono::milliseconds interval);
+ ~LiveStats();
+ void add(std::shared_ptr<Track> track, uint8_t level); // prints tracks in consecutive order from low-to-high levels
+ void print();
+ hailo_status dump_stats(const std::string &json_path, const std::string &inference_mode);
+ hailo_status start();
+ void stop();
+
+private:
+ bool m_running;
+ std::chrono::milliseconds m_interval;
+ hailort::EventPtr m_stop_event;
+ std::map<uint8_t, std::vector<std::shared_ptr<Track>>> m_tracks;
+ std::thread m_thread;
+ std::mutex m_mutex;
+ uint32_t m_prev_count;
+ hailort::CursorAdjustment m_enable_ansi_escape_sequences;
+};
+
+#endif /* _HAILO_HAILORTCLI_RUN2_LIVE_STATS_HPP_ */
\ No newline at end of file
#include <spdlog/fmt/fmt.h>
#include <sstream>
-
using namespace hailort;
Expected<std::shared_ptr<MeasurementLiveTrack>> MeasurementLiveTrack::create_shared(Device &device, bool measure_power, bool measure_current,
MeasurementLiveTrack::MeasurementLiveTrack(std::shared_ptr<PowerMeasurement> power_measurement,
std::shared_ptr<PowerMeasurement> current_measurement, std::shared_ptr<TemperatureMeasurement> temp_measurement,
const std::string &device_id) :
- LivePrinter::Track(), m_power_measurement(std::move(power_measurement)), m_current_measurement(std::move(current_measurement)),
+ LiveStats::Track(), m_power_measurement(std::move(power_measurement)), m_current_measurement(std::move(current_measurement)),
m_temp_measurement(std::move(temp_measurement)), m_device_id(device_id)
{}
-hailo_status MeasurementLiveTrack::start()
+hailo_status MeasurementLiveTrack::start_impl()
{
if (m_power_measurement) {
CHECK_SUCCESS(m_power_measurement->start_measurement());
}
-
if (m_current_measurement) {
CHECK_SUCCESS(m_current_measurement->start_measurement());
}
-
if (m_temp_measurement) {
CHECK_SUCCESS(m_temp_measurement->start_measurement());
}
- m_started = true;
-
return HAILO_SUCCESS;
}
-uint32_t MeasurementLiveTrack::get_text(std::stringstream &ss)
+uint32_t MeasurementLiveTrack::push_text_impl(std::stringstream &ss)
{
- if (!m_started) {
- return 0;
- }
-
auto rows_count = 0;
if (m_power_measurement || m_current_measurement || m_temp_measurement) {
}
return rows_count;
+}
+
+void MeasurementLiveTrack::push_json_measurement_val(nlohmann::ordered_json &device_json, std::shared_ptr<BaseMeasurement> measurement, const std::string &measurement_name)
+{
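+ // Emit min/max/average (with units) for one measurement type, if any samples were collected.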
+ auto measurement_info = measurement->get_data();
+ auto measurement_unit = measurement->measurement_unit();
+ auto min = measurement_info.min();
+ auto max = measurement_info.max();
+ auto mean = measurement_info.mean();
+ if (min && max && mean) {
+ device_json[measurement_name] = {
+ {"min", std::to_string(min.value()) + " " + measurement_unit},
+ {"max", std::to_string(max.value()) + " " + measurement_unit},
+ {"average", std::to_string(mean.value()) + " " + measurement_unit}
+ };
+ }
+}
+
+void MeasurementLiveTrack::push_json_impl(nlohmann::ordered_json &json)
+{
+ nlohmann::ordered_json device_json;
+ device_json["device_id"] = m_device_id;
+
+ if (m_power_measurement){
+ push_json_measurement_val(device_json, m_power_measurement, "power");
+ }
+ if (m_current_measurement){
+ push_json_measurement_val(device_json, m_current_measurement, "current");
+ }
+ if (m_temp_measurement){
+ push_json_measurement_val(device_json, m_temp_measurement, "temperature");
+ }
+ json["devices"].emplace_back(device_json);
}
\ No newline at end of file
#include "hailo/hailort.h"
#include "common/device_measurements.hpp"
+#include "live_stats.hpp"
-#include "live_printer.hpp"
+#include <nlohmann/json.hpp>
-
-class MeasurementLiveTrack : public LivePrinter::Track
+class MeasurementLiveTrack : public LiveStats::Track
{
public:
static hailort::Expected<std::shared_ptr<MeasurementLiveTrack>> create_shared(hailort::Device &vdevice, bool measure_power,
bool measure_current, bool measure_temp);
virtual ~MeasurementLiveTrack() = default;
- virtual hailo_status start() override;
- virtual uint32_t get_text(std::stringstream &ss) override;
+ virtual hailo_status start_impl() override;
+ virtual uint32_t push_text_impl(std::stringstream &ss) override;
+ virtual void push_json_impl(nlohmann::ordered_json &json) override;
MeasurementLiveTrack(std::shared_ptr<PowerMeasurement> power_measurement, std::shared_ptr<PowerMeasurement> current_measurement,
std::shared_ptr<TemperatureMeasurement> temp_measurement, const std::string &device_id);
private:
+ void push_json_measurement_val(nlohmann::ordered_json &device_json, std::shared_ptr<BaseMeasurement> measurement, const std::string &measurement_name);
std::shared_ptr<PowerMeasurement> m_power_measurement;
std::shared_ptr<PowerMeasurement> m_current_measurement;
std::shared_ptr<TemperatureMeasurement> m_temp_measurement;
#include <spdlog/fmt/fmt.h>
#include <sstream>
-NetworkLiveTrack::NetworkLiveTrack(const std::string &name, std::shared_ptr<ConfiguredNetworkGroup> cng, LatencyMeterPtr overall_latency_meter) :
- m_name(name), m_count(0), m_last_get_time(), m_cng(cng), m_overall_latency_meter(overall_latency_meter)
+size_t NetworkLiveTrack::max_ng_name = 0;
+std::mutex NetworkLiveTrack::mutex;
+
+NetworkLiveTrack::NetworkLiveTrack(const std::string &name, std::shared_ptr<ConfiguredNetworkGroup> cng,
+ LatencyMeterPtr overall_latency_meter, bool measure_fps, const std::string &hef_path) :
+ m_name(name),
+ m_count(0),
+ m_last_get_time(),
+ m_cng(cng),
+ m_overall_latency_meter(overall_latency_meter),
+ m_measure_fps(measure_fps),
+ m_hef_path(hef_path)
{
+ std::lock_guard<std::mutex> lock(mutex);
+ max_ng_name = std::max(m_name.size(), max_ng_name);
}
-hailo_status NetworkLiveTrack::start()
+hailo_status NetworkLiveTrack::start_impl()
{
m_last_get_time = std::chrono::steady_clock::now();
m_count = 0;
- m_started = true;
return HAILO_SUCCESS;
}
-uint32_t NetworkLiveTrack::get_text(std::stringstream &ss)
+double NetworkLiveTrack::get_fps()
{
- if (!m_started) {
- return 0;
- }
-
auto elapsed_time = std::chrono::steady_clock::now() - m_last_get_time;
auto count = m_count.load();
-
auto fps = count / std::chrono::duration<double>(elapsed_time).count();
- ss << fmt::format("{}:\n\t| fps: {:.2f}", m_name, fps);
+ return fps;
+}
+
+uint32_t NetworkLiveTrack::push_text_impl(std::stringstream &ss)
+{
+ ss << fmt::format("{}:", m_name);
+ ss << std::string(max_ng_name - m_name.size(), ' ');
+
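+ // Join the printed fields with " | "; the first field is preceded by a single space only.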
+ bool first = true;
+ auto get_separator = [&first] () {
+ auto res = first ? " " : " | ";
+ first = false;
+ return res;
+ };
+
+ if (m_measure_fps) {
+ auto fps = get_fps();
+ ss << fmt::format("{}fps: {:.2f}", get_separator(), fps);
+ }
auto hw_latency_measurement = m_cng->get_latency_measurement();
if (hw_latency_measurement) {
- ss << fmt::format(" | hw latency: {:.2f} ms", InferResultsFormatUtils::latency_result_to_ms(hw_latency_measurement->avg_hw_latency));
+ ss << fmt::format("{}hw latency: {:.2f} ms", get_separator(), InferResultsFormatUtils::latency_result_to_ms(hw_latency_measurement->avg_hw_latency));
}
else if (HAILO_NOT_AVAILABLE != hw_latency_measurement.status()) { // HAILO_NOT_AVAILABLE is a valid error, we ignore it
- ss << fmt::format(" | hw latency: failed with status={}", hw_latency_measurement.status());
+ ss << fmt::format("{}hw latency: NaN (err)", get_separator());
}
if (m_overall_latency_meter) {
- auto overall_latency_measurement = m_overall_latency_meter->get_latency(true);
+ auto overall_latency_measurement = m_overall_latency_meter->get_latency(false);
if (overall_latency_measurement) {
- ss << fmt::format(" | overall latency: {:.2f} ms", InferResultsFormatUtils::latency_result_to_ms(*overall_latency_measurement));
+ ss << fmt::format("{}overall latency: {:.2f} ms", get_separator(), InferResultsFormatUtils::latency_result_to_ms(*overall_latency_measurement));
}
else if (HAILO_NOT_AVAILABLE != overall_latency_measurement.status()) { // HAILO_NOT_AVAILABLE is a valid error, we ignore it
- ss << fmt::format(" | overall latency: failed with status={}", overall_latency_measurement.status());
+ ss << fmt::format("{}overall latency: NaN (err)", get_separator());
}
}
ss << "\n";
- return 2;
+ return 1;
+}
+
+void NetworkLiveTrack::push_json_impl(nlohmann::ordered_json &json)
+{
+ nlohmann::ordered_json network_group_json;
+ network_group_json["name"] = m_name;
+ network_group_json["full_hef_path"] = m_hef_path;
+
+ // TODO: HRT-8695 Support stats display per network
+ // auto networks_info = m_cng->get_network_infos();
+ // if (networks_info){
+ // network_group_json["networks"] = nlohmann::ordered_json::array();
+ // for (const auto &network_info : networks_info.value()){
+ // network_group_json["networks"].emplace_back(nlohmann::json::object({ {"name", network_info.name} }));
+ // }
+ // }
+
+ if (m_measure_fps) {
+ auto fps = get_fps();
+ network_group_json["FPS"] = std::to_string(fps);
+ }
+
+ auto hw_latency_measurement = m_cng->get_latency_measurement();
+ if (hw_latency_measurement){
+ network_group_json["hw_latency"] = InferResultsFormatUtils::latency_result_to_ms(hw_latency_measurement->avg_hw_latency);
+ }
+
+ if (m_overall_latency_meter){
+ auto overall_latency_measurement = m_overall_latency_meter->get_latency(false);
+ if (overall_latency_measurement){
+ network_group_json["overall_latency"] = InferResultsFormatUtils::latency_result_to_ms(*overall_latency_measurement);
+ }
+ }
+ json["network_groups"].emplace_back(network_group_json);
}
void NetworkLiveTrack::progress()
#include "common/latency_meter.hpp"
-#include "live_printer.hpp"
+#include "live_stats.hpp"
+#include <nlohmann/json.hpp>
-class NetworkLiveTrack : public LivePrinter::Track
+
+class NetworkLiveTrack : public LiveStats::Track
{
public:
- NetworkLiveTrack(const std::string &name, std::shared_ptr<hailort::ConfiguredNetworkGroup> cng, hailort::LatencyMeterPtr overall_latency_meter);
+ NetworkLiveTrack(const std::string &name, std::shared_ptr<hailort::ConfiguredNetworkGroup> cng,
+ hailort::LatencyMeterPtr overall_latency_meter, bool measure_fps, const std::string &hef_path);
virtual ~NetworkLiveTrack() = default;
- virtual hailo_status start() override;
- virtual uint32_t get_text(std::stringstream &ss) override;
+ virtual hailo_status start_impl() override;
+ virtual uint32_t push_text_impl(std::stringstream &ss) override;
+ virtual void push_json_impl(nlohmann::ordered_json &json) override;
+
void progress();
private:
+ double get_fps();
+
+ static size_t max_ng_name;
+ static std::mutex mutex;
+
std::string m_name;
std::atomic<uint32_t> m_count;
std::chrono::time_point<std::chrono::steady_clock> m_last_get_time;
std::shared_ptr<hailort::ConfiguredNetworkGroup> m_cng;
hailort::LatencyMeterPtr m_overall_latency_meter;
+ const bool m_measure_fps;
+ const std::string m_hef_path; // stored by value so the track cannot outlive a caller's string
};
#endif /* _HAILO_HAILORTCLI_RUN2_NETWORK_LIVE_TRACK_HPP_ */
\ No newline at end of file
#include "hailo/hailort_common.hpp"
#include "hailo/hailort_defaults.hpp"
-#include "common/async_thread.hpp"
#include "common/file_utils.hpp"
#include "common/latency_meter.hpp"
#include "network_runner.hpp"
+#if defined(_MSC_VER)
+#include <mmsystem.h>
+#endif
using namespace hailort;
+SignalEventScopeGuard::SignalEventScopeGuard(Event &event) :
+ m_event(event)
+{}
-class SignalEventScopeGuard final
+SignalEventScopeGuard::~SignalEventScopeGuard()
{
-public:
- SignalEventScopeGuard(Event &event) : m_event(event)
- {}
+ m_event.signal();
+}
- ~SignalEventScopeGuard()
- {
- m_event.signal();
+BarrierTerminateScopeGuard::BarrierTerminateScopeGuard(BarrierPtr barrier) :
+ m_barrier(barrier)
+{}
+
+BarrierTerminateScopeGuard::~BarrierTerminateScopeGuard()
+{
+ if (m_barrier) {
+ m_barrier->terminate();
}
+}
- Event &m_event;
+#if defined(_MSC_VER)
+class TimeBeginScopeGuard final
+{
+public:
+ TimeBeginScopeGuard() {
+ // default interval between timer interrupts on Windows is 15.625 ms.
+ // This changes it to 1 ms, enabling us to sleep with a granularity of 1 millisecond.
+ // As from Windows 10 2004, in general processes are no longer affected by other processes calling timeBeginPeriod.
+ // https://randomascii.wordpress.com/2020/10/04/windows-timer-resolution-the-great-rule-change/
+ timeBeginPeriod(1);
+ }
+ ~TimeBeginScopeGuard() {
+ timeEndPeriod(1);
+ }
};
+#endif
//TODO: duplicated
-static hailo_status wait_for_threads(std::vector<AsyncThreadPtr<hailo_status>> &threads)
+hailo_status NetworkRunner::wait_for_threads(std::vector<AsyncThreadPtr<hailo_status>> &threads)
{
auto last_error_status = HAILO_SUCCESS;
for (auto &thread : threads) {
auto thread_status = thread->get();
- if ((HAILO_SUCCESS != thread_status) && (HAILO_STREAM_ABORTED_BY_USER != thread_status)) {
+ if (!inference_succeeded(thread_status)) {
last_error_status = thread_status;
LOGGER__ERROR("Thread failed with with status {}", thread_status);
}
return last_error_status;
}
-VStreamParams::VStreamParams() : name(), params(HailoRTDefaults::get_vstreams_params())
+IoParams::IoParams() : name(), input_file_path()
{
}
-NetworkParams::NetworkParams() : hef_path(), net_group_name(), vstream_params(), scheduling_algorithm(HAILO_SCHEDULING_ALGORITHM_ROUND_ROBIN),
- batch_size(HAILO_DEFAULT_BATCH_SIZE), scheduler_threshold(0), scheduler_timeout_ms(0), framerate(UNLIMITED_FRAMERATE), measure_hw_latency(false),
+VStreamParams::VStreamParams() : IoParams(), params(HailoRTDefaults::get_vstreams_params())
+{
+}
+
+StreamParams::StreamParams() : IoParams(), flags(HAILO_STREAM_FLAGS_NONE)
+{
+}
+
+NetworkParams::NetworkParams() : hef_path(), net_group_name(), vstream_params(), stream_params(),
+ scheduling_algorithm(HAILO_SCHEDULING_ALGORITHM_ROUND_ROBIN), batch_size(HAILO_DEFAULT_BATCH_SIZE),
+ scheduler_threshold(0), scheduler_timeout_ms(0), framerate(UNLIMITED_FRAMERATE), measure_hw_latency(false),
measure_overall_latency(false)
{
}
NetworkRunner::NetworkRunner(const NetworkParams &params, const std::string &name,
- std::vector<InputVStream> &&input_vstreams, std::vector<OutputVStream> &&output_vstreams,
- std::shared_ptr<ConfiguredNetworkGroup> cng, LatencyMeterPtr overall_latency_meter)
- : m_params(params), m_name(name), m_input_vstreams(std::move(input_vstreams)),
- m_output_vstreams(std::move(output_vstreams)), m_cng(cng), m_overall_latency_meter(overall_latency_meter)
+ VDevice &vdevice, std::shared_ptr<ConfiguredNetworkGroup> cng) :
+ m_vdevice(vdevice),
+ m_params(params),
+ m_name(name),
+ m_cng(cng),
+ m_overall_latency_meter(nullptr),
+ m_latency_barrier(nullptr)
{
}
Expected<std::shared_ptr<NetworkRunner>> NetworkRunner::create_shared(VDevice &vdevice, const NetworkParams &params)
{
- auto hef = Hef::create(params.hef_path);
+ // The network params passed to the NetworkRunner may be changed by this function, hence we copy them.
+ auto final_net_params = params;
+
+ auto hef = Hef::create(final_net_params.hef_path);
CHECK_EXPECTED(hef);
// Get NG's name if single
- auto net_group_name = params.net_group_name;
+ auto net_group_name = final_net_params.net_group_name;
if (net_group_name.empty()) {
auto net_groups_names = hef->get_network_groups_names();
- CHECK_AS_EXPECTED(net_groups_names.size() == 1, HAILO_INVALID_ARGUMENT, "HEF {} doesn't contain a single NetworkGroup. Pass --name", params.hef_path);
+ CHECK_AS_EXPECTED(net_groups_names.size() == 1, HAILO_INVALID_ARGUMENT, "HEF {} doesn't contain a single NetworkGroup. Pass --name", final_net_params.hef_path);
net_group_name = net_groups_names[0];
}
auto cfg_params = vdevice.create_configure_params(hef.value(), net_group_name);
CHECK_EXPECTED(cfg_params);
- cfg_params->batch_size = params.batch_size;
- if (params.measure_hw_latency) {
+ cfg_params->batch_size = final_net_params.batch_size;
+ if (final_net_params.batch_size == HAILO_DEFAULT_BATCH_SIZE) {
+ // Changing batch_size to 1. If HAILO_DEFAULT_BATCH_SIZE is configured, the sched will send one frame per batch
+ final_net_params.batch_size = 1;
+ }
+ if (final_net_params.measure_hw_latency) {
cfg_params->latency |= HAILO_LATENCY_MEASURE;
}
+ if (final_net_params.is_async()) {
+ for (auto &stream_name_params_pair : cfg_params->stream_params_by_name) {
+ stream_name_params_pair.second.flags = HAILO_STREAM_FLAGS_ASYNC;
+ }
+ }
auto cfgr_net_groups = vdevice.configure(hef.value(), {{net_group_name, cfg_params.value()}});
CHECK_EXPECTED(cfgr_net_groups);
assert(1 == cfgr_net_groups->size());
auto cfgr_net_group = cfgr_net_groups.value()[0];
- if (HAILO_SCHEDULING_ALGORITHM_NONE!= params.scheduling_algorithm) {
- CHECK_SUCCESS_AS_EXPECTED(cfgr_net_group->set_scheduler_threshold(params.scheduler_threshold));
- CHECK_SUCCESS_AS_EXPECTED(cfgr_net_group->set_scheduler_timeout(std::chrono::milliseconds(params.scheduler_timeout_ms)));
- CHECK_SUCCESS_AS_EXPECTED(cfgr_net_group->set_scheduler_priority(params.scheduler_priority));
+ if (HAILO_SCHEDULING_ALGORITHM_NONE != final_net_params.scheduling_algorithm) {
+ CHECK_SUCCESS_AS_EXPECTED(cfgr_net_group->set_scheduler_threshold(final_net_params.scheduler_threshold));
+ CHECK_SUCCESS_AS_EXPECTED(cfgr_net_group->set_scheduler_timeout(std::chrono::milliseconds(final_net_params.scheduler_timeout_ms)));
+ CHECK_SUCCESS_AS_EXPECTED(cfgr_net_group->set_scheduler_priority(final_net_params.scheduler_priority));
}
- std::map<std::string, hailo_vstream_params_t> vstreams_params;
- for (auto &vstream_params : params.vstream_params) {
- vstreams_params.emplace(vstream_params.name, vstream_params.params);
- }
- auto vstreams = create_vstreams(*cfgr_net_group, vstreams_params);
- CHECK_EXPECTED(vstreams);
-
- LatencyMeterPtr overall_latency_meter = nullptr;
- if (params.measure_overall_latency) {
- CHECK_AS_EXPECTED((1 == vstreams->first.size()), HAILO_INVALID_OPERATION,
- "Overall latency measurement over multiple inputs network is not supported");
-
- std::set<std::string> output_names;
- for (auto &output_vstream : vstreams->second) {
- output_names.insert(output_vstream.name());
+ std::shared_ptr<NetworkRunner> net_runner_ptr = nullptr;
+ switch (final_net_params.mode)
+ {
+ case InferenceMode::FULL:
+ {
+ std::map<std::string, hailo_vstream_params_t> vstreams_params;
+ for (auto &vstream_params : final_net_params.vstream_params) {
+ vstreams_params.emplace(vstream_params.name, vstream_params.params);
}
-
- overall_latency_meter = make_shared_nothrow<LatencyMeter>(output_names, OVERALL_LATENCY_TIMESTAMPS_LIST_LENGTH);
- CHECK_NOT_NULL_AS_EXPECTED(overall_latency_meter, HAILO_OUT_OF_HOST_MEMORY);
+ auto vstreams = create_vstreams(*cfgr_net_group, vstreams_params);
+ CHECK_EXPECTED(vstreams);
+
+ auto net_runner = make_shared_nothrow<FullNetworkRunner>(final_net_params, net_group_name, vdevice,
+ std::move(vstreams->first), std::move(vstreams->second), cfgr_net_group);
+ CHECK_NOT_NULL_AS_EXPECTED(net_runner, HAILO_OUT_OF_HOST_MEMORY);
+ net_runner_ptr = std::static_pointer_cast<NetworkRunner>(net_runner);
+ break;
}
- auto net_runner = make_shared_nothrow<NetworkRunner>(params, net_group_name, std::move(vstreams->first),
- std::move(vstreams->second), cfgr_net_group, overall_latency_meter);
- CHECK_NOT_NULL_AS_EXPECTED(net_runner, HAILO_OUT_OF_HOST_MEMORY);
- return net_runner;
-}
-Expected<BufferPtr> NetworkRunner::create_dataset_from_input_file(const std::string &file_path,
- const InputVStream &input_vstream)
-{
- auto buffer = read_binary_file(file_path);
- CHECK_EXPECTED(buffer);
- CHECK_AS_EXPECTED(0 == (buffer->size() % input_vstream.get_frame_size()), HAILO_INVALID_ARGUMENT,
- "Input file ({}) size {} must be a multiple of the frame size {} ({})",
- file_path, buffer->size(), input_vstream.get_frame_size(), input_vstream.name());
+ case InferenceMode::RAW: // Fallthrough
+ case InferenceMode::RAW_ASYNC: // Fallthrough
+ case InferenceMode::RAW_ASYNC_SINGLE_THREAD:
+ {
+ auto input_streams = cfgr_net_group->get_input_streams();
+ CHECK_AS_EXPECTED(input_streams.size() > 0, HAILO_INTERNAL_FAILURE);
- auto buffer_ptr = make_shared_nothrow<Buffer>(buffer.release());
- CHECK_NOT_NULL_AS_EXPECTED(buffer_ptr, HAILO_OUT_OF_HOST_MEMORY);
+ auto output_streams = cfgr_net_group->get_output_streams();
+ CHECK_AS_EXPECTED(output_streams.size() > 0, HAILO_INTERNAL_FAILURE);
- return buffer_ptr;
-}
+ auto net_runner = make_shared_nothrow<RawNetworkRunner>(final_net_params, net_group_name, vdevice,
+ std::move(input_streams), std::move(output_streams), cfgr_net_group);
+ CHECK_NOT_NULL_AS_EXPECTED(net_runner, HAILO_OUT_OF_HOST_MEMORY);
+ net_runner_ptr = std::static_pointer_cast<NetworkRunner>(net_runner);
+ break;
+ }
+ default:
+ // Shouldn't get here
+ return make_unexpected(HAILO_INTERNAL_FAILURE);
+ }
-Expected<BufferPtr> NetworkRunner::create_constant_dataset(const InputVStream &input_vstream)
-{
- const uint8_t const_byte = 0xAB;
- auto constant_buffer = Buffer::create_shared(input_vstream.get_frame_size(), const_byte);
- CHECK_EXPECTED(constant_buffer);
+ if (final_net_params.measure_overall_latency || final_net_params.measure_hw_latency) {
+ auto input_names = net_runner_ptr->get_input_names();
+ auto output_names = net_runner_ptr->get_output_names();
- return constant_buffer.release();
-}
+ CHECK_AS_EXPECTED((1 == input_names.size()), HAILO_INVALID_OPERATION,
+ "Latency measurement over multiple inputs network is not supported");
-hailo_status NetworkRunner::run_input_vstream(InputVStream &vstream, Event &shutdown_event, BufferPtr dataset,
- LatencyMeterPtr overall_latency_meter)
-{
- auto signal_event_scope_guard = SignalEventScopeGuard(shutdown_event);
-
- auto last_write_time = std::chrono::steady_clock::now();
- auto framerate_interval = std::chrono::duration<double>(1) / m_params.framerate;
- size_t buffer_offset = 0;
- while(true) {
- if (overall_latency_meter) {
- overall_latency_meter->add_start_sample(std::chrono::steady_clock::now().time_since_epoch());
- }
- auto status = vstream.write(MemoryView((dataset->data() + buffer_offset), vstream.get_frame_size()));
- if (status == HAILO_STREAM_ABORTED_BY_USER) {
- return status;
- }
- CHECK_SUCCESS(status);
- buffer_offset += vstream.get_frame_size();
- buffer_offset %= dataset->size();
-
- if (m_params.framerate != UNLIMITED_FRAMERATE) {
- auto elapsed_time = std::chrono::steady_clock::now() - last_write_time;
- std::this_thread::sleep_for(framerate_interval - elapsed_time);
- last_write_time = std::chrono::steady_clock::now();
+ if (final_net_params.measure_overall_latency) {
+ auto overall_latency_meter = make_shared_nothrow<LatencyMeter>(output_names, OVERALL_LATENCY_TIMESTAMPS_LIST_LENGTH);
+ CHECK_NOT_NULL_AS_EXPECTED(overall_latency_meter, HAILO_OUT_OF_HOST_MEMORY);
+ net_runner_ptr->set_overall_latency_meter(overall_latency_meter);
}
+
+ // We use a barrier for both hw and overall latency
+ auto latency_barrier = make_shared_nothrow<Barrier>(input_names.size() + output_names.size());
+ CHECK_NOT_NULL_AS_EXPECTED(latency_barrier, HAILO_OUT_OF_HOST_MEMORY);
+ net_runner_ptr->set_latency_barrier(latency_barrier);
}
- return HAILO_SUCCESS;
+
+ return net_runner_ptr;
}
-hailo_status NetworkRunner::run_output_vstream(OutputVStream &vstream, bool first, std::shared_ptr<NetworkLiveTrack> net_live_track,
- Event &shutdown_event, LatencyMeterPtr overall_latency_meter)
+bool NetworkRunner::inference_succeeded(hailo_status status)
{
- auto signal_event_scope_guard = SignalEventScopeGuard(shutdown_event);
-
- auto result = Buffer::create(vstream.get_frame_size());
- CHECK_EXPECTED_AS_STATUS(result);
- while(true) {
- auto status = vstream.read(MemoryView(result.value()));
- if (status == HAILO_STREAM_ABORTED_BY_USER) {
- return status;
- }
- CHECK_SUCCESS(status);
- if (overall_latency_meter) {
- overall_latency_meter->add_end_sample(vstream.name(), std::chrono::steady_clock::now().time_since_epoch());
- }
- if (first) {
- net_live_track->progress();
- }
- }
- return HAILO_SUCCESS;
+ const auto status_find_result = std::find(NetworkRunner::ALLOWED_INFERENCE_RETURN_VALUES.cbegin(),
+ NetworkRunner::ALLOWED_INFERENCE_RETURN_VALUES.cend(), status);
+ // If the status is in the allowed list, the inference has succeeded
+ return status_find_result != NetworkRunner::ALLOWED_INFERENCE_RETURN_VALUES.cend();
}
-hailo_status NetworkRunner::run(Event &shutdown_event, LivePrinter &live_printer, Barrier &barrier)
+hailo_status NetworkRunner::run(EventPtr shutdown_event, LiveStats &live_stats, Barrier &activation_barrier)
{
auto ang = std::unique_ptr<ActivatedNetworkGroup>(nullptr);
if (HAILO_SCHEDULING_ALGORITHM_NONE == m_params.scheduling_algorithm) {
auto ang_exp = m_cng->activate();
if (!ang_exp) {
- barrier.terminate();
+ activation_barrier.terminate();
}
CHECK_EXPECTED_AS_STATUS(ang_exp);
ang = ang_exp.release();
}
- auto net_live_track = std::make_shared<NetworkLiveTrack>(m_name, m_cng, m_overall_latency_meter);
- live_printer.add(net_live_track, 1); //support progress over multiple outputs
- barrier.arrive_and_wait();
+ // If we measure latency (hw or overall) we send frames one at a time. Hence we don't measure fps.
+ const auto measure_fps = !m_params.measure_hw_latency && !m_params.measure_overall_latency;
+ auto net_live_track = std::make_shared<NetworkLiveTrack>(m_name, m_cng, m_overall_latency_meter, measure_fps, m_params.hef_path);
+ live_stats.add(net_live_track, 1); // support progress over multiple outputs
- std::vector<AsyncThreadPtr<hailo_status>> threads;
- for (auto &input_vstream : m_input_vstreams) {
- BufferPtr dataset = nullptr;
- for (auto ¶ms : m_params.vstream_params) {
- if ((input_vstream.name() == params.name) && (!params.input_file_path.empty())) {
- auto dataset_exp = create_dataset_from_input_file(params.input_file_path, input_vstream);
- CHECK_EXPECTED_AS_STATUS(dataset_exp);
- dataset = dataset_exp.release();
- }
- }
- if (nullptr == dataset) {
- auto dataset_exp = create_constant_dataset(input_vstream);
- CHECK_EXPECTED_AS_STATUS(dataset_exp);
- dataset = dataset_exp.release();
- }
+#if defined(_MSC_VER)
+ TimeBeginScopeGuard time_begin_scope_guard;
+#endif
- threads.emplace_back(std::make_unique<AsyncThread<hailo_status>>("SEND", [this, &input_vstream, &shutdown_event,
- dataset](){
- return run_input_vstream(input_vstream, shutdown_event, dataset, m_overall_latency_meter);
- }));
- }
+ activation_barrier.arrive_and_wait();
- bool first = true; //TODO: check with multiple outputs
- for (auto &output_vstream : m_output_vstreams) {
- threads.emplace_back(std::make_unique<AsyncThread<hailo_status>>("RECV", [this, &output_vstream, first, net_live_track,
- &shutdown_event](){
- return run_output_vstream(output_vstream, first, net_live_track, shutdown_event, m_overall_latency_meter);
- }));
- first = false;
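+ // Single-thread async mode runs the whole inference loop on this thread; the other modes spawn a thread per stream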
+ if (m_params.mode == InferenceMode::RAW_ASYNC_SINGLE_THREAD) {
+ return run_single_thread_async_infer(shutdown_event, net_live_track);
+ } else {
+ auto threads = start_inference_threads(shutdown_event, net_live_track);
+ CHECK_EXPECTED_AS_STATUS(threads);
+
+ CHECK_SUCCESS(shutdown_event->wait(HAILO_INFINITE_TIMEOUT));
+ stop();
+ return wait_for_threads(threads.value());
}
+}
- //TODO: return threads and move stop outside?
- CHECK_SUCCESS(shutdown_event.wait(HAILO_INFINITE_TIMEOUT));
- stop();
- return wait_for_threads(threads);
+void NetworkRunner::set_overall_latency_meter(LatencyMeterPtr latency_meter)
+{
+ m_overall_latency_meter = latency_meter;
}
-void NetworkRunner::stop()
+void NetworkRunner::set_latency_barrier(BarrierPtr latency_barrier)
{
- for (auto &input_vstream : m_input_vstreams) {
- (void) input_vstream.abort();
- }
- for (auto &output_vstream : m_output_vstreams) {
- (void) output_vstream.abort();
- }
+ m_latency_barrier = latency_barrier;
}
Expected<std::pair<std::vector<InputVStream>, std::vector<OutputVStream>>> NetworkRunner::create_vstreams(
if (elem_it != params.end()) {
input_vstreams_params.emplace(input_vstream_info.name, elem_it->second);
match_count++;
- }
- else {
+ } else {
input_vstreams_params.emplace(input_vstream_info.name, HailoRTDefaults::get_vstreams_params());
}
}
CHECK_EXPECTED(output_vstreams);
return {{input_vstreams.release(), output_vstreams.release()}};//TODO: move? copy elision?
+}
+
+const std::vector<hailo_status> NetworkRunner::ALLOWED_INFERENCE_RETURN_VALUES{
+ {HAILO_SUCCESS, HAILO_STREAM_ABORTED_BY_USER, HAILO_SHUTDOWN_EVENT_SIGNALED}
+};
+
+FullNetworkRunner::FullNetworkRunner(const NetworkParams ¶ms, const std::string &name, VDevice &vdevice,
+ std::vector<InputVStream> &&input_vstreams, std::vector<OutputVStream> &&output_vstreams,
+ std::shared_ptr<ConfiguredNetworkGroup> cng) :
+ NetworkRunner(params, name, vdevice, cng),
+ m_input_vstreams(std::move(input_vstreams)),
+ m_output_vstreams(std::move(output_vstreams))
+{
+}
+
+Expected<std::vector<AsyncThreadPtr<hailo_status>>> FullNetworkRunner::start_inference_threads(EventPtr shutdown_event,
+ std::shared_ptr<NetworkLiveTrack> net_live_track)
+{
+ std::vector<AsyncThreadPtr<hailo_status>> threads;
+ for (auto &input_vstream : m_input_vstreams) {
+ const auto vstream_params = get_params(input_vstream.name());
+ auto writer = WriterWrapper<InputVStream>::create(input_vstream, vstream_params, m_overall_latency_meter,
+ m_params.framerate);
+ CHECK_EXPECTED(writer);
+
+ threads.emplace_back(std::make_unique<AsyncThread<hailo_status>>("WRITE",
+ [this, writer = writer.release(), shutdown_event]() mutable {
+ return run_write(writer, shutdown_event, m_latency_barrier);
+ }));
+ }
+
+ bool first = true; //TODO: check with multiple outputs
+ for (auto &output_vstream : m_output_vstreams) {
+ auto reader = ReaderWrapper<OutputVStream>::create(output_vstream, m_overall_latency_meter,
+ first ? net_live_track : nullptr);
+ CHECK_EXPECTED(reader);
+
+ threads.emplace_back(std::make_unique<AsyncThread<hailo_status>>("READ",
+ [this, reader=reader.release(), shutdown_event]() mutable {
+ return run_read(reader, shutdown_event, m_latency_barrier);
+ }));
+ first = false;
+ }
+
+ return threads;
+}
+
+void FullNetworkRunner::stop()
+{
+ for (auto &input_vstream : m_input_vstreams) {
+ (void) input_vstream.abort();
+ }
+ for (auto &output_vstream : m_output_vstreams) {
+ (void) output_vstream.abort();
+ }
+}
+
+std::set<std::string> FullNetworkRunner::get_input_names()
+{
+ std::set<std::string> result;
+
+ for (const auto &vstream : m_input_vstreams) {
+ result.insert(vstream.name());
+ }
+
+ return result;
+}
+
+std::set<std::string> FullNetworkRunner::get_output_names()
+{
+ std::set<std::string> result;
+
+ for (const auto &vstream : m_output_vstreams) {
+ result.insert(vstream.name());
+ }
+
+ return result;
+}
+
+VStreamParams FullNetworkRunner::get_params(const std::string &name)
+{
+ for (const auto ¶ms : m_params.vstream_params) {
+ if (name == params.name) {
+ return params;
+ }
+ }
+ return VStreamParams();
+}
+
+RawNetworkRunner::RawNetworkRunner(const NetworkParams ¶ms, const std::string &name, VDevice &vdevice,
+ InputStreamRefVector &&input_streams, OutputStreamRefVector &&output_streams,
+ std::shared_ptr<ConfiguredNetworkGroup> cng) :
+ NetworkRunner(params, name, vdevice, cng),
+ m_input_streams(std::move(input_streams)),
+ m_output_streams(std::move(output_streams))
+{
+}
+
+Expected<std::vector<AsyncThreadPtr<hailo_status>>> RawNetworkRunner::start_inference_threads(EventPtr shutdown_event,
+ std::shared_ptr<NetworkLiveTrack> net_live_track)
+{
+ const bool async_streams = (m_params.is_async());
+ std::vector<AsyncThreadPtr<hailo_status>> threads;
+ for (auto &input_stream : m_input_streams) {
+ const auto stream_params = get_params(input_stream.get().name());
+ auto writer = WriterWrapper<InputStream>::create(input_stream.get(), stream_params, m_overall_latency_meter,
+ m_params.framerate);
+ CHECK_EXPECTED(writer);
+
+ if (async_streams) {
+ threads.emplace_back(std::make_unique<AsyncThread<hailo_status>>("WRITE_ASYNC",
+ [this, writer = writer.release(), shutdown_event]() mutable {
+ return run_write_async(writer, shutdown_event, m_latency_barrier);
+ }));
+ } else {
+ threads.emplace_back(std::make_unique<AsyncThread<hailo_status>>("WRITE",
+ [this, writer = writer.release(), shutdown_event]() mutable {
+ return run_write(writer, shutdown_event, m_latency_barrier);
+ }));
+ }
+ }
+
+ bool first = true; //TODO: check with multiple outputs
+ for (auto &output_stream : m_output_streams) {
+ auto reader = ReaderWrapper<OutputStream>::create(output_stream.get(), m_overall_latency_meter,
+ first ? net_live_track : nullptr);
+ CHECK_EXPECTED(reader);
+
+ if (async_streams) {
+ threads.emplace_back(std::make_unique<AsyncThread<hailo_status>>("READ_ASYNC",
+ [this, reader=reader.release(), shutdown_event]() mutable {
+ return run_read_async(reader, shutdown_event, m_latency_barrier);
+ }));
+ } else {
+ threads.emplace_back(std::make_unique<AsyncThread<hailo_status>>("READ",
+ [this, reader=reader.release(), shutdown_event]() mutable {
+ return run_read(reader, shutdown_event, m_latency_barrier);
+ }));
+ }
+ first = false;
+ }
+
+ return threads;
+}
+
+hailo_status RawNetworkRunner::run_single_thread_async_infer(EventPtr shutdown_event,
+ std::shared_ptr<NetworkLiveTrack> net_live_track)
+{
+ // Build output wrappers
+ std::vector<ReaderWrapperPtr<OutputStream>> reader_wrappers;
+ std::vector<SemaphorePtr> output_semaphores;
+ bool is_first_output = true;
+ for (auto &output_stream : m_output_streams) {
+ auto reader_wrapper = ReaderWrapper<OutputStream>::create(output_stream.get(), m_overall_latency_meter,
+ is_first_output ? net_live_track : nullptr);
+ CHECK_EXPECTED_AS_STATUS(reader_wrapper);
+ is_first_output = false;
+
+ auto max_queue_size = reader_wrapper.value()->get().get_async_max_queue_size();
+ CHECK_EXPECTED_AS_STATUS(max_queue_size);
+
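+ // The semaphore starts at the stream's async queue capacity and is signaled on each completion,
+ // so waiting on it below means there is a free slot for another transfer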
+ auto semaphore = Semaphore::create_shared(static_cast<uint32_t>(*max_queue_size));
+ CHECK_NOT_NULL(semaphore, HAILO_OUT_OF_HOST_MEMORY);
+
+ output_semaphores.emplace_back(semaphore);
+ reader_wrappers.emplace_back(reader_wrapper.release());
+ }
+
+ // Build input wrappers
+ std::vector<WriterWrapperPtr<InputStream>> writer_wrappers;
+ std::vector<SemaphorePtr> input_semaphores;
+ for (auto &input_stream : m_input_streams) {
+ auto writer_wrapper = WriterWrapper<InputStream>::create(input_stream.get(),
+ get_params(input_stream.get().name()), m_overall_latency_meter, m_params.framerate);
+ CHECK_EXPECTED_AS_STATUS(writer_wrapper);
+
+ auto max_queue_size = writer_wrapper.value()->get().get_async_max_queue_size();
+ CHECK_EXPECTED_AS_STATUS(max_queue_size);
+
+ auto semaphore = Semaphore::create_shared(static_cast<uint32_t>(*max_queue_size));
+ CHECK_NOT_NULL(semaphore, HAILO_OUT_OF_HOST_MEMORY);
+
+ input_semaphores.emplace_back(semaphore);
+ writer_wrappers.emplace_back(writer_wrapper.release());
+ }
+
+ // Build waitables list with reference to previous input/output semaphores.
+ // We put output semaphores before inputs because we always want to have a place to write
+ // the data into. It also makes sure that the framerate throttle will work properly.
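+ // Waitables layout: [0] = shutdown event, [1..num_outputs] = output semaphores, then the input semaphores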
+ const size_t shutdown_index = 0;
+ const size_t output_index_start = shutdown_index + 1;
+ const size_t input_index_start = output_index_start + output_semaphores.size();
+
+ std::vector<std::reference_wrapper<Waitable>> waitables;
+ waitables.emplace_back(std::ref(*shutdown_event));
+ auto add_to_waitables = [&waitables](const SemaphorePtr &sem) { waitables.emplace_back(std::ref(*sem)); };
+ std::for_each(output_semaphores.begin(), output_semaphores.end(), add_to_waitables);
+ std::for_each(input_semaphores.begin(), input_semaphores.end(), add_to_waitables);
+ WaitableGroup wait_group(std::move(waitables));
+
+ // Inference
+ while (true) {
+ auto wait_index = wait_group.wait_any(HAILORTCLI_DEFAULT_TIMEOUT);
+ CHECK_EXPECTED_AS_STATUS(wait_index);
+
+ if (*wait_index == shutdown_index) {
+ // Stopping the network so we won't get a timeout on the flush. The async operations may still be active
+ // (until network deactivation).
+ stop();
+ break;
+ } else if ((*wait_index >= output_index_start) && (*wait_index < input_index_start)) {
+ // output is ready
+ const size_t output_index = *wait_index - output_index_start;
+ auto status = reader_wrappers[output_index]->read_async(
+ [semaphore=output_semaphores[output_index]](const OutputStream::CompletionInfo &) {
+ (void)semaphore->signal();
+ }
+ );
+ CHECK_SUCCESS(status);
+ } else {
+ // input is ready
+ const size_t input_index = *wait_index - input_index_start;
+ auto status = writer_wrappers[input_index]->write_async(
+ [semaphore=input_semaphores[input_index]](const InputStream::CompletionInfo &) {
+ (void)semaphore->signal();
+ }
+ );
+ CHECK_SUCCESS(status);
+ }
+ }
+
+ return HAILO_SUCCESS;
+}
+
+void RawNetworkRunner::stop()
+{
+ for (auto &input_stream : m_input_streams) {
+ (void) input_stream.get().abort();
+ }
+ for (auto &output_stream : m_output_streams) {
+ (void) output_stream.get().abort();
+ }
+}
+
+std::set<std::string> RawNetworkRunner::get_input_names()
+{
+ std::set<std::string> result;
+ for (const auto &stream : m_input_streams) {
+ result.insert(stream.get().name());
+ }
+
+ return result;
+}
+
+std::set<std::string> RawNetworkRunner::get_output_names()
+{
+ std::set<std::string> result;
+ for (const auto &stream : m_output_streams) {
+ result.insert(stream.get().name());
+ }
+
+ return result;
+}
+
+StreamParams RawNetworkRunner::get_params(const std::string &name)
+{
+ for (const auto ¶ms : m_params.stream_params) {
+ if (name == params.name) {
+ return params;
+ }
+ }
+ return StreamParams();
}
\ No newline at end of file
#ifndef _HAILO_HAILORTCLI_RUN2_NETWORK_RUNNER_HPP_
#define _HAILO_HAILORTCLI_RUN2_NETWORK_RUNNER_HPP_
+#include "io_wrappers.hpp"
+#include "live_stats.hpp"
+#include "network_live_track.hpp"
+
+#include "../hailortcli.hpp"
+
#include "common/barrier.hpp"
+#include "common/async_thread.hpp"
+#include "common/event_internal.hpp"
#include "hailo/vdevice.hpp"
#include "hailo/vstream.hpp"
#include "hailo/expected.hpp"
#include "hailo/buffer.hpp"
-#include "../hailortcli.hpp"
-
-#include "live_printer.hpp"
-#include "network_live_track.hpp"
-
#include <string>
#include <vector>
-constexpr uint32_t UNLIMITED_FRAMERATE = 0;
+using namespace hailort;
+
+constexpr std::chrono::milliseconds SYNC_EVENT_TIMEOUT(1000);
+
-struct VStreamParams
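+// FULL runs inference through vstreams; the RAW modes use the streams directly.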
+enum class InferenceMode {
+ FULL,
+
+ RAW,
+ RAW_ASYNC,
+ RAW_ASYNC_SINGLE_THREAD,
+};
+
+struct IoParams
{
- VStreamParams();
+ IoParams();
std::string name;
- hailo_vstream_params_t params;
std::string input_file_path;
};
+struct VStreamParams : public IoParams
+{
+ VStreamParams();
+
+ hailo_vstream_params_t params;
+};
+
+struct StreamParams : public IoParams
+{
+ StreamParams();
+
+ hailo_stream_flags_t flags;
+};
+
struct NetworkParams
{
NetworkParams();
std::string hef_path;
std::string net_group_name;
std::vector<VStreamParams> vstream_params;
+ std::vector<StreamParams> stream_params;
hailo_scheduling_algorithm_t scheduling_algorithm;
// Network parameters
bool measure_hw_latency;
bool measure_overall_latency;
+ InferenceMode mode;
+
+ bool is_async() const
+ {
+ return (mode == InferenceMode::RAW_ASYNC) || (mode == InferenceMode::RAW_ASYNC_SINGLE_THREAD);
+ }
+};
+
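+// Signals the wrapped event on destruction, so a returning inference thread wakes anyone waiting on shutdown.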
+class SignalEventScopeGuard final
+{
+public:
+ SignalEventScopeGuard(Event &event);
+ ~SignalEventScopeGuard();
+
+private:
+ Event &m_event;
+};
+
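+// Terminates the barrier on destruction, releasing peer threads that are blocked on arrive_and_wait().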
+class BarrierTerminateScopeGuard final
+{
+public:
+ BarrierTerminateScopeGuard(BarrierPtr barrier);
+ ~BarrierTerminateScopeGuard();
+
+private:
+ BarrierPtr m_barrier;
};
class NetworkRunner
{
public:
+ static Expected<std::shared_ptr<NetworkRunner>> create_shared(VDevice &vdevice, const NetworkParams ¶ms);
+
NetworkRunner(const NetworkParams ¶ms, const std::string &name,
- std::vector<hailort::InputVStream> &&input_vstreams, std::vector<hailort::OutputVStream> &&output_vstreams,
- std::shared_ptr<hailort::ConfiguredNetworkGroup> cng, hailort::LatencyMeterPtr overall_latency_meter);
- static hailort::Expected<std::shared_ptr<NetworkRunner>> create_shared(hailort::VDevice &vdevice, const NetworkParams ¶ms);
- hailo_status run(hailort::Event &shutdown_event, LivePrinter &live_printer, hailort::Barrier &barrier);
- void stop();
+ VDevice &vdevice, std::shared_ptr<ConfiguredNetworkGroup> cng);
+ virtual ~NetworkRunner() = default;
-private:
- static hailort::Expected<std::pair<std::vector<hailort::InputVStream>, std::vector<hailort::OutputVStream>>> create_vstreams(
- hailort::ConfiguredNetworkGroup &net_group, const std::map<std::string, hailo_vstream_params_t> ¶ms);
- hailo_status run_input_vstream(hailort::InputVStream &vstream, hailort::Event &shutdown_event, hailort::BufferPtr dataset,
- hailort::LatencyMeterPtr overall_latency_meter);
- static hailo_status run_output_vstream(hailort::OutputVStream &vstream, bool first, std::shared_ptr<NetworkLiveTrack> net_live_track,
- hailort::Event &shutdown_event, hailort::LatencyMeterPtr overall_latency_meter);
+ hailo_status run(EventPtr shutdown_event, LiveStats &live_stats, Barrier &activation_barrier);
+ virtual void stop() = 0;
+ // Must be called prior to run
+ void set_overall_latency_meter(LatencyMeterPtr latency_meter);
+ void set_latency_barrier(BarrierPtr latency_barrier);
-static hailort::Expected<hailort::BufferPtr> create_constant_dataset(const hailort::InputVStream &input_vstream);
-static hailort::Expected<hailort::BufferPtr> create_dataset_from_input_file(const std::string &file_path, const hailort::InputVStream &input_vstream);
+protected:
+ static bool inference_succeeded(hailo_status status);
+ // Use 'inference_succeeded(async_thread->get())' to check for a thread's success
+ virtual Expected<std::vector<AsyncThreadPtr<hailo_status>>> start_inference_threads(EventPtr shutdown_event,
+ std::shared_ptr<NetworkLiveTrack> net_live_track) = 0;
+ virtual hailo_status run_single_thread_async_infer(EventPtr shutdown_event,
+ std::shared_ptr<NetworkLiveTrack> net_live_track) = 0;
- const NetworkParams &m_params;//TODO: copy instead of ref?
+ virtual std::set<std::string> get_input_names() = 0;
+ virtual std::set<std::string> get_output_names() = 0;
+
+ static Expected<std::pair<std::vector<InputVStream>, std::vector<OutputVStream>>> create_vstreams(
+ ConfiguredNetworkGroup &net_group, const std::map<std::string, hailo_vstream_params_t> ¶ms);
+
+ template <typename Writer>
+ hailo_status run_write(WriterWrapperPtr<Writer> writer, EventPtr shutdown_event,
+ std::shared_ptr<Barrier> latency_barrier)
+ {
+ auto latency_barrier_scope_guard = BarrierTerminateScopeGuard(latency_barrier);
+ auto signal_event_scope_guard = SignalEventScopeGuard(*shutdown_event);
+
+ while (true) {
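+ // When measuring latency, the barrier keeps all reader/writer threads in lock-step, one batch in flight at a time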
+ if (latency_barrier) {
+ latency_barrier->arrive_and_wait();
+ }
+
+ for (auto i = 0; i < m_params.batch_size; i++) {
+ auto status = writer->write();
+ if (status == HAILO_STREAM_ABORTED_BY_USER) {
+ return status;
+ }
+ CHECK_SUCCESS(status);
+ }
+ }
+ return HAILO_SUCCESS;
+ }
+
+ template <typename Writer>
+ hailo_status run_write_async(WriterWrapperPtr<Writer> writer, EventPtr shutdown_event,
+ std::shared_ptr<Barrier> latency_barrier)
+ {
+ auto latency_barrier_scope_guard = BarrierTerminateScopeGuard(latency_barrier);
+ auto signal_event_scope_guard = SignalEventScopeGuard(*shutdown_event);
+
+ // When measuring latency we send one frame at a time (to avoid back-pressure);
+ // sync_event is signaled from the completion callback, so the next frame is sent only after the previous one completed
+ EventPtr sync_event = nullptr;
+ if (m_params.measure_hw_latency || m_params.measure_overall_latency) {
+ sync_event = Event::create_shared(Event::State::not_signalled);
+ CHECK_NOT_NULL(sync_event, HAILO_OUT_OF_HOST_MEMORY);
+ }
+
+ while (true) {
+ if (latency_barrier) {
+ latency_barrier->arrive_and_wait();
+ }
+
+ for (auto i = 0; i < m_params.batch_size; i++) {
+ auto status = writer->wait_for_async_ready();
+ if (status == HAILO_STREAM_ABORTED_BY_USER) {
+ return status;
+ }
+ CHECK_SUCCESS(status);
+
+ status = writer->write_async(
+ [sync_event](const typename Writer::CompletionInfo &) {
+ if (sync_event) {
+ (void)sync_event->signal();
+ }
+ });
+ if (status == HAILO_STREAM_ABORTED_BY_USER) {
+ return status;
+ }
+ CHECK_SUCCESS(status);
+
+ if (m_params.measure_hw_latency || m_params.measure_overall_latency) {
+ status = WaitOrShutdown(sync_event, shutdown_event).wait(SYNC_EVENT_TIMEOUT);
+ if (HAILO_SHUTDOWN_EVENT_SIGNALED == status) {
+ // Don't print an error for this
+ return status;
+ }
+ CHECK_SUCCESS(status);
+ status = sync_event->reset();
+ CHECK_SUCCESS(status);
+ }
+ }
+ }
+ return HAILO_SUCCESS;
+ }
+
+ template <typename Reader>
+ hailo_status run_read(ReaderWrapperPtr<Reader> reader, EventPtr shutdown_event,
+ std::shared_ptr<Barrier> latency_barrier)
+ {
+ auto latency_barrier_scope_guard = BarrierTerminateScopeGuard(latency_barrier);
+ auto signal_event_scope_guard = SignalEventScopeGuard(*shutdown_event);
+
+ while (true) {
+ if (latency_barrier) {
+ latency_barrier->arrive_and_wait();
+ }
+
+ for (auto i = 0; i < m_params.batch_size; i++) {
+ auto status = reader->read();
+ if (status == HAILO_STREAM_ABORTED_BY_USER) {
+ return status;
+ }
+ CHECK_SUCCESS(status);
+ }
+ }
+ return HAILO_SUCCESS;
+ }
+
+ template <typename Reader>
+ hailo_status run_read_async(ReaderWrapperPtr<Reader> reader, EventPtr shutdown_event,
+ std::shared_ptr<Barrier> latency_barrier)
+ {
+ auto latency_barrier_scope_guard = BarrierTerminateScopeGuard(latency_barrier);
+ auto signal_event_scope_guard = SignalEventScopeGuard(*shutdown_event);
+
+ // When measuring latency we read one frame at a time (to avoid back-pressure);
+ // sync_event is signaled from the completion callback, so the next read is issued only after the previous one completed
+ EventPtr sync_event = nullptr;
+ if (m_params.measure_hw_latency || m_params.measure_overall_latency) {
+ sync_event = Event::create_shared(Event::State::not_signalled);
+ CHECK_NOT_NULL(sync_event, HAILO_OUT_OF_HOST_MEMORY);
+ }
+
+ while (true) {
+ if (latency_barrier) {
+ latency_barrier->arrive_and_wait();
+ }
+
+ for (auto i = 0; i < m_params.batch_size; i++) {
+ auto status = reader->wait_for_async_ready();
+ if (status == HAILO_STREAM_ABORTED_BY_USER) {
+ return status;
+ }
+ CHECK_SUCCESS(status);
+
+ status = reader->read_async(
+ [sync_event](const typename Reader::CompletionInfo &) {
+ if (sync_event) {
+ (void)sync_event->signal();
+ }
+ });
+ if (status == HAILO_STREAM_ABORTED_BY_USER) {
+ return status;
+ }
+ CHECK_SUCCESS(status);
+
+ if (m_params.measure_hw_latency || m_params.measure_overall_latency) {
+ status = WaitOrShutdown(sync_event, shutdown_event).wait(SYNC_EVENT_TIMEOUT);
+ if (HAILO_SHUTDOWN_EVENT_SIGNALED == status) {
+ // Don't print an error for this
+ return status;
+ }
+ CHECK_SUCCESS(status);
+ status = sync_event->reset();
+ CHECK_SUCCESS(status);
+ }
+ }
+ }
+ return HAILO_SUCCESS;
+ }
+
+ VDevice &m_vdevice;
+ const NetworkParams m_params;
std::string m_name;
- std::vector<hailort::InputVStream> m_input_vstreams;
- std::vector<hailort::OutputVStream> m_output_vstreams;
- std::shared_ptr<hailort::ConfiguredNetworkGroup> m_cng;
- hailort::LatencyMeterPtr m_overall_latency_meter;
+ std::shared_ptr<ConfiguredNetworkGroup> m_cng;
+ LatencyMeterPtr m_overall_latency_meter;
+ BarrierPtr m_latency_barrier;
+
+private:
+ static const std::vector<hailo_status> ALLOWED_INFERENCE_RETURN_VALUES;
+ static hailo_status wait_for_threads(std::vector<AsyncThreadPtr<hailo_status>> &threads);
+ static Expected<BufferPtr> create_constant_dataset(size_t size);
+ static Expected<BufferPtr> create_dataset_from_input_file(const std::string &file_path, size_t size);
+};
+
+class FullNetworkRunner : public NetworkRunner
+{
+public:
+ FullNetworkRunner(const NetworkParams ¶ms, const std::string &name, VDevice &vdevice,
+ std::vector<InputVStream> &&input_vstreams, std::vector<OutputVStream> &&output_vstreams,
+ std::shared_ptr<ConfiguredNetworkGroup> cng);
+
+ virtual Expected<std::vector<AsyncThreadPtr<hailo_status>>> start_inference_threads(EventPtr shutdown_event,
+ std::shared_ptr<NetworkLiveTrack> net_live_track) override;
+ virtual hailo_status run_single_thread_async_infer(EventPtr, std::shared_ptr<NetworkLiveTrack>) override
+ {
+ return HAILO_NOT_IMPLEMENTED;
+ }
+
+ virtual void stop() override;
+ virtual std::set<std::string> get_input_names() override;
+ virtual std::set<std::string> get_output_names() override;
+ VStreamParams get_params(const std::string &name);
+
+private:
+ std::vector<InputVStream> m_input_vstreams;
+ std::vector<OutputVStream> m_output_vstreams;
+};
+
+class RawNetworkRunner : public NetworkRunner
+{
+public:
+ RawNetworkRunner(const NetworkParams ¶ms, const std::string &name, VDevice &vdevice,
+ InputStreamRefVector &&input_streams, OutputStreamRefVector &&output_streams,
+ std::shared_ptr<ConfiguredNetworkGroup> cng);
+
+ virtual Expected<std::vector<AsyncThreadPtr<hailo_status>>> start_inference_threads(EventPtr shutdown_event,
+ std::shared_ptr<NetworkLiveTrack> net_live_track) override;
+
+ virtual hailo_status run_single_thread_async_infer(EventPtr shutdown_event,
+ std::shared_ptr<NetworkLiveTrack> net_live_track) override;
+
+ virtual void stop() override;
+ virtual std::set<std::string> get_input_names() override;
+ virtual std::set<std::string> get_output_names() override;
+ StreamParams get_params(const std::string &name);
+
+private:
+ InputStreamRefVector m_input_streams;
+ OutputStreamRefVector m_output_streams;
};
#endif /* _HAILO_HAILORTCLI_RUN2_NETWORK_RUNNER_HPP_ */
\ No newline at end of file
**/
#include "run2_command.hpp"
-#include "live_printer.hpp"
+#include "live_stats.hpp"
#include "timer_live_track.hpp"
#include "measurement_live_track.hpp"
#include "network_runner.hpp"
#include "common/barrier.hpp"
#include "common/async_thread.hpp"
+#include "../common.hpp"
#include "hailo/vdevice.hpp"
#include "hailo/hef.hpp"
return names;
}
+class StreamNameValidator : public CLI::Validator {
+public:
+ StreamNameValidator(const CLI::Option *hef_path_option, const CLI::Option *net_group_name_option);
+private:
+ static std::vector<std::string> get_values(const std::string &hef_path, const std::string &net_group_name);
+};
+
+StreamNameValidator::StreamNameValidator(const CLI::Option *hef_path_option, const CLI::Option *net_group_name_option) : Validator("STREAM") {
+ func_ = [](std::string&) {
+ //TODO: support?
+ return std::string();
+ };
+ autocomplete_func_ = [hef_path_option, net_group_name_option](const std::string&) {
+ // TODO: remove existing names from prev user input
+ return get_values(hef_path_option->as<std::string>(), net_group_name_option->as<std::string>());
+ };
+}
+
+std::vector<std::string> StreamNameValidator::get_values(const std::string &hef_path, const std::string &net_group_name)
+{
+ auto hef = Hef::create(hef_path);
+ if (!hef.has_value()) {
+ return {};
+ }
+
+ // TODO: duplicate
+ auto actual_net_group_name = net_group_name;
+ if (actual_net_group_name.empty()) {
+ auto net_groups_names = hef->get_network_groups_names();
+ if (net_groups_names.size() != 1) {
+ return {};
+ }
+ actual_net_group_name = net_groups_names[0];
+ }
+
+ auto streams_info = hef->get_all_stream_infos(actual_net_group_name);
+ if (!streams_info.has_value()) {
+ return {};
+ }
+
+ std::vector<std::string> names;
+ for (auto &stream_info : streams_info.value()) {
+ names.emplace_back(stream_info.name);
+ }
+ return names;
+}
+
+IoApp::IoApp(const std::string &description, const std::string &name, Type type) :
+ CLI::App(description, name),
+ m_type(type),
+ m_vstream_params(),
+ m_stream_params()
+{
+}
+
+IoApp::Type IoApp::get_type() const
+{
+ return m_type;
+}
+
+const VStreamParams &IoApp::get_vstream_params() const
+{
+ // TODO: instead of copy do a move + call reset()? change func name to move_params? same for NetworkParams/NetworkApp class
+ return m_vstream_params;
+}
+
+const StreamParams &IoApp::get_stream_params() const
+{
+ // TODO: instead of copy do a move + call reset()? change func name to move_params? same for NetworkParams/NetworkApp class
+ return m_stream_params;
+}
+
/** VStreamApp */
-class VStreamApp : public CLI::App
+class VStreamApp : public IoApp
{
public:
VStreamApp(const std::string &description, const std::string &name, CLI::Option *hef_path_option, CLI::Option *net_group_name_option);
- const VStreamParams& get_params();
private:
CLI::Option* add_flag_callback(CLI::App *app, const std::string &name, const std::string &description,
std::function<void(bool)> function);
-
- VStreamParams m_params;
};
VStreamApp::VStreamApp(const std::string &description, const std::string &name, CLI::Option *hef_path_option,
- CLI::Option *net_group_name_option) : CLI::App(description, name), m_params()
+ CLI::Option *net_group_name_option) :
+ IoApp(description, name, IoApp::Type::VSTREAM)
{
- add_option("name", m_params.name, "vStream name")
+ add_option("name", m_vstream_params.name, "vStream name")
->check(VStreamNameValidator(hef_path_option, net_group_name_option));
- add_option("--input-file", m_params.input_file_path,
+ add_option("--input-file", m_vstream_params.input_file_path,
"Input file path. If not given, random data will be used. File format should be raw binary data with size that is a factor of the input shape size")
->default_val("");
auto format_opt_group = add_option_group("Format");
- format_opt_group->add_option("--type", m_params.params.user_buffer_format.type, "Format type")
+ format_opt_group->add_option("--type", m_vstream_params.params.user_buffer_format.type, "Format type")
->transform(HailoCheckedTransformer<hailo_format_type_t>({
{ "auto", HAILO_FORMAT_TYPE_AUTO },
{ "uint8", HAILO_FORMAT_TYPE_UINT8 },
}))
->default_val("auto");
- format_opt_group->add_option("--order", m_params.params.user_buffer_format.order, "Format order")
+ format_opt_group->add_option("--order", m_vstream_params.params.user_buffer_format.order, "Format order")
->transform(HailoCheckedTransformer<hailo_format_order_t>({
{ "auto", HAILO_FORMAT_ORDER_AUTO },
{ "nhwc", HAILO_FORMAT_ORDER_NHWC },
add_flag_callback(format_opt_group, "-q,--quantized,!--no-quantized", "Whether or not data is quantized",
[this](bool result){
- m_params.params.user_buffer_format.flags = result ?
- static_cast<hailo_format_flags_t>(m_params.params.user_buffer_format.flags | HAILO_FORMAT_FLAGS_QUANTIZED) :
- static_cast<hailo_format_flags_t>(m_params.params.user_buffer_format.flags & (~HAILO_FORMAT_FLAGS_QUANTIZED));})
+ m_vstream_params.params.user_buffer_format.flags = result ?
+ static_cast<hailo_format_flags_t>(m_vstream_params.params.user_buffer_format.flags | HAILO_FORMAT_FLAGS_QUANTIZED) :
+ static_cast<hailo_format_flags_t>(m_vstream_params.params.user_buffer_format.flags & (~HAILO_FORMAT_FLAGS_QUANTIZED));})
->run_callback_for_default()
->default_val(true); // default_val() must be after run_callback_for_default()
}
-const VStreamParams& VStreamApp::get_params()
+CLI::Option* VStreamApp::add_flag_callback(CLI::App *app, const std::string &name, const std::string &description,
+ std::function<void(bool)> function)
{
- //TODO: instead of copy do a move + call reset()? change func name to move_params? same for NetworkParams/NetworkApp class
- return m_params;
+ // get_option doesn't support multiple names so taking the first one
+ auto first_name = name.substr(0, name.find(','));
+ auto wrap_function = [app, function, first_name](std::int64_t){function(app->get_option(first_name)->as<bool>());};
+ return app->add_flag_function(name, wrap_function, description);
}
-CLI::Option* VStreamApp::add_flag_callback(CLI::App *app, const std::string &name, const std::string &description,
- std::function<void(bool)> function)
- {
- // get_option doesn't support multiple names so taking the first one
- auto first_name = name.substr(0, name.find(','));
- auto wrap_function = [app, function, first_name](std::int64_t){function(app->get_option(first_name)->as<bool>());};
- return app->add_flag_function(name, wrap_function, description);
- }
+/** StreamApp */
+class StreamApp : public IoApp
+{
+public:
+ StreamApp(const std::string &description, const std::string &name, CLI::Option *hef_path_option, CLI::Option *net_group_name_option);
+};
+
+StreamApp::StreamApp(const std::string &description, const std::string &name, CLI::Option *hef_path_option,
+ CLI::Option *net_group_name_option) :
+ IoApp(description, name, IoApp::Type::STREAM)
+{
+ add_option("name", m_stream_params.name, "Stream name")
+ ->check(StreamNameValidator(hef_path_option, net_group_name_option));
+
+ add_option("--input-file", m_stream_params.input_file_path,
+ "Input file path. If not given, random data will be used. File format should be raw binary data with size that is a factor of the input shape size")
+ ->default_val("");
+
+ // TODO: async option (HRT-9580)
+ // TODO: flag callback?
+ // add_flag_callback(format_opt_group, "-q,--quantized,!--no-quantized", "Whether or not data is quantized",
+ // [this](bool result){
+ // m_params.params.user_buffer_format.flags = result ?
+ // static_cast<hailo_format_flags_t>(m_params.params.user_buffer_format.flags | HAILO_FORMAT_FLAGS_QUANTIZED) :
+ // static_cast<hailo_format_flags_t>(m_params.params.user_buffer_format.flags & (~HAILO_FORMAT_FLAGS_QUANTIZED));})
+ // ->run_callback_for_default()
+ // ->default_val(true); // default_val() must be after run_callback_for_default()
+}
/** NetworkGroupNameValidator */
class NetworkGroupNameValidator : public CLI::Validator {
}
/** NetworkApp */
-class NetworkApp : public CLI::App
-{
-public:
- NetworkApp(const std::string &description, const std::string &name);
- const NetworkParams& get_params();
-
-private:
- void add_vstream_app_subcom(CLI::Option *hef_path_option, CLI::Option *net_group_name_option);
- NetworkParams m_params;
-};
-
-NetworkApp::NetworkApp(const std::string &description, const std::string &name) : CLI::App(description, name), m_params()
+NetworkApp::NetworkApp(const std::string &description, const std::string &name) :
+ CLI::App(description, name),
+ m_params()
{
auto hef_path_option = add_option("hef", m_params.hef_path, "HEF file path")->check(CLI::ExistingFile);
auto net_group_name_option = add_option("--name", m_params.net_group_name, "Network group name")
// TODO: support multiple scheduling algorithms
m_params.scheduling_algorithm = HAILO_SCHEDULING_ALGORITHM_ROUND_ROBIN;
- add_vstream_app_subcom(hef_path_option, net_group_name_option);
-}
-
-void NetworkApp::add_vstream_app_subcom(CLI::Option *hef_path_option, CLI::Option *net_group_name_option)
-{
- auto vstream_app = std::make_shared<VStreamApp>("Set vStream", "set-vstream", hef_path_option, net_group_name_option);
- vstream_app->immediate_callback();
- vstream_app->callback([this, vstream_app, hef_path_option, net_group_name_option]() {
- m_params.vstream_params.push_back(vstream_app->get_params());
-
- // Throw an error if anything is left over and should not be.
- _process_extras();
-
- // NOTE: calling "net_app->clear(); m_params = NetworkParams();" is not sufficient because default values
- // need to be re-set. we can override clear and reset them but there might be other issues as well
- // and this one feels less hacky ATM
- remove_subcommand(vstream_app.get());
- // Remove from parsed_subcommands_ as well (probably a bug in CLI11)
- parsed_subcommands_.erase(std::remove_if(
- parsed_subcommands_.begin(), parsed_subcommands_.end(),
- [vstream_app](auto x){return x == vstream_app.get();}),
- parsed_subcommands_.end());
- add_vstream_app_subcom(hef_path_option, net_group_name_option);
- });
-
- // Must set fallthrough to support nested repeated subcommands.
- vstream_app->fallthrough();
- add_subcommand(vstream_app);
+ auto vstream_subcommand = add_io_app_subcom<VStreamApp>("Set vStream", "set-vstream", hef_path_option, net_group_name_option);
+ auto stream_subcommand = add_io_app_subcom<StreamApp>("Set Stream", "set-stream", hef_path_option, net_group_name_option);
+ // TODO: doesn't seem to be working (HRT-9886)
+ vstream_subcommand->excludes(stream_subcommand);
+ stream_subcommand->excludes(vstream_subcommand);
}
const NetworkParams& NetworkApp::get_params()
bool get_measure_power();
bool get_measure_current();
bool get_measure_temp();
+ bool get_measure_hw_latency();
+ bool get_measure_overall_latency();
bool get_multi_process_service();
const std::string &get_group_id();
+ InferenceMode get_mode() const;
+ const std::string &get_output_json_path();
void set_scheduling_algorithm(hailo_scheduling_algorithm_t scheduling_algorithm);
+ void set_inference_mode();
void set_measure_latency();
private:
void add_net_app_subcom();
std::vector<NetworkParams> m_network_params;
uint32_t m_time_to_run;
+ InferenceMode m_mode;
+ std::string m_stats_json_path;
std::vector<std::string> m_device_id;
uint32_t m_device_count;
bool m_multi_process_service;
add_option("-t,--time-to-run", m_time_to_run, "Time to run (seconds)")
->default_val(DEFAULT_TIME_TO_RUN_SECONDS)
->check(CLI::PositiveNumber);
+ add_option("-m,--mode", m_mode, "Inference mode")
+ ->transform(HailoCheckedTransformer<InferenceMode>({
+ { "full", InferenceMode::FULL },
+ { "raw", InferenceMode::RAW },
+ { "raw_async", InferenceMode::RAW_ASYNC },
+ { "raw_async_single_thread", InferenceMode::RAW_ASYNC_SINGLE_THREAD, OptionVisibility::HIDDEN }
+ }))->default_val("full");
+ static const char *JSON_SUFFIX = ".json";
+ add_option("-j,--json", m_stats_json_path, "If set save statistics as json to the specified path")
+ ->default_val("")
+ ->check(FileSuffixValidator(JSON_SUFFIX));
auto vdevice_options_group = add_option_group("VDevice Options");
auto measure_power_opt = measurement_options_group->add_flag("--measure-power", m_measure_power, "Measure power consumption")
->default_val(false);
-
+
measurement_options_group->add_flag("--measure-current", m_measure_current, "Measure current")->excludes(measure_power_opt)
->default_val(false);
- measurement_options_group->add_flag("--measure-latency", m_measure_hw_latency, "Measure network latency")
+ measurement_options_group->add_flag("--measure-latency", m_measure_hw_latency, "Measure network latency on the NN core")
->default_val(false);
-
+
measurement_options_group->add_flag("--measure-overall-latency", m_measure_overall_latency, "Measure overall latency measurement")
->default_val(false);
// NOTE: fallthrough() is not a must here but it is also not working (causing only a single vstream param
// instead of >1). Debug - App.hpp::void _parse(std::vector<std::string> &args)
add_subcommand(net_app);
+ // TODO: set _autocomplete based on m_mode (HRT-9886)
}
const std::vector<NetworkParams>& Run2::get_network_params()
return m_measure_temp;
}
+bool Run2::get_measure_hw_latency()
+{
+ return m_measure_hw_latency;
+}
+
+bool Run2::get_measure_overall_latency()
+{
+ return m_measure_overall_latency;
+}
+
std::vector<hailo_device_id_t> Run2::get_dev_ids()
{
std::vector<hailo_device_id_t> res;
return m_device_count;
}
+void Run2::set_inference_mode()
+{
+ for (auto ¶ms : m_network_params) {
+ params.mode = m_mode;
+ }
+}
+
void Run2::set_scheduling_algorithm(hailo_scheduling_algorithm_t scheduling_algorithm)
{
for (auto ¶ms: m_network_params) {
void Run2::set_measure_latency()
{
- for (auto ¶ms: m_network_params) {
+ for (auto ¶ms : m_network_params) {
params.measure_hw_latency = m_measure_hw_latency;
params.measure_overall_latency = m_measure_overall_latency;
}
return m_group_id;
}
+InferenceMode Run2::get_mode() const
+{
+ return m_mode;
+}
+
+const std::string &Run2::get_output_json_path()
+{
+ return m_stats_json_path;
+}
/** Run2Command */
Run2Command::Run2Command(CLI::App &parent_app) : Command(parent_app.add_subcommand(std::make_shared<Run2>()))
IS_FIT_IN_UINT8(a) && IS_FIT_IN_UINT8(b) && IS_FIT_IN_UINT8(c) && IS_FIT_IN_UINT8(d);
}
+std::string get_str_infer_mode(const InferenceMode& infer_mode)
+{
+ switch (infer_mode) {
+ case InferenceMode::FULL:
+ return "full";
+ case InferenceMode::RAW:
+ return "raw";
+ case InferenceMode::RAW_ASYNC:
+ return "raw_async";
+ case InferenceMode::RAW_ASYNC_SINGLE_THREAD:
+ return "raw_async_single_thread";
+ }
+
+ return "<Unknown>";
+}
+
hailo_status Run2Command::execute()
{
Run2 *app = reinterpret_cast<Run2*>(m_app);
+ app->set_inference_mode();
app->set_measure_latency();
if (0 == app->get_network_params().size()) {
if (1 == app->get_network_params().size()) {
LOGGER__WARN("\"hailortcli run2\" is in preview. It is recommended to use \"hailortcli run\" command for a single network group");
}
+ if (app->get_measure_hw_latency() || app->get_measure_overall_latency()) {
+ CHECK(1 == app->get_network_params().size(), HAILO_INVALID_OPERATION, "When latency measurement is enabled, only one model is allowed");
+ LOGGER__WARN("Measuring latency; frames are sent one at a time and FPS will not be measured");
+ }
- hailo_vdevice_params_t vdevice_params = {};
+ hailo_vdevice_params_t vdevice_params{};
CHECK_SUCCESS(hailo_init_vdevice_params(&vdevice_params));
auto dev_ids = app->get_dev_ids();
if (!dev_ids.empty()) {
} else {
vdevice_params.device_count = app->get_device_count();
}
+ // TODO: Async stream support for scheduler (HRT-9878)
+ if ((app->get_mode() == InferenceMode::RAW_ASYNC) || (app->get_mode() == InferenceMode::RAW_ASYNC_SINGLE_THREAD)) {
+ vdevice_params.scheduling_algorithm = HAILO_SCHEDULING_ALGORITHM_NONE;
+ CHECK(1 == app->get_network_params().size(), HAILO_INVALID_OPERATION, "Only one model is allowed with raw async inference mode");
+ app->set_scheduling_algorithm(HAILO_SCHEDULING_ALGORITHM_NONE);
+ }
vdevice_params.group_id = app->get_group_id().c_str();
vdevice_params.multi_process_service = app->get_multi_process_service();
net_runners.emplace_back(net_runner.release());
}
- auto live_printer = std::make_unique<LivePrinter>(std::chrono::seconds(1));
- live_printer->add(std::make_shared<TimerLiveTrack>(app->get_time_to_run()), 0);
+ auto live_stats = std::make_unique<LiveStats>(std::chrono::seconds(1));
+
+ live_stats->add(std::make_shared<TimerLiveTrack>(app->get_time_to_run()), 0);
+
+ auto shutdown_event = Event::create_shared(Event::State::not_signalled);
+ CHECK_NOT_NULL(shutdown_event, HAILO_OUT_OF_HOST_MEMORY);
- auto shutdown_event = Event::create(Event::State::not_signalled);
- CHECK_EXPECTED_AS_STATUS(shutdown_event);
std::vector<AsyncThreadPtr<hailo_status>> threads;
- Barrier barrier(net_runners.size() + 1); // We wait for all nets to finish activation + this thread to start sampling
+ Barrier activation_barrier(net_runners.size() + 1); // We wait for all nets to finish activation + this thread to start sampling
for (auto &net_runner : net_runners) {
threads.emplace_back(std::make_unique<AsyncThread<hailo_status>>("NG_INFER", [&net_runner, &shutdown_event,
- &live_printer, &barrier](){
- return net_runner->run(shutdown_event.value(), *live_printer, barrier);
+ &live_stats, &activation_barrier](){
+ return net_runner->run(shutdown_event, *live_stats, activation_barrier);
}));
}
+ auto signal_event_scope_guard = SignalEventScopeGuard(*shutdown_event);
+
auto physical_devices = vdevice.value()->get_physical_devices();
CHECK_EXPECTED_AS_STATUS(physical_devices);
for (auto &device : physical_devices.value()) {
auto measurement_live_track = MeasurementLiveTrack::create_shared(device.get(), app->get_measure_power(),
app->get_measure_current(), app->get_measure_temp());
+ if (HAILO_SUCCESS != measurement_live_track.status()) {
+ activation_barrier.terminate();
+ }
CHECK_EXPECTED_AS_STATUS(measurement_live_track);
- live_printer->add(measurement_live_track.release(), 2);
+
+ live_stats->add(measurement_live_track.release(), 2);
}
// TODO: wait for all nets before starting timer. start() should update TimerLiveTrack to start. or maybe append here but first in vector...
- barrier.arrive_and_wait();
- CHECK_SUCCESS(live_printer->start());
+ activation_barrier.arrive_and_wait();
+ CHECK_SUCCESS(live_stats->start());
auto status = shutdown_event->wait(app->get_time_to_run());
if (HAILO_TIMEOUT != status) {
// if shutdown_event is signaled its because one of the send/recv threads failed
LOGGER__ERROR("Encountered error during inference. See log for more information.");
}
- live_printer.reset(); // Ensures that the final print will include real values and not with values of when streams are already aborted.
+ if (!app->get_output_json_path().empty()){
+ live_stats->dump_stats(app->get_output_json_path(), get_str_infer_mode(app->get_mode()));
+ }
+ live_stats.reset(); // Ensures that the final print includes real values, not values from after the streams were already aborted.
shutdown_event->signal();
return wait_for_threads(threads);
}
\ No newline at end of file
#define _HAILO_HAILORTCLI_RUN2_RUN2_COMMAND_HPP_
#include "../command.hpp"
+#include "network_runner.hpp"
+
+#include <type_traits>
+
class Run2Command : public Command {
public:
private:
};
-#endif /* _HAILO_HAILORTCLI_RUN2_RUN2_COMMAND_HPP_ */
\ No newline at end of file
+class IoApp : public CLI::App
+{
+public:
+ enum class Type {
+ STREAM,
+ VSTREAM
+ };
+
+ IoApp(const std::string &description, const std::string &name, Type type);
+ Type get_type() const;
+ const VStreamParams& get_vstream_params() const;
+ const StreamParams& get_stream_params() const;
+
+protected:
+ Type m_type;
+ VStreamParams m_vstream_params;
+ StreamParams m_stream_params;
+};
+
+class NetworkApp : public CLI::App
+{
+public:
+ NetworkApp(const std::string &description, const std::string &name);
+ const NetworkParams& get_params();
+
+private:
+ template <typename T>
+ CLI::App *add_io_app_subcom(const std::string &description, const std::string &name,
+ CLI::Option *hef_path_option, CLI::Option *net_group_name_option)
+ {
+ static_assert(std::is_base_of<IoApp, T>::value, "T is not a subclass of IoApp");
+
+ auto io_app = std::make_shared<T>(description, name, hef_path_option, net_group_name_option);
+ io_app->immediate_callback();
+ io_app->callback([this, description, name, io_app, hef_path_option, net_group_name_option]() {
+ if (io_app->get_type() == IoApp::Type::VSTREAM) {
+ auto vstream_params = io_app->get_vstream_params();
+ m_params.vstream_params.push_back(vstream_params);
+ } else {
+ auto stream_params = io_app->get_stream_params();
+ m_params.stream_params.push_back(stream_params);
+ }
+
+ // Throw an error if anything is left over and should not be.
+ _process_extras();
+
+ // NOTE: calling "net_app->clear(); m_params = NetworkParams();" is not sufficient because default values
+ // need to be re-set. we can override clear and reset them but there might be other issues as well
+ // and this one feels less hacky ATM
+ remove_subcommand(io_app.get());
+ // Remove from parsed_subcommands_ as well (probably a bug in CLI11)
+ parsed_subcommands_.erase(std::remove_if(
+ parsed_subcommands_.begin(), parsed_subcommands_.end(),
+ [io_app](auto x){return x == io_app.get();}),
+ parsed_subcommands_.end());
+ add_io_app_subcom<T>(description, name, hef_path_option, net_group_name_option);
+ });
+
+ // Must set fallthrough to support nested repeated subcommands.
+ io_app->fallthrough();
+ return add_subcommand(io_app);
+ }
+
+ NetworkParams m_params;
+};
+
+
+#endif /* _HAILO_HAILORTCLI_RUN2_RUN2_COMMAND_HPP_ */
#include <sstream>
TimerLiveTrack::TimerLiveTrack(std::chrono::milliseconds duration) :
- LivePrinter::Track(), m_duration(duration), m_start_time()
+ LiveStats::Track(), m_duration(duration), m_start_time()
{
}
-hailo_status TimerLiveTrack::start()
+hailo_status TimerLiveTrack::start_impl()
{
m_start_time = std::chrono::steady_clock::now();
- m_started = true;
-
return HAILO_SUCCESS;
}
-uint32_t TimerLiveTrack::get_text(std::stringstream &ss)
+uint32_t TimerLiveTrack::push_text_impl(std::stringstream &ss)
{
- if (!m_started) {
- return 0;
- }
static const uint32_t MAX_PROGRESS_BAR_WIDTH = 20;
auto elapsed_time = std::chrono::steady_clock::now() - m_start_time;
auto eta = std::chrono::seconds(std::max<int32_t>(0, static_cast<int32_t>(std::round(std::chrono::duration<double>(m_duration - elapsed_time).count())))); // std::chrono::round is from C++17
ss << fmt::format("[{:=>{}}{:{}}] {:>3}% {}\n", '>', progress_bar_width, "", MAX_PROGRESS_BAR_WIDTH - progress_bar_width, elapsed_percentage, CliCommon::duration_to_string(eta));
return 1;
+}
+
+void TimerLiveTrack::push_json_impl(nlohmann::ordered_json &json)
+{
+ std::stringstream time_to_run;
+ time_to_run << std::fixed << std::setprecision(2) << std::round(std::chrono::duration<double>(m_duration).count()) << " seconds";
+ json["time_to_run"] = time_to_run.str();
}
\ No newline at end of file
* @brief Timer live track
**/
-#include "live_printer.hpp"
+#include "live_stats.hpp"
#ifndef _HAILO_HAILORTCLI_RUN2_TIMER_LIVE_TRACK_HPP_
#define _HAILO_HAILORTCLI_RUN2_TIMER_LIVE_TRACK_HPP_
-class TimerLiveTrack : public LivePrinter::Track
+class TimerLiveTrack : public LiveStats::Track
{
public:
TimerLiveTrack(std::chrono::milliseconds duration);
virtual ~TimerLiveTrack() = default;
- virtual hailo_status start() override;
- virtual uint32_t get_text(std::stringstream &ss) override;
+ virtual hailo_status start_impl() override;
+ virtual uint32_t push_text_impl(std::stringstream &ss) override;
+ virtual void push_json_impl(nlohmann::ordered_json &json) override;
private:
std::chrono::milliseconds m_duration;
};
desc_function_ = []() {
- return "\t\tInput file path/paths. On single input network, give the full path of the data file.\n\
+ return "\t\tInput file (.bin) path/paths. On single input network, give the full path of the data file.\n\
\t\tOn multiple inputs network, the format is input_name1=path1 input_name2=path2, where\n\
\t\tinput_name1 is the name of the input stream. If not given, random data will be used";
};
# set(CMAKE_C_CLANG_TIDY "clang-tidy;-checks=*")
set(HAILORT_MAJOR_VERSION 4)
-set(HAILORT_MINOR_VERSION 13)
+set(HAILORT_MINOR_VERSION 14)
set(HAILORT_REVISION_VERSION 0)
# Add the cmake folder so the modules there are found
message(FATAL_ERROR "Only unix hosts are supported, stopping build")
endif()
-find_package(HailoRT 4.13.0 EXACT REQUIRED)
+find_package(HailoRT 4.14.0 EXACT REQUIRED)
# GST_PLUGIN_DEFINE needs PACKAGE to be defined
set(GST_HAILO_PACKAGE_NAME "hailo")
set_target_properties(gsthailo PROPERTIES
PUBLIC_HEADER "gst-hailo/metadata/tensor_meta.hpp"
+ CXX_STANDARD 14
+ CXX_STANDARD_REQUIRED YES
+ CXX_EXTENSIONS NO
+ C_VISIBILITY_PRESET hidden
+ CXX_VISIBILITY_PRESET hidden
+ # VISIBILITY_INLINES_HIDDEN YES
)
target_compile_options(gsthailo PRIVATE
#define DEFAULT_VDEVICE_KEY (0)
#define MIN_VALID_VDEVICE_KEY (1)
-#define HAILO_SUPPORTED_FORMATS "{ RGB, RGBA, YUY2, NV12, NV21, I420 }"
+#define HAILO_SUPPORTED_FORMATS "{ RGB, RGBA, YUY2, NV12, NV21, I420, GRAY8 }"
#define HAILO_VIDEO_CAPS GST_VIDEO_CAPS_MAKE(HAILO_SUPPORTED_FORMATS)
#define HAILO_DEFAULT_SCHEDULER_TIMEOUT_MS (0)
PROP_SCHEDULING_ALGORITHM,
PROP_SCHEDULER_TIMEOUT_MS,
PROP_SCHEDULER_THRESHOLD,
+ PROP_SCHEDULER_PRIORITY,
PROP_MULTI_PROCESS_SERVICE,
PROP_INPUT_QUANTIZED,
PROP_OUTPUT_QUANTIZED,
g_object_class_install_property(gobject_class, PROP_SCHEDULER_THRESHOLD,
g_param_spec_uint("scheduler-threshold", "Frames threshold for scheduler", "The minimum number of send requests required before the hailonet is considered ready to get run time from the scheduler.",
HAILO_DEFAULT_SCHEDULER_THRESHOLD, std::numeric_limits<uint32_t>::max(), HAILO_DEFAULT_SCHEDULER_THRESHOLD, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS)));
+ g_object_class_install_property(gobject_class, PROP_SCHEDULER_PRIORITY,
+ g_param_spec_uint("scheduler-priority", "Priority index for scheduler", "When the scheduler will choose the next hailonet to run, higher priority will be prioritized in the selection. "
+ "Bigger number represent higher priority",
+ HAILO_SCHEDULER_PRIORITY_MIN, HAILO_SCHEDULER_PRIORITY_MAX, HAILO_SCHEDULER_PRIORITY_NORMAL, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS)));
g_object_class_install_property(gobject_class, PROP_MULTI_PROCESS_SERVICE,
g_param_spec_boolean("multi-process-service", "Should run over HailoRT service", "Controls whether to run HailoRT over its service. "
"To use this property, the service should be active and scheduling-algorithm should be set. Defaults to false.",
break;
case PROP_SCHEDULER_TIMEOUT_MS:
if (m_was_configured) {
- g_warning("The network was already configured so changing the scheduling algorithm will not take place!");
+ g_warning("The network was already configured so changing the scheduling timeout will not take place!");
break;
}
if (m_props.m_is_active.was_changed()) {
break;
case PROP_SCHEDULER_THRESHOLD:
if (m_was_configured) {
- g_warning("The network was already configured so changing the scheduling algorithm will not take place!");
+ g_warning("The network was already configured so changing the scheduling threshold will not take place!");
break;
}
if (m_props.m_is_active.was_changed()) {
}
m_props.m_scheduler_threshold = g_value_get_uint(value);
break;
+ case PROP_SCHEDULER_PRIORITY:
+ if (m_was_configured) {
+ g_warning("The network was already configured so changing the scheduling priority will not take place!");
+ break;
+ }
+ if (m_props.m_is_active.was_changed()) {
+ g_error("scheduler usage (scheduler-priority) in combination with 'is-active' is not supported.");
+ break;
+ }
+ m_props.m_scheduler_priority = static_cast<guint8>(g_value_get_uint(value));
+ break;
case PROP_MULTI_PROCESS_SERVICE:
if (m_was_configured) {
g_warning("The network was already configured so changing the multi-process-service property will not take place!");
case PROP_SCHEDULER_THRESHOLD:
g_value_set_uint(value, m_props.m_scheduler_threshold.get());
break;
+ case PROP_SCHEDULER_PRIORITY:
+ g_value_set_uint(value, m_props.m_scheduler_priority.get());
+ break;
case PROP_MULTI_PROCESS_SERVICE:
g_value_set_boolean(value, m_props.m_multi_process_service.get());
break;
status = m_net_group_handle->set_scheduler_threshold(m_props.m_network_name.get(), m_props.m_scheduler_threshold.get());
GST_CHECK_SUCCESS(status, m_element, RESOURCE, "Setting scheduler threshold failed, status = %d", status);
}
+ if (m_props.m_scheduler_priority.was_changed()) {
+ status = m_net_group_handle->set_scheduler_priority(m_props.m_network_name.get(), m_props.m_scheduler_priority.get());
+ GST_CHECK_SUCCESS(status, m_element, RESOURCE, "Setting scheduler priority failed, status = %d", status);
+ }
auto vstreams = m_net_group_handle->create_vstreams(m_props.m_network_name.get(), m_props.m_scheduling_algorithm.get(), m_output_formats, static_cast<bool>(m_props.m_input_quantized.get()),
static_cast<bool>(m_props.m_output_quantized.get()), m_props.m_input_format_type.get(), m_props.m_output_format_type.get());
public:
HailoNetProperties() : m_device_id(nullptr), m_hef_path(nullptr), m_network_name(nullptr), m_batch_size(HAILO_DEFAULT_BATCH_SIZE),
m_is_active(false), m_device_count(0), m_vdevice_key(DEFAULT_VDEVICE_KEY), m_scheduling_algorithm(HAILO_SCHEDULING_ALGORITHM_ROUND_ROBIN),
- m_scheduler_timeout_ms(HAILO_DEFAULT_SCHEDULER_TIMEOUT_MS), m_scheduler_threshold(HAILO_DEFAULT_SCHEDULER_THRESHOLD),
+ m_scheduler_timeout_ms(HAILO_DEFAULT_SCHEDULER_TIMEOUT_MS), m_scheduler_threshold(HAILO_DEFAULT_SCHEDULER_THRESHOLD), m_scheduler_priority(HAILO_SCHEDULER_PRIORITY_NORMAL),
m_multi_process_service(HAILO_DEFAULT_MULTI_PROCESS_SERVICE), m_input_quantized(true), m_output_quantized(true), m_input_format_type(HAILO_FORMAT_TYPE_AUTO),
m_output_format_type(HAILO_FORMAT_TYPE_AUTO)
HailoElemProperty<hailo_scheduling_algorithm_t> m_scheduling_algorithm;
HailoElemProperty<guint32> m_scheduler_timeout_ms;
HailoElemProperty<guint32> m_scheduler_threshold;
+ HailoElemProperty<guint8> m_scheduler_priority;
HailoElemProperty<gboolean> m_multi_process_service;
HailoElemProperty<gboolean> m_input_quantized;
HailoElemProperty<gboolean> m_output_quantized;
std::chrono::duration<double, std::milli> latency = std::chrono::system_clock::now() - start_time;
GST_DEBUG("%s latency: %f milliseconds", output_info.vstream().name().c_str(), latency.count());
}
- GST_CHECK_SUCCESS(status, m_element, STREAM, "Reading from vstream failed, status = %d", status);
-
gst_buffer_unmap(*buffer, &buffer_info);
+ if (HAILO_STREAM_ABORTED_BY_USER == status) {
+ return status;
+ }
+ GST_CHECK_SUCCESS(status, m_element, STREAM, "Reading from vstream failed, status = %d", status);
}
if (should_print_latency) {
#define GST_CAT_DEFAULT gst_hailosend_debug_category
#define RGB_FEATURES_SIZE (3)
#define RGBA_FEATURES_SIZE (4)
+#define GRAY8_FEATURES_SIZE (1)
#define YUY2_FEATURES_SIZE (2)
#define NV12_FEATURES_SIZE (3)
#define NV21_FEATURES_SIZE (3)
gst_pad_template_new("sink", GST_PAD_SINK, GST_PAD_ALWAYS, gst_caps_from_string(HAILO_VIDEO_CAPS)));
gst_element_class_set_static_metadata(GST_ELEMENT_CLASS(klass),
- "hailosend element", "Hailo/Filter/Video", "Send RGB/RGBA/YUY2/NV12/NV21/I420 video to HailoRT", PLUGIN_AUTHOR);
+ "hailosend element", "Hailo/Filter/Video", "Send RGB/RGBA/GRAY8/YUY2/NV12/NV21/I420 video to HailoRT", PLUGIN_AUTHOR);
element_class->change_state = GST_DEBUG_FUNCPTR(gst_hailosend_change_state);
format = "RGBA";
break;
}
+ else if (m_input_vstream_infos[0].shape.features == GRAY8_FEATURES_SIZE)
+ {
+ format = "GRAY8";
+ break;
+ }
/* Fallthrough */
case HAILO_FORMAT_ORDER_NHCW:
case HAILO_FORMAT_ORDER_FCR:
case HAILO_FORMAT_ORDER_F8CR:
- format = "RGB";
- GST_CHECK(RGB_FEATURES_SIZE == m_input_vstream_infos[0].shape.features, NULL, m_element, STREAM,
- "Features of input vstream %s is not %d for RGB format! (features=%d)", m_input_vstream_infos[0].name, RGB_FEATURES_SIZE,
- m_input_vstream_infos[0].shape.features);
- break;
+ if (m_input_vstream_infos[0].shape.features == GRAY8_FEATURES_SIZE)
+ {
+ format = "GRAY8";
+ break;
+ }
+ else
+ {
+ format = "RGB";
+ GST_CHECK(RGB_FEATURES_SIZE == m_input_vstream_infos[0].shape.features, NULL, m_element, STREAM,
+ "Features of input vstream %s is not %d for RGB format! (features=%d)", m_input_vstream_infos[0].name, RGB_FEATURES_SIZE,
+ m_input_vstream_infos[0].shape.features);
+ break;
+ }
case HAILO_FORMAT_ORDER_YUY2:
format = "YUY2";
GST_CHECK(YUY2_FEATURES_SIZE == m_input_vstream_infos[0].shape.features, NULL, m_element, STREAM,
return m_cng->set_scheduler_threshold(threshold, network_name);
}
+hailo_status NetworkGroupHandle::set_scheduler_priority(const char *network_name, uint8_t priority)
+{
+ return m_cng->set_scheduler_priority(priority, network_name);
+}
+
Expected<std::pair<std::vector<InputVStream>, std::vector<OutputVStream>>> NetworkGroupHandle::create_vstreams(const char *network_name,
hailo_scheduling_algorithm_t scheduling_algorithm, const std::vector<hailo_format_with_name_t> &output_formats, bool input_quantized,
bool output_quantized, hailo_format_type_t input_format_type, hailo_format_type_t output_format_type)
std::shared_ptr<ConfiguredNetworkGroup> found_cng = get_configured_network_group(device_id, hef->hash(), network_group_name, batch_size);
if (nullptr != found_cng) {
- // If cng was already configured
auto infos = found_cng->get_network_infos();
GST_CHECK_EXPECTED(infos, element, RESOURCE, "Failed getting network infos");
if ((infos.release().size() > 1) || (scheduling_algorithm == HAILO_SCHEDULING_ALGORITHM_NONE)) {
+ // If cng was already configured
// But hailonet is not running all networks in the cng (or if not using scheduler) -
// Do not use multiplexer!
return found_cng;
hailo_status set_scheduler_timeout(const char *network_name, uint32_t timeout_ms);
hailo_status set_scheduler_threshold(const char *network_name, uint32_t threshold);
-
+ hailo_status set_scheduler_priority(const char *network_name, uint8_t priority);
std::shared_ptr<Hef> hef()
{
+cmake_minimum_required(VERSION 3.11.0)
+
+include(externals/pybind11.cmake)
add_subdirectory(src)
--- /dev/null
+cmake_minimum_required(VERSION 3.11.0)
+
+include(FetchContent)
+
+if(NOT PYTHON_EXECUTABLE AND PYBIND11_PYTHON_VERSION)
+ # venv version is prioritized (instead of PYBIND11_PYTHON_VERSION) if PYTHON_EXECUTABLE is not set.
+ # See https://pybind11.readthedocs.io/en/stable/changelog.html#v2-6-0-oct-21-2020
+ if((${CMAKE_VERSION} VERSION_LESS "3.22.0") AND (NOT WIN32))
+ find_package(PythonInterp ${PYBIND11_PYTHON_VERSION} REQUIRED)
+ # find_package(PythonInterp) already sets PYTHON_EXECUTABLE itself
+ else()
+ find_package(Python3 ${PYBIND11_PYTHON_VERSION} REQUIRED EXACT COMPONENTS Interpreter Development)
+ set(PYTHON_EXECUTABLE ${Python3_EXECUTABLE})
+ endif()
+endif()
+
+FetchContent_Declare(
+ pybind11
+ GIT_REPOSITORY https://github.com/pybind/pybind11.git
+ GIT_TAG 80dc998efced8ceb2be59756668a7e90e8bef917 # Version 2.10.1
+ #GIT_SHALLOW TRUE
+ SOURCE_DIR "${CMAKE_CURRENT_LIST_DIR}/pybind11"
+ BINARY_DIR "${CMAKE_CURRENT_LIST_DIR}/pybind11"
+)
+
+if(NOT HAILO_OFFLINE_COMPILATION)
+ # https://stackoverflow.com/questions/65527126/disable-install-for-fetchcontent
+ FetchContent_GetProperties(pybind11)
+ if(NOT pybind11_POPULATED)
+ FetchContent_Populate(pybind11)
+ add_subdirectory(${pybind11_SOURCE_DIR} ${pybind11_BINARY_DIR} EXCLUDE_FROM_ALL)
+ endif()
+else()
+ add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/pybind11 EXCLUDE_FROM_ALL)
+endif()
\ No newline at end of file
InputVStreams, OutputVStreams,
InferVStreams, HailoStreamDirection, HailoFormatFlags, HailoCpuId, Device, VDevice,
DvmTypes, PowerMeasurementTypes, SamplingPeriod, AveragingFactor, MeasurementBufferIndex,
- HailoRTException, YOLOv5PostProcessOp, HailoSchedulingAlgorithm)
+ HailoRTException, HailoSchedulingAlgorithm, HailoRTStreamAbortedByUser)
def _verify_pyhailort_lib_exists():
python_version = "".join(str(i) for i in sys.version_info[:2])
'MipiIspImageInOrder', 'MipiIspImageOutDataType', 'join_drivers_path', 'IspLightFrequency', 'HailoPowerMode',
'Endianness', 'HailoStreamInterface', 'InputVStreamParams', 'OutputVStreamParams',
'InputVStreams', 'OutputVStreams', 'InferVStreams', 'HailoStreamDirection', 'HailoFormatFlags', 'HailoCpuId',
- 'Device', 'VDevice', 'HailoRTException', 'YOLOv5PostProcessOp', 'HailoSchedulingAlgorithm']
+ 'Device', 'VDevice', 'HailoRTException', 'HailoSchedulingAlgorithm', 'HailoRTStreamAbortedByUser']
"""Control operations for the Hailo hardware device."""
+from hailo_platform.common.logger.logger import default_logger
from hailo_platform.pyhailort.pyhailort import (Control, InternalPcieDevice, ExceptionWrapper, BoardInformation, # noqa F401
CoreInformation, DeviceArchitectureTypes, ExtendedDeviceInformation, # noqa F401
HealthInformation, SamplingPeriod, AveragingFactor, DvmTypes, # noqa F401
"""
# In the C API we define the total amount of attempts, instead of the amount of retries.
- # TODO: HRT-9987 - Add this deprecation warning
- # default_logger().warning("UdpHcpControl is deprecated! Please Use Control object")
+ default_logger().warning("UdpHcpControl is deprecated! Please Use Control object")
max_number_of_attempts = retries + 1
response_timeout_milliseconds = int(response_timeout_seconds * 1000)
if device is None:
def __init__(self, device=None, device_info=None):
"""Initializes a new HailoPcieController object."""
- # TODO: HRT-9987 - Add this deprecation warning
- # default_logger().warning("PcieHcpControl is deprecated! Please Use Control object")
+
+ default_logger().warning("PcieHcpControl is deprecated! Please Use Control object")
if device_info is None:
device_info = InternalPcieDevice.scan_devices()[0]
class HailoHWObject(object):
- # TODO: HRT-9987 - Add (deprecated) to this doc
- """Abstract Hailo hardware device representation"""
+ """Abstract Hailo hardware device representation (deprecated)"""
NAME = InferenceTargets.UNINITIALIZED
IS_HARDWARE = True
self._is_device_used = False
self._hef_loaded = False
- # TODO: HRT-9987 - Add this deprecation warning
- # self._logger.warning("HailoHWObject is deprecated! Please use VDevice/Device object.")
+ self._logger.warning("HailoHWObject is deprecated! Please use VDevice/Device object.")
# TODO: HRT-6310 Remove this.
def __eq__(self, other):
@property
def name(self):
- """str: The name of this target. Valid values are defined by :class:`~hailo_platform.pyhailort.hw_object.InferenceTargets`"""
- # TODO: HRT-9987 - Add this deprecation warning
- # self._logger.warning("HailoHWObject name property is deprecated! Please use VDevice/Device object with device_id.")
+ """str: The name of this target. Valid values are defined by :class:`~hailo_platform.pyhailort.hw_object.InferenceTargets` (deprecated)"""
+ self._logger.warning("HailoHWObject name property is deprecated! Please use VDevice/Device object with device_id.")
return type(self).NAME
@property
def is_hardware(self):
- """bool: Indicates this target runs on a physical hardware device."""
+ """bool: Indicates this target runs on a physical hardware device. (deprecated)"""
# TODO: SDK should implement in Target
- # TODO: HRT-9987 - Add this deprecation warning
- # self._logger.warning("HailoHWObject is_hardware property is deprecated! Please use VDevice/Device object, or derive from it.")
+ self._logger.warning("HailoHWObject is_hardware property is deprecated! Please use VDevice/Device object, or derive from it.")
return type(self).IS_HARDWARE
@property
@property
def sorted_output_layer_names(self):
- """Getter for the property sorted_output_names.
+ """Getter for the property sorted_output_names (deprecated).
Returns:
list of str: Sorted list of the output layer names.
"""
- # TODO: HRT-9987 - Add this deprecation warning
- # self._logger.warning("HailoHWObject sorted_output_layer_names property is deprecated! Please use ConfiguredNetwork get_sorted_output_names.")
+ self._logger.warning("HailoHWObject sorted_output_layer_names property is deprecated! Please use ConfiguredNetwork get_sorted_output_names.")
if len(self._loaded_network_groups) != 1:
raise HailoHWObjectException("Access to sorted_output_layer_names is only allowed when there is a single loaded network group")
return self._loaded_network_groups[0].get_sorted_output_names()
@contextmanager
def use_device(self, *args, **kwargs):
- # TODO: HRT-9987 - Add this deprecation warning and (deprecated) to this docs
- # self._logger.warning("HailoHWObject use_device context manager is deprecated! Please use VDevice/Device object.")
- """A context manager that wraps the usage of the device."""
+ """A context manager that wraps the usage of the device. (deprecated)"""
+ self._logger.warning("HailoHWObject use_device context manager is deprecated! Please use VDevice/Device object.")
self._is_device_used = True
yield
self._is_device_used = False
def get_output_device_layer_to_original_layer_map(self):
- """Get a mapping between the device outputs to the layers' names they represent.
+ """Get a mapping between the device outputs to the layers' names they represent (deprecated).
Returns:
dict: Keys are device output names and values are lists of layers' names.
"""
- # TODO: HRT-9987 - Add this deprecation warning and (deprecated) to this docs
- # self._logger.warning("HailoHWObject get_output_device_layer_to_original_layer_map function is deprecated!")
+ self._logger.warning("HailoHWObject get_output_device_layer_to_original_layer_map function is deprecated!")
if len(self._loaded_network_groups) != 1:
raise HailoHWObjectException("Access to layer names is only allowed when there is a single loaded network group")
return {stream_info.name : self._loaded_network_groups[0].get_vstream_names_from_stream_name(stream_info.name)
for stream_info in self.get_output_stream_infos()}
def get_original_layer_to_device_layer_map(self):
- """Get a mapping between the layer names and the device outputs that contain them.
+ """Get a mapping between the layer names and the device outputs that contain them (deprecated).
Returns:
dict: Keys are the names of the layers and values are device outputs names.
"""
- # TODO: HRT-9987 - Add this deprecation warning and (deprecated) to this docs
- # self._logger.warning("HailoHWObject get_original_layer_to_device_layer_map function is deprecated!")
+ self._logger.warning("HailoHWObject get_original_layer_to_device_layer_map function is deprecated!")
if len(self._loaded_network_groups) != 1:
raise HailoHWObjectException("Access to layer names is only allowed when there is a single loaded network group")
return {vstream_info.name : self._loaded_network_groups[0].get_stream_names_from_vstream_name(vstream_info.name)
@property
def device_input_layers(self):
- """Get a list of the names of the device's inputs."""
- # TODO: HRT-9987 - Add this deprecation warning and (deprecated) to this docs
- # self._logger.warning("HailoHWObject device_input_layers function is deprecated! Please use ConfiguredNetwork object.")
+ """Get a list of the names of the device's inputs. (deprecated)"""
+ self._logger.warning("HailoHWObject device_input_layers function is deprecated! Please use ConfiguredNetwork object.")
return [layer.name for layer in self.get_input_stream_infos()]
@property
def device_output_layers(self):
- """Get a list of the names of the device's outputs."""
- # TODO: HRT-9987 - Add this deprecation warning and (deprecated) to this docs
- # self._logger.warning("HailoHWObject device_output_layers function is deprecated! Please use ConfiguredNetwork object.")
+ """Get a list of the names of the device's outputs. (deprecated)"""
+ self._logger.warning("HailoHWObject device_output_layers function is deprecated! Please use ConfiguredNetwork object.")
return [layer.name for layer in self.get_output_stream_infos()]
def hef_loaded(self):
- """Return True if this object has loaded the model HEF to the hardware device."""
+ """Return True if this object has loaded the model HEF to the hardware device. (deprecated)"""
# TODO: SDK should implement in Target
- # TODO: HRT-9987 - Add this deprecation warning and (deprecated) to this docs
- # self._logger.warning("HailoHWObject hef_loaded function is deprecated! Please use VDevice/Device object, or derive from it.")
+ self._logger.warning("HailoHWObject hef_loaded function is deprecated! Please use VDevice/Device object, or derive from it.")
return self._hef_loaded
def outputs_count(self):
"""Return the amount of output tensors that are returned from the hardware device for every
- input image.
+ input image (deprecated).
"""
- # TODO: HRT-9987 - Add this deprecation warning and (deprecated) to this docs
- # self._logger.warning("HailoHWObject outputs_count function is deprecated! Please use ConfiguredNetwork object.")
+ self._logger.warning("HailoHWObject outputs_count function is deprecated! Please use ConfiguredNetwork object.")
return len(self.get_output_vstream_infos())
def _clear_shapes(self):
# TODO: SDK should implement in Target
- # TODO: HRT-9987 - Add this deprecation warning and (deprecated) to this docs
- # self._logger.warning("HailoHWObject _clear_shapes function is deprecated! Please use ConfiguredNetwork object.")
+ self._logger.warning("HailoHWObject _clear_shapes function is deprecated! Please use ConfiguredNetwork object.")
self._hw_consts = None
@property
def model_name(self):
- """Get the name of the current model.
+ """Get the name of the current model (deprecated).
Returns:
str: Model name.
"""
- # TODO: HRT-9987 - Add this deprecation warning and (deprecated) to this docs
- # self._logger.warning("HailoHWObject model_name property is deprecated! Please use ConfiguredNetwork object.")
+ self._logger.warning("HailoHWObject model_name property is deprecated! Please use ConfiguredNetwork object.")
if len(self._loaded_network_groups) == 1:
return self._loaded_network_groups[0].name
raise HailoHWObjectException(
"This function is only supported when there is exactly 1 loaded network group. one should use HEF.get_network_group_names() / ConfiguredNetwork.name / ActivatedNetwork.name")
def get_output_shapes(self):
- """Get the model output shapes, as returned to the user (without any hardware padding).
+ """Get the model output shapes, as returned to the user (without any hardware padding) (deprecated).
Returns:
Tuple of output shapes, sorted by the output names.
"""
- # TODO: HRT-9987 - Add this deprecation warning and (deprecated) to this docs
- # self._logger.warning("HailoHWObject get_output_shapes function is deprecated! Please use ConfiguredNetwork object.")
+ self._logger.warning("HailoHWObject get_output_shapes function is deprecated! Please use ConfiguredNetwork object.")
if len(self._loaded_network_groups) != 1:
raise HailoHWObjectException("Calling get_output_shapes is only allowed when there is a single loaded network group")
return self._loaded_network_groups[0].get_output_shapes()
class HailoChipObject(HailoHWObject):
- # TODO: HRT-9987 - Add (deprecated) to this docs
- """Hailo hardware device representation"""
+ """Hailo hardware device representation (deprecated)"""
def __init__(self):
"""Create the Hailo Chip hardware object."""
return self._control_object
def get_all_input_layers_dtype(self):
- """Get the model inputs dtype.
+ """Get the model inputs dtype (deprecated).
Returns:
dict of :obj:'numpy.dtype': where the key is model input_layer name, and the value is dtype as the device expect to get for this input.
"""
- # TODO: HRT-9987 - Add this deprecation warning and (deprecated) to this docs
- # self._logger.warning("HailoChipObject get_all_input_layers_dtype function is deprecated! Please use ConfiguredNetwork object.")
+ self._logger.warning("HailoChipObject get_all_input_layers_dtype function is deprecated! Please use ConfiguredNetwork object.")
return {stream.name: HailoRTTransformUtils.get_dtype(stream.data_bytes) for stream in self.get_input_stream_infos()}
def get_input_vstream_infos(self, network_name=None):
- """Get input vstreams information of a specific network group.
+ """Get input vstreams information of a specific network group (deprecated).
Args:
network_name (str, optional): The name of the network to access. In case not given, all the networks in the network group will be addressed.
If there is exactly one configured network group, returns a list of
:obj:`hailo_platform.pyhailort._pyhailort.VStreamInfo`: with all the information objects of all input vstreams
"""
- # TODO: HRT-9987 - Add this deprecation warning and (deprecated) to this docs
- # self._logger.warning("HailoChipObject get_input_vstream_infos function is deprecated! Please use ConfiguredNetwork object.")
+ self._logger.warning("HailoChipObject get_input_vstream_infos function is deprecated! Please use ConfiguredNetwork object.")
if len(self._loaded_network_groups) != 1:
raise HailoHWObjectException("Access to network vstream info is only allowed when there is a single loaded network group")
return self._loaded_network_groups[0].get_input_vstream_infos(network_name=network_name)
def get_output_vstream_infos(self, network_name=None):
- """Get output vstreams information of a specific network group.
+ """Get output vstreams information of a specific network group (deprecated).
Args:
network_name (str, optional): The name of the network to access. In case not given, all the networks in the network group will be addressed.
If there is exactly one configured network group, returns a list of
:obj:`hailo_platform.pyhailort._pyhailort.VStreamInfo`: with all the information objects of all output vstreams
"""
- # TODO: HRT-9987 - Add this deprecation warning and (deprecated) to this docs
- # self._logger.warning("HailoChipObject get_output_vstream_infos function is deprecated! Please use ConfiguredNetwork object.")
+ self._logger.warning("HailoChipObject get_output_vstream_infos function is deprecated! Please use ConfiguredNetwork object.")
if len(self._loaded_network_groups) != 1:
raise HailoHWObjectException("Access to network vstream info is only allowed when there is a single loaded network group")
return self._loaded_network_groups[0].get_output_vstream_infos(network_name=network_name)
def get_all_vstream_infos(self, network_name=None):
- """Get input and output vstreams information.
+ """Get input and output vstreams information (deprecated).
Args:
network_name (str, optional): The name of the network to access. In case not given, all the networks in the network group will be addressed.
If there is exactly one configured network group, returns a list of
:obj:`hailo_platform.pyhailort._pyhailort.VStreamInfo`: with all the information objects of all input and output vstreams
"""
- # TODO: HRT-9987 - Add this deprecation warning and (deprecated) to this docs
- # self._logger.warning("HailoChipObject get_all_vstream_infos function is deprecated! Please use ConfiguredNetwork object.")
+ self._logger.warning("HailoChipObject get_all_vstream_infos function is deprecated! Please use ConfiguredNetwork object.")
if len(self._loaded_network_groups) != 1:
raise HailoHWObjectException("Access to network vstream info is only allowed when there is a single loaded network group")
return self._loaded_network_groups[0].get_all_vstream_infos(network_name=network_name)
def get_input_stream_infos(self, network_name=None):
- """Get the input low-level streams information of a specific network group.
+ """Get the input low-level streams information of a specific network group (deprecated).
Args:
network_name (str, optional): The name of the network to access. In case not given, all the networks in the network group will be addressed.
:obj:`hailo_platform.pyhailort._pyhailort.VStreamInfo`: with information objects
of all input low-level streams.
"""
- # TODO: HRT-9987 - Add this deprecation warning and (deprecated) to this docs
- # self._logger.warning("HailoChipObject get_input_stream_infos function is deprecated! Please use ConfiguredNetwork object.")
+ self._logger.warning("HailoChipObject get_input_stream_infos function is deprecated! Please use ConfiguredNetwork object.")
if len(self._loaded_network_groups) != 1:
raise HailoHWObjectException("Access to network stream info is only allowed when there is a single loaded network group")
return self._loaded_network_groups[0].get_input_stream_infos(network_name=network_name)
def get_output_stream_infos(self, network_name=None):
- """Get the output low-level streams information of a specific network group.
+ """Get the output low-level streams information of a specific network group (deprecated).
Args:
network_name (str, optional): The name of the network to access. In case not given, all the networks in the network group will be addressed.
:obj:`hailo_platform.pyhailort._pyhailort.VStreamInfo`: with information objects
of all output low-level streams.
"""
- # TODO: HRT-9987 - Add this deprecation warning and (deprecated) to this docs
- # self._logger.warning("HailoChipObject get_output_stream_infos function is deprecated! Please use ConfiguredNetwork object.")
+ self._logger.warning("HailoChipObject get_output_stream_infos function is deprecated! Please use ConfiguredNetwork object.")
if len(self._loaded_network_groups) != 1:
raise HailoHWObjectException("Access to network stream info is only allowed when there is a single loaded network group")
return self._loaded_network_groups[0].get_output_stream_infos(network_name=network_name)
def get_all_stream_infos(self, network_name=None):
- """Get input and output streams information of a specific network group.
+ """Get input and output streams information of a specific network group (deprecated).
Args:
network_name (str, optional): The name of the network to access. In case not given, all the networks in the network group will be addressed.
If there is exactly one configured network group, returns a list of
:obj:`hailo_platform.pyhailort._pyhailort.StreamInfo`: with all the information objects of all input and output streams
"""
- # TODO: HRT-9987 - Add this deprecation warning and (deprecated) to this docs
- # self._logger.warning("HailoChipObject get_all_stream_infos function is deprecated! Please use ConfiguredNetwork object.")
+ self._logger.warning("HailoChipObject get_all_stream_infos function is deprecated! Please use ConfiguredNetwork object.")
if len(self._loaded_network_groups) != 1:
raise HailoHWObjectException("Access to network stream info is only allowed when there is a single loaded network group")
return self._loaded_network_groups[0].get_all_stream_infos(network_name=network_name)
raise HailoRTException("Device can only be configured from the process it was created in.")
configured_apps = self.control.configure(hef, configure_params_by_name)
self._hef_loaded = True
- configured_networks = [ConfiguredNetwork(configured_app, self, hef) for configured_app in configured_apps]
+ configured_networks = [ConfiguredNetwork(configured_app) for configured_app in configured_apps]
self._loaded_network_groups.extend(configured_networks)
return configured_networks
def get_input_shape(self, name=None):
- """Get the input shape (not padded) of a network.
+ """Get the input shape (not padded) of a network (deprecated).
Args:
name (str, optional): The name of the desired input. If a name is not provided, return
Returns:
Tuple of integers representing the input_shape.
"""
- # TODO: HRT-9987 - Add this deprecation warning and (deprecated) to this docs
- # self._logger.warning("HailoChipObject get_input_shape function is deprecated! Please use ConfiguredNetwork object.")
+ self._logger.warning("HailoChipObject get_input_shape function is deprecated! Please use ConfiguredNetwork object.")
if name is None:
name = self.get_input_vstream_infos()[0].name
[input_vstream.name for input_vstream in self.get_input_vstream_infos()]))
def get_index_from_name(self, name):
- """Get the index in the output list from the name.
+ """Get the index in the output list from the name (deprecated).
Args:
name (str): The name of the output.
Returns:
int: The index of the layer name in the output list.
"""
- # TODO: HRT-9987 - Add this deprecation warning and (deprecated) to this docs
- # self._logger.warning("HailoChipObject get_index_from_name function is deprecated! Please use ConfiguredNetwork object.")
+ self._logger.warning("HailoChipObject get_index_from_name function is deprecated! Please use ConfiguredNetwork object.")
try:
return self.sorted_output_layer_names.index(name)
except ValueError:
class EthernetDevice(HailoChipObject):
- # TODO: HRT-9987 - Add (deprecated) to this docs
- """Represents any Hailo hardware device that supports UDP control and dataflow"""
+ """Represents any Hailo hardware device that supports UDP control and dataflow (deprecated)"""
NAME = InferenceTargets.UDP_CONTROLLER
super(EthernetDevice, self).__init__()
+ self._logger.warning("EthernetDevice is deprecated! Please use VDevice/Device object.")
+
gc.collect()
self._remote_ip = remote_ip
Returns:
list of str: IPs of scanned devices.
"""
- # TODO: HRT-9987 - Add this deprecation warning and (deprecated) to this docs
- # default_logger().warning("EthernetDevice scan_devices method is deprecated! Please use scan() of Device object.")
+ default_logger().warning("EthernetDevice scan_devices method is deprecated! Please use scan() of Device object.")
udp_scanner = HailoUdpScan()
return udp_scanner.scan_devices(interface_name, timeout_seconds=timeout_seconds)
@property
def remote_ip(self):
- """Return the IP of the remote device."""
- # TODO: HRT-9987 - Add this deprecation warning and (deprecated) to this docs
- # self._logger.warning("EthernetDevice remote_ip method is deprecated! Please use VDevice/Device object.")
+ """Return the IP of the remote device (deprecated)."""
+ self._logger.warning("EthernetDevice remote_ip method is deprecated! Please use VDevice/Device object.")
return self._remote_ip
class PcieDevice(HailoChipObject):
- # TODO: HRT-9987 - Add (deprecated) to this docs
- """Hailo PCIe production device representation"""
+ """Hailo PCIe production device representation (deprecated)"""
NAME = InferenceTargets.PCIE_CONTROLLER
:func:`PcieDevice.scan_devices` to get list of all available devices.
"""
super(PcieDevice, self).__init__()
- # TODO: HRT-9987 - Add this deprecation warning
- # self._logger.warning("PcieDevice is deprecated! Please use VDevice/Device object.")
+ self._logger.warning("PcieDevice is deprecated! Please use VDevice/Device object.")
gc.collect()
# PcieDevice __del__ function tries to release self._device.
@staticmethod
def scan_devices():
- """Scans for all pcie devices on the system.
+ """Scans for all pcie devices on the system (deprecated).
Returns:
list of :obj:`hailo_platform.pyhailort.pyhailort.PcieDeviceInfo`
"""
- # TODO: HRT-9987 - Add this deprecation warning and (deprecated) to this docs
- # default_logger().warning("PcieDevice scan_devices method is deprecated! Please use Device object.")
+ default_logger().warning("PcieDevice scan_devices method is deprecated! Please use Device object.")
return InternalPcieDevice.scan_devices()
def _open_device(self, device_info):
MipiClockSelection, MipiIspImageInOrder,
MipiIspImageOutDataType, IspLightFrequency,
BootSource, HailoSocketDefs, Endianness,
- MipiInputStreamParams, SensorConfigTypes,
- SensorConfigOpCode)
+ MipiInputStreamParams, SensorConfigTypes)
BBOX_PARAMS = _pyhailort.HailoRTDefaults.BBOX_PARAMS()
HAILO_DEFAULT_ETH_CONTROL_PORT = _pyhailort.HailoRTDefaults.HAILO_DEFAULT_ETH_CONTROL_PORT()
class HailoRTStreamAborted(HailoRTException):
pass
+class HailoRTStreamAbortedByUser(HailoRTException):
+ pass
+
class HailoRTInvalidOperationException(HailoRTException):
pass
raise HailoRTTimeout("Received a timeout - hailort has failed because a timeout had occurred") from libhailort_exception
if string_error_code == "HAILO_STREAM_ABORTED_BY_HW":
raise HailoRTStreamAborted("Stream aborted due to an external event") from libhailort_exception
+ if string_error_code == "HAILO_STREAM_ABORTED_BY_USER":
+ raise HailoRTStreamAbortedByUser("Stream was aborted by user") from libhailort_exception
if string_error_code == "HAILO_INVALID_OPERATION":
raise HailoRTInvalidOperationException("Invalid operation. See hailort.log for more information") from libhailort_exception
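With the new exception type, callers can tell a user-initiated abort apart from a real stream failure. A minimal sketch of a read loop, assuming a `vstream` object with a `recv()` method and a hypothetical `process()` handler:

    while True:
        try:
            frame = vstream.recv()
        except HailoRTStreamAbortedByUser:
            # Expected when the user aborted the stream: shut down quietly.
            break
        process(frame)  # hypothetical downstream handler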
return device_ip_addresses
-class TrafficControl(object):
+class NetworkRateLimiter(object):
def __init__(self, ip, port, rate_bytes_per_sec):
if sys.platform != 'linux':
- raise HailoRTInvalidOperationException('TrafficControl is supported only on UNIX os')
- with ExceptionWrapper():
- self._tc_util = _pyhailort.TrafficControlUtil(ip, port, int(rate_bytes_per_sec))
-
+ raise HailoRTInvalidOperationException('NetworkRateLimiter is supported only on Linux')
+ self._ip = ip
+ self._port = port
+ self._rate_bytes_per_sec = rate_bytes_per_sec
+
def set_rate_limit(self):
- self._tc_util.set_rate_limit()
+ with ExceptionWrapper():
+ return _pyhailort.NetworkRateLimiter.set_rate_limit(self._ip, self._port, self._rate_bytes_per_sec)
def reset_rate_limit(self):
- self._tc_util.reset_rate_limit()
+ with ExceptionWrapper():
+ return _pyhailort.NetworkRateLimiter.reset_rate_limit(self._ip, self._port)
def get_interface_name(ip):
"get the interface corresponding to the given ip"
with ExceptionWrapper():
- return _pyhailort.TrafficControlUtil.get_interface_name(ip)
+ return _pyhailort.NetworkRateLimiter.get_interface_name(ip)
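The renamed class keeps the same set/reset flow as the old TrafficControl, but now resolves the rule per call instead of holding a native utility object. A minimal usage sketch (the IP, port, and rate are placeholder values; Linux only):

    limiter = NetworkRateLimiter("10.0.0.1", 12345, 1000000)  # ip, port, bytes/sec (placeholders)
    limiter.reset_rate_limit()  # clear any previous rule for this ip/port
    limiter.set_rate_limit()    # install the new rate limit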
class ConfigureParams(object):
class ConfiguredNetwork(object):
"""Represents a network group loaded to the device."""
- def __init__(self, configured_network, target, hef):
+ def __init__(self, configured_network):
self._configured_network = configured_network
self._input_vstreams_holders = []
self._output_vstreams_holders = []
- self._target = target
- self._hef = hef
def get_networks_names(self):
- return self._hef.get_networks_names(self.name)
+ return self._configured_network.get_networks_names()
def activate(self, network_group_params=None):
"""Activate this network group in order to infer data through it.
Returns:
:class:`ActivatedNetworkContextManager`: Context manager that returns the activated
network group.
+
+ Note:
+ Usage of `activate` when the scheduler is enabled is deprecated. In this case, this function returns an empty context manager and prints a deprecation warning (see the sketch after this method).
"""
- # TODO: HRT-9988 - Add deprecation warning when changing to service by default
- network_group_params = network_group_params or self.create_params()
+ if self._configured_network.is_scheduled():
+ default_logger().warning("Calls to `activate()` when working with scheduler are deprecated! On future versions this call will raise an error.")
+ return EmptyContextManager()
+ network_group_params = network_group_params or self.create_params()
with ExceptionWrapper():
return ActivatedNetworkContextManager(self,
- self._configured_network.activate(network_group_params),
- self._target, self._hef)
+ self._configured_network.activate(network_group_params))
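A minimal sketch of the non-deprecated pattern under the scheduler, assuming an already-configured `network_group` with prepared vstream params and `input_data`:

    # No activate() call is needed: the scheduler activates and deactivates
    # network groups automatically while inference runs.
    with InferVStreams(network_group, input_vstreams_params, output_vstreams_params) as infer_pipeline:
        results = infer_pipeline.infer(input_data)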
def wait_for_activation(self, timeout_ms=None):
"""Block until activated, or until ``timeout_ms`` is passed.
return tuple(results)
def get_sorted_output_names(self):
- return self._hef.get_sorted_output_names(self.name)
+ return self._configured_network.get_sorted_output_names()
def get_input_vstream_infos(self, network_name=None):
"""Get input vstreams information.
list of :obj:`hailo_platform.pyhailort._pyhailort.VStreamInfo`: with all the information objects of all input vstreams
"""
- name = network_name if network_name is not None else self.name
- return self._hef.get_input_vstream_infos(name)
+ name = network_name if network_name is not None else ""
+ return self._configured_network.get_input_vstream_infos(name)
def get_output_vstream_infos(self, network_name=None):
"""Get output vstreams information.
list of :obj:`hailo_platform.pyhailort._pyhailort.VStreamInfo`: with all the information objects of all output vstreams
"""
- name = network_name if network_name is not None else self.name
- return self._hef.get_output_vstream_infos(name)
+ name = network_name if network_name is not None else ""
+ return self._configured_network.get_output_vstream_infos(name)
def get_all_vstream_infos(self, network_name=None):
"""Get input and output vstreams information.
list of :obj:`hailo_platform.pyhailort._pyhailort.VStreamInfo`: with all the information objects of all input and output vstreams
"""
- name = network_name if network_name is not None else self.name
- return self._hef.get_all_vstream_infos(name)
+ name = network_name if network_name is not None else ""
+ return self._configured_network.get_all_vstream_infos(name)
def get_input_stream_infos(self, network_name=None):
"""Get the input low-level streams information of a specific network group.
of all input low-level streams.
"""
- name = network_name if network_name is not None else self.name
- return self._hef.get_input_stream_infos(name)
+ name = network_name if network_name is not None else ""
+ return self._configured_network.get_input_stream_infos(name)
def get_output_stream_infos(self, network_name=None):
"""Get the output low-level streams information of a specific network group.
of all output low-level streams.
"""
- name = network_name if network_name is not None else self.name
- return self._hef.get_output_stream_infos(name)
+ name = network_name if network_name is not None else ""
+ return self._configured_network.get_output_stream_infos(name)
def get_all_stream_infos(self, network_name=None):
"""Get input and output streams information of a specific network group.
list of :obj:`hailo_platform.pyhailort._pyhailort.StreamInfo`: with all the information objects of all input and output streams
"""
- name = network_name if network_name is not None else self.name
- return self._hef.get_all_stream_infos(name)
+ name = network_name if network_name is not None else ""
+ return self._configured_network.get_all_stream_infos(name)
def get_udp_rates_dict(self, fps, max_supported_rate_bytes):
with ExceptionWrapper():
list of str: All the underlying streams names for the provided vstream name.
"""
with ExceptionWrapper():
- return self._hef.get_stream_names_from_vstream_name(vstream_name, self.name)
+ return self._configured_network.get_stream_names_from_vstream_name(vstream_name)
def get_vstream_names_from_stream_name(self, stream_name):
"""Get vstream names list from their underlying stream name for a specific network group.
list of str: All the matching vstream names for the provided stream name.
"""
with ExceptionWrapper():
- return self._hef.get_vstream_names_from_stream_name(stream_name, self.name)
+ return self._configured_network.get_vstream_names_from_stream_name(stream_name)
def set_scheduler_timeout(self, timeout_ms, network_name=None):
"""Sets the maximum time period that may pass before getting run time from the scheduler,
return self._configured_network.set_scheduler_priority(priority)
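This is the Python counterpart of the new hailonet `scheduler-priority` property. A minimal sketch (the HEF path and the priority value are placeholders):

    params = VDevice.create_params()
    params.scheduling_algorithm = HailoSchedulingAlgorithm.ROUND_ROBIN
    with VDevice(params) as target:
        network_group = target.configure(HEF('model.hef'))[0]
        # The scheduler prefers network groups with bigger priority values.
        network_group.set_scheduler_priority(2)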
+class EmptyContextManager(object):
+ """An empty context manager that returns instead of activated network group when scheduler is enabled`."""
+
+ def __init__(self):
+ pass
+
+ def __enter__(self):
+ pass
+
+ def __exit__(self, *args):
+ pass
+
+
class ActivatedNetworkContextManager(object):
"""A context manager that returns the activated network group upon enter."""
- def __init__(self, configured_network, activated_network, target, hef):
+ def __init__(self, configured_network, activated_network):
self._configured_network = configured_network
self._activated_network = activated_network
- self._target = target
- self._hef = hef
def __enter__(self):
with ExceptionWrapper():
- activated_network_group = ActivatedNetwork(self._configured_network, self._activated_network.__enter__(), self._target,
- self._hef)
+ activated_network_group = ActivatedNetwork(self._configured_network, self._activated_network.__enter__())
return activated_network_group
def __exit__(self, *args):
class ActivatedNetwork(object):
"""The network group that is currently activated for inference."""
- def __init__(self, configured_network, activated_network, target, hef):
+ def __init__(self, configured_network, activated_network):
self._configured_network = configured_network
self._activated_network = activated_network
- self._target = target
- self._hef = hef
self._last_number_of_invalid_frames_read = 0
-
- @property
- def target(self):
- return self._target
@property
def name(self):
raise HailoRTException("There are {} invalid frames.".format(number_of_invalid_frames))
def get_sorted_output_names(self):
- return self._hef.get_sorted_output_names(self.name)
+ return self._configured_network.get_sorted_output_names()
def _get_intermediate_buffer(self, src_context_index, src_stream_index):
with ExceptionWrapper():
``[class_count, BBOX_PARAMS, detections_count]`` padded with empty bboxes.
"""
- self._logger = default_logger()
self._configured_net_group = configured_net_group
self._net_group_name = configured_net_group.name
self._input_vstreams_params = input_vstreams_params
network_name = self._input_name_to_network_name[input_name]
if (network_name not in already_seen_networks) :
already_seen_networks.add(network_name)
+ output_vstream_infos = self._configured_net_group.get_output_vstream_infos()
for output_name in self._network_name_to_outputs[network_name]:
- output_buffers_info[output_name] = OutputLayerUtils(self._configured_net_group._hef, output_name, self._infer_pipeline,
+ output_buffers_info[output_name] = OutputLayerUtils(output_vstream_infos, output_name, self._infer_pipeline,
self._net_group_name)
output_tensor_info = output_buffers_info[output_name].output_tensor_info
shape, dtype = output_tensor_info
are output data tensors as :obj:`numpy.ndarray` (or list of :obj:`numpy.ndarray` in case of nms output and tf_nms_format=False).
"""
- time_before_infer_calcs = time.time()
+ time_before_infer_calcs = time.perf_counter()
if not isinstance(input_data, dict):
input_stream_infos = self._configured_net_group.get_input_stream_infos()
if len(input_stream_infos) != 1:
self._make_c_contiguous_if_needed(input_layer_name, input_data)
with ExceptionWrapper():
- time_before_infer = time.time()
+ time_before_infer = time.perf_counter()
self._infer_pipeline.infer(input_data, output_buffers, batch_size)
- self._hw_time = time.time() - time_before_infer
+ self._hw_time = time.perf_counter() - time_before_infer
for name, result_array in output_buffers.items():
is_nms = output_buffers_info[name].is_nms
else:
output_buffers[name] = HailoRTTransformUtils.output_raw_buffer_to_nms_format(result_array, nms_shape.number_of_classes)
- self._total_time = time.time() - time_before_infer_calcs
+ self._total_time = time.perf_counter() - time_before_infer_calcs
return output_buffers
def get_hw_time(self):
input_expected_dtype = self._infer_pipeline.get_host_dtype(input_layer_name)
if input_dtype != input_expected_dtype:
- self._logger.warning("Given input data dtype ({}) is different than inferred dtype ({}). "
+ default_logger().warning("Given input data dtype ({}) is different than inferred dtype ({}). "
"conversion for every frame will reduce performance".format(input_dtype,
input_expected_dtype))
input_data[input_layer_name] = input_data[input_layer_name].astype(input_expected_dtype)
def _make_c_contiguous_if_needed(self, input_layer_name, input_data):
if not input_data[input_layer_name].flags.c_contiguous:
- self._logger.warning("Converting {} numpy array to be C_CONTIGUOUS".format(
+ default_logger().warning("Converting {} numpy array to be C_CONTIGUOUS".format(
input_layer_name))
input_data[input_layer_name] = numpy.asarray(input_data[input_layer_name], order='C')
return FormatType.FLOAT32
raise HailoRTException("unsupported data type {}".format(dtype))
+# TODO: HRT-10427 - Remove
class InternalEthernetDevice(object):
def __init__(self, address, port, response_timeout_seconds=10, max_number_of_attempts=3):
- # TODO: HRT-9987 - Add this deprecation warning
- # default_logger().warning("InternalEthernetDevice is deprecated! Please use VDevice object.")
self.device = None
self._address = address
self._port = port
except HailoRTException:
raise ArgumentTypeError('Invalid device info string, format is [<domain>]:<bus>:<device>.<func>')
-
+# TODO: HRT-10427 - Remove
class InternalPcieDevice(object):
def __init__(self, device_info=None):
self.device = None
self.device.release()
self.device = None
+ # TODO: HRT-10427 - Move to a static method in pyhailort_internal when InternalPcieDevice removed
@staticmethod
def scan_devices():
with ExceptionWrapper():
with ExceptionWrapper():
return self.device.direct_read_memory(address, size)
-
+# TODO: HRT-10427 - Remove when removing InternalPcieDevice
class PcieDebugLog(object):
def __init__(self, pci_device):
self._pcie_device = pci_device
SUPPORTED_PROTOCOL_VERSION = 2
SUPPORTED_FW_MAJOR = 4
-SUPPORTED_FW_MINOR = 13
+SUPPORTED_FW_MINOR = 14
SUPPORTED_FW_REVISION = 0
MEGA_MULTIPLIER = 1000.0 * 1000.0
def __init__(self, device: '_pyhailort.Device'):
self.__device = device
- self._logger = default_logger()
# TODO: should remove?
if sys.platform != "win32":
"""
gc.collect()
- self._logger = default_logger()
# Device __del__ function tries to release self._device.
# to avoid AttributeError if the __init__ func fails, we set it to None first.
# https://stackoverflow.com/questions/6409644/is-del-called-on-an-object-that-doesnt-complete-init
Args:
hef (:class:`~hailo_platform.pyhailort.pyhailort.HEF`): HEF to configure the vdevice from
configure_params_by_name (dict, optional): Maps between each net_group_name to configure_params. If not provided, default params will be applied
+
+ Note:
+ This function is deprecated. Support will be removed in future versions.
"""
+ default_logger().warning("Usage of Device.configure is deprecated! One should use VDevice for inference")
if self._creation_pid != os.getpid():
raise HailoRTException("Device can only be configured from the process it was created in.")
with ExceptionWrapper():
- configured_apps = self._device.configure(hef._hef, configure_params_by_name)
- configured_networks = [ConfiguredNetwork(configured_app, self, hef) for configured_app in configured_apps]
+ configured_ngs_handles = self._device.configure(hef._hef, configure_params_by_name)
+ configured_networks = [ConfiguredNetwork(configured_ng_handle) for configured_ng_handle in configured_ngs_handles]
self._loaded_network_groups.extend(configured_networks)
return configured_networks
list of all available devices. Excludes 'params'. Cannot be used together with device_id.
"""
gc.collect()
- self._logger = default_logger()
# VDevice __del__ function tries to release self._vdevice.
# to avoid AttributeError if the __init__ func fails, we set it to None first.
if self._creation_pid != os.getpid():
raise HailoRTException("VDevice can only be configured from the process it was created in.")
with ExceptionWrapper():
- configured_apps = self._vdevice.configure(hef._hef, configure_params_by_name)
- configured_networks = [ConfiguredNetwork(configured_app, self, hef) for configured_app in configured_apps]
+ configured_ngs_handles = self._vdevice.configure(hef._hef, configure_params_by_name)
+ configured_networks = [ConfiguredNetwork(configured_ng_handle) for configured_ng_handle in configured_ngs_handles]
self._loaded_network_groups.extend(configured_networks)
return configured_networks
timeout_ms = DEFAULT_VSTREAM_TIMEOUT_MS
if queue_size is None:
queue_size = DEFAULT_VSTREAM_QUEUE_SIZE
- name = network_name if network_name is not None else configured_network.name
+ name = network_name if network_name is not None else ""
with ExceptionWrapper():
- return configured_network._hef._hef.get_input_vstreams_params(name, quantized,
+ return configured_network._configured_network.make_input_vstream_params(name, quantized,
format_type, timeout_ms, queue_size)
@staticmethod
timeout_ms = DEFAULT_VSTREAM_TIMEOUT_MS
if queue_size is None:
queue_size = DEFAULT_VSTREAM_QUEUE_SIZE
- name = network_name if network_name is not None else configured_network.name
+ name = network_name if network_name is not None else ""
with ExceptionWrapper():
- return configured_network._hef._hef.get_output_vstreams_params(name, quantized,
+ return configured_network._configured_network.make_output_vstream_params(name, quantized,
format_type, timeout_ms, queue_size)
@staticmethod
class OutputLayerUtils(object):
- def __init__(self, hef, vstream_name, pipeline, net_group_name=""):
- self._hef = hef
+ def __init__(self, output_vstream_infos, vstream_name, pipeline, net_group_name=""):
+ self._output_vstream_infos = output_vstream_infos
self._vstream_info = self._get_vstream_info(net_group_name, vstream_name)
if isinstance(pipeline, (_pyhailort.InferVStreams)):
return self._quantized_empty_bbox
def _get_vstream_info(self, net_group_name, vstream_name):
- output_vstream_infos = self._hef.get_output_vstream_infos(net_group_name)
- for info in output_vstream_infos:
+ for info in self._output_vstream_infos:
if info.name == vstream_name:
return info
raise HailoRTException("No vstream matches the given name {}".format(vstream_name))
def __init__(self, configured_network, recv_object, name, tf_nms_format=False, net_group_name=""):
self._recv_object = recv_object
- self._output_layer_utils = OutputLayerUtils(configured_network._hef, name, self._recv_object, net_group_name)
+ output_vstream_infos = configured_network.get_output_vstream_infos()
+ self._output_layer_utils = OutputLayerUtils(output_vstream_infos, name, self._recv_object, net_group_name)
self._output_dtype = self._output_layer_utils.output_dtype
self._vstream_info = self._output_layer_utils._vstream_info
self._output_tensor_info = self._output_layer_utils.output_tensor_info
def _after_fork_in_child(self):
for vstream in self._vstreams.values():
vstream._after_fork_in_child()
-
-
-class YOLOv5PostProcessOp(object):
-
- def __init__(self, anchors, shapes, formats, quant_infos, image_height, image_width, confidence_threshold, iou_threshold, num_of_classes,
- max_boxes, cross_classes=True):
-
- self._op = _pyhailort.YOLOv5PostProcessOp.create(anchors, shapes, formats, quant_infos, image_height, image_width, confidence_threshold,
- iou_threshold, num_of_classes, max_boxes, cross_classes)
-
- def execute(self, net_flow_tensors):
- return self._op.execute(net_flow_tensors)
\ No newline at end of file
from builtins import object
-from hailo_platform.pyhailort.pyhailort import ConfiguredNetwork, HEF, TrafficControl, INPUT_DATAFLOW_BASE_PORT
+from hailo_platform.pyhailort.pyhailort import HEF, NetworkRateLimiter, INPUT_DATAFLOW_BASE_PORT
DEFAULT_MAX_KBPS = 850e3
-DEFAULT_MAX_KBPS_PAPRIKA_B0 = 160e3
BYTES_IN_Kbits = 125.0
pass
-def get_max_supported_kbps(hw_arch="hailo8"):
- # TODO: What should be here?
- if hw_arch == "paprika_b0":
- return DEFAULT_MAX_KBPS_PAPRIKA_B0
- return DEFAULT_MAX_KBPS
-
class RateLimiterWrapper(object):
"""UDPRateLimiter wrapper enabling ``with`` statements."""
- def __init__(self, network_group, fps=1, fps_factor=1.0, remote_ip=None, hw_arch=None):
+ def __init__(self, configured_network_group, fps=1, fps_factor=1.0, remote_ip=None):
"""RateLimiterWrapper constructor.
Args:
target network_group.
fps (int): Frame rate.
fps_factor (float): Safety factor by which to multiply the calculated UDP rate.
+ remote_ip (str): Device IP address.
"""
- if not isinstance(network_group, ConfiguredNetwork):
- return RateLimiterException("The API was changed. RateLimiterWrapper accept ConfiguredNetwork instead of ActivatedNetwork")
- self._network_group = network_group
- if remote_ip is not None:
- self._remote_ip = remote_ip
- else:
- # this line should be removed. this parameter will be removed from the object
- self._remote_ip = network_group._target.device_id
+ self._network_group = configured_network_group
+ if remote_ip is None:
+ raise RateLimiterException("In order to use RateLimiterWrapper, one should pass 'remote_ip'")
+ self._remote_ip = remote_ip
self._fps = fps
self._fps_factor = fps_factor
- if hw_arch is not None:
- self._hw_arch = hw_arch
- else:
- # this line should be removed. this parameter will be removed from the object
- self._hw_arch = network_group._target._hw_arch if hasattr(network_group._target, '_hw_arch') else None
self._rates_dict = {}
self._tc_dict = {}
def __enter__(self):
- max_supported_kbps_rate = get_max_supported_kbps(self._hw_arch)
+ max_supported_kbps_rate = DEFAULT_MAX_KBPS
self._rates_dict = self._network_group.get_udp_rates_dict((self._fps * self._fps_factor),
(max_supported_kbps_rate * BYTES_IN_Kbits))
for port, rate in self._rates_dict.items():
- self._tc_dict[port] = TrafficControl(self._remote_ip, port, rate)
+ self._tc_dict[port] = NetworkRateLimiter(self._remote_ip, port, rate)
self._tc_dict[port].reset_rate_limit()
self._tc_dict[port].set_rate_limit()
class UDPRateLimiter(object):
"""Enables limiting or removing limits on UDP communication rate to a board."""
def __init__(self, remote_ip, port, rate_kbits_per_sec = 0):
- self._tc = TrafficControl(remote_ip, port, rate_kbits_per_sec * BYTES_IN_Kbits)
+ self._tc = NetworkRateLimiter(remote_ip, port, rate_kbits_per_sec * BYTES_IN_Kbits)
def set_rate_limit(self):
return self._tc.set_rate_limit()
"outputs": [],
"source": [
"def send(configured_network, num_frames):\n",
- " vstreams_params = InputVStreamParams.make(configured_network)\n",
" configured_network.wait_for_activation(1000)\n",
+ " vstreams_params = InputVStreamParams.make(configured_network)\n",
" with InputVStreams(configured_network, vstreams_params) as vstreams:\n",
" vstream_to_buffer = {vstream: np.ndarray([1] + list(vstream.shape), dtype=vstream.dtype) for vstream in vstreams}\n",
" for _ in range(num_frames):\n",
"source": [
"## Single power measurement"
],
- "cell_type": "markdown",
- "metadata": {}
+ "cell_type": "markdown",
+ "metadata": {}
},
{
"cell_type": "code",
"%matplotlib inline\n",
"import time\n",
"\n",
- "from hailo_platform import PcieDevice, DvmTypes, PowerMeasurementTypes, SamplingPeriod, AveragingFactor, MeasurementBufferIndex # noqa F401\n"
+ "from hailo_platform import Device, DvmTypes, PowerMeasurementTypes, SamplingPeriod, AveragingFactor, MeasurementBufferIndex # noqa F401\n"
]
},
{
"metadata": {},
"outputs": [],
"source": [
- "target = PcieDevice()"
+ "target = Device()"
]
},
{
--- /dev/null
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "\n",
+ "# Python inference tutorial - Multi Process Service and Model Scheduler\n",
+ "\n",
+ "This tutorial will walk you through the inference process using The Model Scheduler.\n",
+ "\n",
+ "**Requirements:**\n",
+ "\n",
+ "* Run HailoRT Multi-Process Service before running inference. See installation steps in [Multi-Process Service](../../inference/inference.rst)\n",
+ "* Run the notebook inside the Python virtual environment: ```source hailo_virtualenv/bin/activate```\n",
+ "\n",
+ "It is recommended to use the command ``hailo tutorial`` (when inside the virtualenv) to open a Jupyter server that contains the tutorials."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Running Inference using HailoRT\n",
+ "\n",
+ "In this example we will use the Model Scheduler to run inference on multiple models.\n",
+ "Each model is represented by an HEF which is built using the Hailo Dataflow Compiler.\n",
+ "An HEF is Hailo's binary format for neural networks. The HEF files contain:\n",
+ "\n",
+ "* Target HW configuration\n",
+ "* Weights\n",
+ "* Metadata for HailoRT (e.g. input/output scaling)\n",
+ "\n",
+ "The Model Scheduler is an HailoRT component that comes to enhance and simplify the usage\n",
+ "of the same Hailo device by multiple networks. The responsibility for activating/deactivating the network\n",
+ "groups is now under HailoRT, and done **automatically** without user application intervention.\n",
+ "In order to use the Model Scheduler, create the VDevice with scheduler enabled, configure all models to the device, and start inference on all models:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "from multiprocessing import Process\n",
+ "from hailo_platform import (HEF, VDevice, HailoStreamInterface, InferVStreams, ConfigureParams,\n",
+ " InputVStreamParams, OutputVStreamParams, InputVStreams, OutputVStreams, FormatType, HailoSchedulingAlgorithm)\n",
+ "\n",
+ "\n",
+ "# Define the function to run inference on the model\n",
+ "def infer(network_group, input_vstreams_params, output_vstreams_params, input_data):\n",
+ " rep_count = 100\n",
+ " with InferVStreams(network_group, input_vstreams_params, output_vstreams_params) as infer_pipeline:\n",
+ " for i in range(rep_count):\n",
+ " infer_results = infer_pipeline.infer(input_data)\n",
+ "\n",
+ "\n",
+ "# Loading compiled HEFs:\n",
+ "first_hef_path = '../hefs/resnet_v1_18.hef'\n",
+ "second_hef_path = '../hefs/shortcut_net.hef'\n",
+ "first_hef = HEF(first_hef_path)\n",
+ "second_hef = HEF(second_hef_path)\n",
+ "hefs = [first_hef, second_hef]\n",
+ "\n",
+ "# Creating the VDevice target with scheduler enabled\n",
+ "params = VDevice.create_params()\n",
+ "params.scheduling_algorithm = HailoSchedulingAlgorithm.ROUND_ROBIN\n",
+ "with VDevice(params) as target:\n",
+ " infer_processes = []\n",
+ "\n",
+ " # Configure network groups\n",
+ " for hef in hefs:\n",
+ " configure_params = ConfigureParams.create_from_hef(hef=hef, interface=HailoStreamInterface.PCIe)\n",
+ " network_groups = target.configure(hef, configure_params)\n",
+ " network_group = network_groups[0]\n",
+ "\n",
+ " # Create input and output virtual streams params\n",
+ " # Quantized argument signifies whether or not the incoming data is already quantized.\n",
+ " # Data is quantized by HailoRT if and only if quantized == False.\n",
+ " input_vstreams_params = InputVStreamParams.make(network_group, quantized=False, format_type=FormatType.FLOAT32)\n",
+ " output_vstreams_params = OutputVStreamParams.make(network_group, quantized=True, format_type=FormatType.UINT8)\n",
+ "\n",
+ " # Define dataset params\n",
+ " input_vstream_info = hef.get_input_vstream_infos()[0]\n",
+ " image_height, image_width, channels = input_vstream_info.shape\n",
+ " num_of_frames = 10\n",
+ " low, high = 2, 20\n",
+ "\n",
+ " # Generate random dataset\n",
+ " dataset = np.random.randint(low, high, (num_of_frames, image_height, image_width, channels)).astype(np.float32)\n",
+ " input_data = {input_vstream_info.name: dataset}\n",
+ "\n",
+ " # Create infer process\n",
+ " infer_process = Process(target=infer, args=(network_group, input_vstreams_params, output_vstreams_params, input_data))\n",
+ " infer_processes.append(infer_process)\n",
+ "\n",
+ " print(f'Starting streaming on multiple models using scheduler')\n",
+ " for infer_process in infer_processes:\n",
+ " infer_process.start()\n",
+ " for infer_process in infer_processes:\n",
+ " infer_process.join()\n",
+ "\n",
+ " print('Done inference')"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.10"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
appdirs==1.4.4
argcomplete==2.0.0
contextlib2==0.6.0.post1
-distlib==0.3.4
-filelock==3.4.1
+distlib==0.3.6
+filelock==3.8.0
future==0.18.2
importlib-metadata==5.1.0
importlib-resources==5.1.2
numpy==1.23.3
typing_extensions==4.1.1
verboselogs==1.7
-virtualenv==20.4.3
+virtualenv==20.17.0
"linux_aarch64",
],
url="https://hailo.ai/",
- version="4.13.0",
+ version="4.14.0",
zip_safe=False,
)
cmake_minimum_required(VERSION 3.0.0)
-option(HAILO_BUILD_PYHAILORT_INTERNAL OFF)
+include(ExternalProject)
+
+FUNCTION(exclude_archive_libs_symbols target) # Should be kept in sync with common_compiler_options.cmake
+ if(WIN32)
+ # TODO: check if there are required actions for Windows
+ elseif(UNIX)
+ get_property(TEMP_LINK_FLAGS TARGET ${target} PROPERTY LINK_FLAGS)
+ set(TEMP_LINK_FLAGS "${TEMP_LINK_FLAGS} -Wl,--exclude-libs=ALL")
+ set_property(TARGET ${target} PROPERTY LINK_FLAGS ${TEMP_LINK_FLAGS})
+ else()
+ message(FATAL_ERROR "Unexpeced host, stopping build")
+ endif()
+ENDFUNCTION()
if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
if(NOT DEFINED PYBIND11_PYTHON_VERSION)
message(FATAL_ERROR "PYBIND11_PYTHON_VERSION is not defined. To build _pyhailort, pass python version")
endif()
-
string(REPLACE "." "" dpython ${PYBIND11_PYTHON_VERSION}) # E.g "3.5" -> "35"
if(${dpython} LESS "38")
set(m_flag "m")
set(PYTHON_MODULE_EXTENSION ".cpython-${dpython}${m_flag}-${CMAKE_SYSTEM_PROCESSOR}-linux-gnu.so")
endif()
+option(HAILO_BUILD_PYHAILORT_INTERNAL "Build _pyhailort_internal" OFF)
+
set(PYHAILORT_DIR ${CMAKE_CURRENT_LIST_DIR})
pybind11_add_module(_pyhailort
hef_api.cpp
vstream_api.cpp
quantization_api.cpp
- ${HAILORT_OPS_CPP_SOURCES}
- ${HAILORT_COMMON_CPP_SOURCES}
)
set_target_properties(_pyhailort PROPERTIES
CXX_STANDARD 14
CXX_STANDARD_REQUIRED YES
+ CXX_EXTENSIONS NO
+ C_VISIBILITY_PRESET hidden
+ CXX_VISIBILITY_PRESET hidden
+ # VISIBILITY_INLINES_HIDDEN YES
)
-target_include_directories(_pyhailort
- PRIVATE
- $<BUILD_INTERFACE:${HAILORT_INC_DIR}>
- $<BUILD_INTERFACE:${HAILORT_COMMON_DIR}>
- $<BUILD_INTERFACE:${HAILORT_SRC_DIR}>
- $<BUILD_INTERFACE:${COMMON_INC_DIR}>
-)
+find_package(HailoRT 4.14.0 EXACT REQUIRED)
-target_link_libraries(_pyhailort PRIVATE libhailort spdlog::spdlog)
+target_link_libraries(_pyhailort PRIVATE HailoRT::libhailort)
if(WIN32)
- target_link_libraries(_pyhailort PRIVATE Ws2_32 Iphlpapi Shlwapi)
-endif()
-if(HAILO_BUILD_SERVICE)
- target_link_libraries(_pyhailort PRIVATE grpc++_unsecure hailort_rpc_grpc_proto hef_proto)
+ target_link_libraries(_pyhailort PRIVATE Ws2_32)
+ target_compile_options(_pyhailort PRIVATE
+ /DWIN32_LEAN_AND_MEAN
+ /DNOMINMAX # NOMINMAX is required in order to play nice with std::min/std::max (otherwise Windows.h defines it's own)
+ /wd4201 /wd4251
+ )
endif()
target_compile_options(_pyhailort PRIVATE ${HAILORT_COMPILE_OPTIONS})
#include "hailo/hailort_common.hpp"
#include "hailo/network_group.hpp"
-#include "common/logger_macros.hpp"
-
#include "utils.hpp"
#include <pybind11/numpy.h>
**/
#include "device_api.hpp"
+#include <memory>
namespace hailort
DeviceWrapper DeviceWrapper::create_eth(const std::string &device_address, uint16_t port,
uint32_t timeout_milliseconds, uint8_t max_number_of_attempts)
{
- hailo_eth_device_info_t device_info = {};
-
- /* Validate address length */
- if (INET_ADDRSTRLEN < device_address.size()) {
- EXIT_WITH_ERROR("device_address is too long")
- }
-
- device_info.host_address.sin_family = AF_INET;
- device_info.host_address.sin_port = HAILO_ETH_PORT_ANY;
- auto status = Socket::pton(AF_INET, HAILO_ETH_ADDRESS_ANY, &(device_info.host_address.sin_addr));
- VALIDATE_STATUS(status);
-
- device_info.device_address.sin_family = AF_INET;
- device_info.device_address.sin_port = port;
- status = Socket::pton(AF_INET, device_address.c_str(), &(device_info.device_address.sin_addr));
- VALIDATE_STATUS(status);
-
- device_info.timeout_millis = timeout_milliseconds;
- device_info.max_number_of_attempts = max_number_of_attempts;
- device_info.max_payload_size = HAILO_DEFAULT_ETH_MAX_PAYLOAD_SIZE;
-
- auto device = Device::create_eth(device_info);
+ auto device = Device::create_eth(device_address, port, timeout_milliseconds, max_number_of_attempts);
VALIDATE_EXPECTED(device);
return DeviceWrapper(device.release());
py::bytes DeviceWrapper::read_memory(uint32_t address, uint32_t length)
{
- std::unique_ptr<std::string> response = make_unique_nothrow<std::string>(length, '\x00');
+ std::unique_ptr<std::string> response = std::make_unique<std::string>(length, '\x00');
VALIDATE_NOT_NULL(response, HAILO_OUT_OF_HOST_MEMORY);
MemoryView data_view(const_cast<uint8_t*>(reinterpret_cast<const uint8_t*>(response->data())), length);
{
VALIDATE_NOT_NULL(slave_config, HAILO_INVALID_ARGUMENT);
- std::unique_ptr<std::string> response = make_unique_nothrow<std::string>(length, '\x00');
+ std::unique_ptr<std::string> response = std::make_unique<std::string>(length, '\x00');
VALIDATE_NOT_NULL(response, HAILO_OUT_OF_HOST_MEMORY);
MemoryView data_view(const_cast<uint8_t*>(reinterpret_cast<const uint8_t*>(response->data())), length);
auto config_buffer = device().read_user_config();
VALIDATE_EXPECTED(config_buffer);
- std::unique_ptr<std::string> response = make_unique_nothrow<std::string>(
+ std::unique_ptr<std::string> response = std::make_unique<std::string>(
const_cast<char*>(reinterpret_cast<const char*>(config_buffer->data())), config_buffer->size());
VALIDATE_NOT_NULL(response, HAILO_OUT_OF_HOST_MEMORY);
auto config_buffer = device().read_board_config();
VALIDATE_EXPECTED(config_buffer);
- std::unique_ptr<std::string> response = make_unique_nothrow<std::string>(
+ std::unique_ptr<std::string> response = std::make_unique<std::string>(
const_cast<char*>(reinterpret_cast<const char*>(config_buffer->data())), config_buffer->size());
VALIDATE_NOT_NULL(response, HAILO_OUT_OF_HOST_MEMORY);
auto buffer = device().sensor_get_sections_info();
VALIDATE_EXPECTED(buffer);
- std::unique_ptr<std::string> response = make_unique_nothrow<std::string>(
+ std::unique_ptr<std::string> response = std::make_unique<std::string>(
const_cast<char*>(reinterpret_cast<const char*>(buffer->data())), buffer->size());
VALIDATE_NOT_NULL(response, HAILO_OUT_OF_HOST_MEMORY);
#define _DEVICE_API_HPP_
#include "hailo/hailort.h"
+#include <hailo/platform.h>
#include "hailo/device.hpp"
-#include "common/socket.hpp"
-
#include "utils.hpp"
#include "hef_api.hpp"
**/
#include "hef_api.hpp"
+#include <memory>
namespace hailort
auto hef_expected = Hef::create(hef_path);
VALIDATE_EXPECTED(hef_expected);
- hef = make_unique_nothrow<Hef>(hef_expected.release());
+ hef = std::make_unique<Hef>(hef_expected.release());
if (nullptr == hef) {
THROW_STATUS_ERROR(HAILO_OUT_OF_HOST_MEMORY);
}
auto hef_expected = Hef::create(hef_buffer);
VALIDATE_EXPECTED(hef_expected);
- hef = make_unique_nothrow<Hef>(hef_expected.release());
+ hef = std::make_unique<Hef>(hef_expected.release());
if (nullptr == hef) {
THROW_STATUS_ERROR(HAILO_OUT_OF_HOST_MEMORY);
}
.def("get_networks_names", &HefWrapper::get_networks_names)
;
- py::class_<ConfiguredNetworkGroup>(m, "ConfiguredNetworkGroup")
+ py::class_<ConfiguredNetworkGroup, std::shared_ptr<ConfiguredNetworkGroup>>(m, "ConfiguredNetworkGroup")
+ .def("is_scheduled", [](ConfiguredNetworkGroup& self)
+ {
+ return self.is_scheduled();
+ })
.def("get_name", [](ConfiguredNetworkGroup& self)
{
return self.name();
})
.def("before_fork", [](ConfiguredNetworkGroup& self)
{
-#ifdef HAILO_SUPPORT_MULTI_PROCESS
auto status = self.before_fork();
VALIDATE_STATUS(status);
-#else
- (void)self;
-#endif // HAILO_SUPPORT_MULTI_PROCESS
})
.def("after_fork_in_parent", [](ConfiguredNetworkGroup& self)
{
-#ifdef HAILO_SUPPORT_MULTI_PROCESS
auto status = self.after_fork_in_parent();
VALIDATE_STATUS(status);
-#else
- (void)self;
-#endif // HAILO_SUPPORT_MULTI_PROCESS
})
.def("after_fork_in_child", [](ConfiguredNetworkGroup& self)
{
-#ifdef HAILO_SUPPORT_MULTI_PROCESS
auto status = self.after_fork_in_child();
VALIDATE_STATUS(status);
-#else
- (void)self;
-#endif // HAILO_SUPPORT_MULTI_PROCESS
})
.def("set_scheduler_timeout", [](ConfiguredNetworkGroup& self, int timeout, const std::string &network_name="")
{
auto status = self.set_scheduler_priority(priority);
VALIDATE_STATUS(status);
})
+ .def("get_networks_names", [](ConfiguredNetworkGroup& self)
+ {
+ auto network_infos = self.get_network_infos();
+ VALIDATE_EXPECTED(network_infos);
+ std::vector<std::string> result;
+ result.reserve(network_infos->size());
+ for (const auto &info : network_infos.value()) {
+ result.push_back(info.name);
+ }
+ return py::cast(result);
+ })
+ .def("get_sorted_output_names", [](ConfiguredNetworkGroup& self)
+ {
+ auto names_list = self.get_sorted_output_names();
+ VALIDATE_EXPECTED(names_list);
+ return py::cast(names_list.release());
+ })
+ .def("get_input_vstream_infos", [](ConfiguredNetworkGroup& self, const std::string &name)
+ {
+ auto result = self.get_input_vstream_infos(name);
+ VALIDATE_EXPECTED(result);
+ return py::cast(result.value());
+ })
+ .def("get_output_vstream_infos", [](ConfiguredNetworkGroup& self, const std::string &name)
+ {
+ auto result = self.get_output_vstream_infos(name);
+ VALIDATE_EXPECTED(result);
+ return py::cast(result.value());
+ })
+ .def("get_all_vstream_infos", [](ConfiguredNetworkGroup& self, const std::string &name)
+ {
+ auto result = self.get_all_vstream_infos(name);
+ VALIDATE_EXPECTED(result);
+ return py::cast(result.value());
+ })
+ .def("get_all_stream_infos", [](ConfiguredNetworkGroup& self, const std::string &name)
+ {
+ auto result = self.get_all_stream_infos(name);
+ VALIDATE_EXPECTED(result);
+ return py::cast(result.value());
+ })
+ .def("get_input_stream_infos", [](ConfiguredNetworkGroup& self, const std::string &name)
+ {
+ std::vector<hailo_stream_info_t> input_streams_infos;
+ auto all_streams = self.get_all_stream_infos(name);
+ VALIDATE_EXPECTED(all_streams);
+ for (auto &info : all_streams.value()) {
+ if (HAILO_H2D_STREAM == info.direction) {
+ input_streams_infos.push_back(std::move(info));
+ }
+ }
+ return py::cast(input_streams_infos);
+ })
+ .def("get_output_stream_infos", [](ConfiguredNetworkGroup& self, const std::string &name)
+ {
+ std::vector<hailo_stream_info_t> output_streams_infos;
+ auto all_streams = self.get_all_stream_infos(name);
+ VALIDATE_EXPECTED(all_streams);
+ for (auto &info : all_streams.value()) {
+ if (HAILO_D2H_STREAM == info.direction) {
+ output_streams_infos.push_back(std::move(info));
+ }
+ }
+ return py::cast(output_streams_infos);
+ })
+ .def("get_vstream_names_from_stream_name", [](ConfiguredNetworkGroup& self, const std::string &stream_name)
+ {
+ auto result = self.get_vstream_names_from_stream_name(stream_name);
+ VALIDATE_EXPECTED(result);
+ return py::cast(result.release());
+ })
+ .def("get_stream_names_from_vstream_name", [](ConfiguredNetworkGroup& self, const std::string &vstream_name)
+ {
+ auto result = self.get_stream_names_from_vstream_name(vstream_name);
+ VALIDATE_EXPECTED(result);
+ return py::cast(result.release());
+ })
+ .def("make_input_vstream_params", [](ConfiguredNetworkGroup& self, const std::string &name, bool quantized, hailo_format_type_t format_type,
+ uint32_t timeout_ms, uint32_t queue_size)
+ {
+ auto result = self.make_input_vstream_params(quantized, format_type, timeout_ms, queue_size, name);
+ VALIDATE_EXPECTED(result);
+ return py::cast(result.release());
+ })
+ .def("make_output_vstream_params", [](ConfiguredNetworkGroup& self, const std::string &name, bool quantized, hailo_format_type_t format_type,
+ uint32_t timeout_ms, uint32_t queue_size)
+ {
+ auto result = self.make_output_vstream_params(quantized, format_type, timeout_ms, queue_size, name);
+ VALIDATE_EXPECTED(result);
+ return py::cast(result.release());
+ })
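+ // Pickling sketch: __getstate__ captures the (client handle, network group name) pair,
+ // and __setstate__ re-attaches to the same network group via duplicate_network_group_client.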
+ .def(py::pickle(
+ [](const ConfiguredNetworkGroup &cng) { // __getstate__
+ auto handle = cng.get_client_handle();
+ VALIDATE_EXPECTED(handle);
+ return py::make_tuple(handle.value(), cng.name());
+ },
+ [](py::tuple t) { // __setstate__
+ auto handle = t[0].cast<uint32_t>();
+ auto net_group_name = t[1].cast<std::string>();
+ auto net_group = ConfiguredNetworkGroup::duplicate_network_group_client(handle, net_group_name);
+ VALIDATE_EXPECTED(net_group);
+
+ return net_group.value();
+ }
+ ))
;
ActivatedAppContextManagerWrapper::add_to_python_module(m);
#include "vstream_api.hpp"
#include "utils.hpp"
-#include "common/logger_macros.hpp"
#include <pybind11/pybind11.h>
#include <pybind11/numpy.h>
auto buffer = get_expected_buffer_float32();
VALIDATE_EXPECTED(buffer);
+ auto type = py::dtype(HailoRTBindingsCommon::convert_format_type_to_string(HAILO_FORMAT_TYPE_FLOAT32));
+ auto shape = *py::array::ShapeContainer({buffer->size()});
+
// Note: The ownership of the buffer is transferred to Python wrapped as a py::array.
// When the py::array isn't referenced anymore in Python and is destructed, the py::capsule's dtor
// is called too (and it deletes the raw buffer)
- auto type = py::dtype(HailoRTBindingsCommon::convert_format_type_to_string(HAILO_FORMAT_TYPE_FLOAT32));
- auto shape = *py::array::ShapeContainer({buffer->size()});
- const auto unmanaged_addr = buffer.release().release();
+ auto unmanaged_addr_exp = buffer->storage().release();
+ VALIDATE_EXPECTED(unmanaged_addr_exp);
+ const auto unmanaged_addr = unmanaged_addr_exp.release();
return py::array(type, shape, unmanaged_addr,
py::capsule(unmanaged_addr, [](void *p) { delete reinterpret_cast<uint8_t*>(p); }));
}
auto core_op_metadata = hef.hef_ptr()->pimpl->get_core_op_metadata(net_group_name);
VALIDATE_EXPECTED(core_op_metadata);
- return py::cast(core_op_metadata->get_all_layer_infos());
+ return py::cast(core_op_metadata.value()->get_all_layer_infos());
}
PYBIND11_MODULE(_pyhailort_internal, m) {
.def_readonly("cluster_index", &BufferIndices::cluster_index)
;
+ py::enum_<SENSOR_CONFIG_OPCODES_t>(m, "SensorConfigOpCode")
+ .value("SENSOR_CONFIG_OPCODES_WR", SENSOR_CONFIG_OPCODES_WR)
+ .value("SENSOR_CONFIG_OPCODES_RD", SENSOR_CONFIG_OPCODES_RD)
+ .value("SENSOR_CONFIG_OPCODES_RMW", SENSOR_CONFIG_OPCODES_RMW)
+ .value("SENSOR_CONFIG_OPCODES_DELAY", SENSOR_CONFIG_OPCODES_DELAY)
+ ;
+
py::class_<LayerInfo>(m, "HailoLayerInfo", py::module_local())
.def_readonly("is_mux", &LayerInfo::is_mux)
.def_readonly("mux_predecessors", &LayerInfo::predecessor)
+++ /dev/null
-/**
- * Copyright (c) 2022 Hailo Technologies Ltd. All rights reserved.
- * Distributed under the MIT license (https://opensource.org/licenses/MIT)
-**/
-/**
- * @file net_flow_api.hpp
- * @brief Defines binding to a HailoRT++ ops usage over Python.
- **/
-
-#ifndef _HAILO_NET_FLOW_API_HPP_
-#define _HAILO_NET_FLOW_API_HPP_
-
-#include "hailo/hailort.h"
-
-#include "net_flow/ops/yolo_post_process.hpp"
-
-#include "utils.hpp"
-#include "bindings_common.hpp"
-
-
-namespace hailort
-{
-namespace net_flow
-{
-
-class YOLOv5PostProcessOpWrapper
-{
-public:
- static YOLOv5PostProcessOpWrapper create(const std::vector<std::vector<int>> &anchors,
- const std::vector<hailo_3d_image_shape_t> &shapes, const std::vector<hailo_format_t> &formats,
- const std::vector<hailo_quant_info_t> &quant_infos, float32_t image_height, float32_t image_width, float32_t confidence_threshold,
- float32_t iou_threshold, uint32_t num_of_classes, uint32_t max_boxes,
- bool cross_classes=true)
- {
- std::map<std::string, net_flow::BufferMetaData> inputs_metadata;
- std::map<std::string, net_flow::BufferMetaData> outputs_metadata;
-
- net_flow::NmsPostProcessConfig nms_post_process_config{};
- nms_post_process_config.nms_score_th = confidence_threshold;
- nms_post_process_config.nms_iou_th = iou_threshold;
- nms_post_process_config.max_proposals_per_class = max_boxes;
- nms_post_process_config.classes = num_of_classes;
- nms_post_process_config.background_removal = false;
- nms_post_process_config.background_removal_index = 0;
- nms_post_process_config.cross_classes = cross_classes;
- net_flow::YoloPostProcessConfig yolo_post_process_config{};
- yolo_post_process_config.image_height = image_height;
- yolo_post_process_config.image_width = image_width;
- // Each layer anchors vector is structured as {w,h} pairs.
- for (size_t i = 0; i < anchors.size(); ++i) {
- auto name = std::to_string(i);
- yolo_post_process_config.anchors.insert({name, anchors[i]});
- BufferMetaData input_metadata = {
- shapes[i],
- shapes[i],
- formats[i],
- quant_infos[i]
- };
- inputs_metadata.insert({name, input_metadata});
- }
- auto op = YOLOv5PostProcessOp::create(inputs_metadata, outputs_metadata, nms_post_process_config, yolo_post_process_config);
- VALIDATE_EXPECTED(op);
-
- return YOLOv5PostProcessOpWrapper(op.release(), num_of_classes, max_boxes);
- }
-
- static void add_to_python_module(py::module &m)
- {
- py::class_<YOLOv5PostProcessOpWrapper>(m, "YOLOv5PostProcessOp")
- .def("create", &YOLOv5PostProcessOpWrapper::create)
- .def("execute",[](YOLOv5PostProcessOpWrapper &self, const std::vector<py::array> &tensors)
- {
- std::map<std::string, MemoryView> data_views;
- for (size_t i = 0; i < tensors.size(); ++i) {
- data_views.insert({std::to_string(i),
- MemoryView(const_cast<void*>(reinterpret_cast<const void*>(tensors[i].data())), tensors[i].nbytes())});
- }
-
- hailo_nms_info_t nms_info = {
- self.m_num_of_classes,
- self.m_max_boxes,
- sizeof(hailo_bbox_float32_t),
- 1,
- false,
- hailo_nms_defuse_info_t()
- };
- hailo_format_t output_format = {
- HAILO_FORMAT_TYPE_FLOAT32,
- HAILO_FORMAT_ORDER_HAILO_NMS,
- HAILO_FORMAT_FLAGS_QUANTIZED,
- };
-
- auto buffer = Buffer::create(HailoRTCommon::get_nms_host_frame_size(nms_info, output_format), 0);
- VALIDATE_STATUS(buffer.status());
- std::map<std::string, MemoryView> outputs;
- outputs.insert({"", MemoryView(buffer.value().data(), buffer.value().size())});
- auto status = self.m_post_processing_op->execute(data_views, outputs);
- VALIDATE_STATUS(status);
-
- // Note: The ownership of the buffer is transferred to Python wrapped as a py::array.
- // When the py::array isn't referenced anymore in Python and is destructed, the py::capsule's dtor
- // is called too (and it deletes the raw buffer)
- auto type = py::dtype(HailoRTBindingsCommon::convert_format_type_to_string(HAILO_FORMAT_TYPE_FLOAT32));
- auto shape = *py::array::ShapeContainer({buffer.value().size()});
- const auto unmanaged_addr = buffer.release().release();
- return py::array(type, shape, unmanaged_addr,
- py::capsule(unmanaged_addr, [](void *p) { delete reinterpret_cast<uint8_t*>(p); }));
- })
- ;
- }
-
-private:
- YOLOv5PostProcessOpWrapper(std::shared_ptr<Op> post_processing_op, uint32_t num_of_classes, uint32_t max_bboxes)
- : m_post_processing_op(post_processing_op),
- m_num_of_classes(num_of_classes),
- m_max_boxes(max_bboxes) {}
-
- std::shared_ptr<Op> m_post_processing_op;
- uint32_t m_num_of_classes = 0;
- uint32_t m_max_boxes = 0;
-};
-
-void NetFlow_api_initialize_python_module(py::module &m)
-{
- YOLOv5PostProcessOpWrapper::add_to_python_module(m);
-}
-
-
-} /* namespace net_flow */
-} /* namespace hailort */
-
-#endif /* _HAILO_NET_FLOW_API_HPP_ */
#include "hailo/hailort.h"
#include "hailo/hailort_defaults.hpp"
+#include "hailo/network_rate_calculator.hpp"
#include "hef_api.hpp"
#include "vstream_api.hpp"
#include "vdevice_api.hpp"
#include "device_api.hpp"
#include "quantization_api.hpp"
-#include "net_flow_api.hpp"
#include "utils.hpp"
-#include "utils.h"
#include "bindings_common.hpp"
-#include "sensor_config_exports.h"
-#if defined(__GNUC__)
-#include "common/os/posix/traffic_control.hpp"
-#endif
+// Should be kept in sync with the corresponding defines in socket.hpp
+#define PADDING_BYTES_SIZE (6)
+#define PADDING_ALIGN_BYTES (8 - PADDING_BYTES_SIZE)
+#define MIN_UDP_PAYLOAD_SIZE (24)
+#define MAX_UDP_PAYLOAD_SIZE (1456)
+#define MAX_UDP_PADDED_PAYLOAD_SIZE (MAX_UDP_PAYLOAD_SIZE - PADDING_BYTES_SIZE - PADDING_ALIGN_BYTES)
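+// With the values above, MAX_UDP_PADDED_PAYLOAD_SIZE = 1456 - 6 - (8 - 6) = 1448 usable payload bytes per packet.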
namespace hailort
{
}
}
-#if defined(__GNUC__)
-
-class TrafficControlUtilWrapper final
+class NetworkRateLimiter final
{
public:
- static TrafficControlUtilWrapper create(const std::string &ip, uint16_t port, uint32_t rate_bytes_per_sec)
+ static void set_rate_limit(const std::string &ip, uint16_t port, uint32_t rate_bytes_per_sec)
{
- auto tc_expected = TrafficControlUtil::create(ip, port, rate_bytes_per_sec);
- VALIDATE_STATUS(tc_expected.status());
-
- auto tc_ptr = make_unique_nothrow<TrafficControlUtil>(tc_expected.release());
- if (nullptr == tc_ptr) {
- VALIDATE_STATUS(HAILO_OUT_OF_HOST_MEMORY);
- }
- return TrafficControlUtilWrapper(std::move(tc_ptr));
+ VALIDATE_STATUS(NetworkUdpRateCalculator::set_rate_limit(ip, port, rate_bytes_per_sec));
}
- void set_rate_limit()
+ static void reset_rate_limit(const std::string &ip, uint16_t port)
{
- VALIDATE_STATUS(m_tc->set_rate_limit());
- }
-
- void reset_rate_limit()
- {
- VALIDATE_STATUS(m_tc->reset_rate_limit());
+ VALIDATE_STATUS(NetworkUdpRateCalculator::reset_rate_limit(ip, port));
}
static std::string get_interface_name(const std::string &ip)
{
- auto name = TrafficControlUtil::get_interface_name(ip);
+ auto name = NetworkUdpRateCalculator::get_interface_name(ip);
VALIDATE_STATUS(name.status());
return name.value();
static void add_to_python_module(py::module &m)
{
- py::class_<TrafficControlUtilWrapper>(m, "TrafficControlUtil")
- .def(py::init(&TrafficControlUtilWrapper::create))
- .def("set_rate_limit", &TrafficControlUtilWrapper::set_rate_limit)
- .def("reset_rate_limit", &TrafficControlUtilWrapper::reset_rate_limit)
+ py::class_<NetworkRateLimiter>(m, "NetworkRateLimiter")
+ .def("set_rate_limit", &NetworkRateLimiter::set_rate_limit)
+ .def("reset_rate_limit", &NetworkRateLimiter::reset_rate_limit)
.def_static("get_interface_name", [](const std::string &ip) {
- return TrafficControlUtilWrapper::get_interface_name(ip);
+ return NetworkRateLimiter::get_interface_name(ip);
})
;
}
-
-private:
- TrafficControlUtilWrapper(std::unique_ptr<TrafficControlUtil> tc) :
- m_tc(std::move(tc))
- {}
-
- std::unique_ptr<TrafficControlUtil> m_tc;
};
-#endif
-
static void validate_versions_match()
{
hailo_version_t libhailort_version = {};
.value("HAILO8_ISP", HAILO_SENSOR_TYPES_HAILO8_ISP)
;
- py::enum_<SENSOR_CONFIG_OPCODES_t>(m, "SensorConfigOpCode")
- .value("SENSOR_CONFIG_OPCODES_WR", SENSOR_CONFIG_OPCODES_WR)
- .value("SENSOR_CONFIG_OPCODES_RD", SENSOR_CONFIG_OPCODES_RD)
- .value("SENSOR_CONFIG_OPCODES_RMW", SENSOR_CONFIG_OPCODES_RMW)
- .value("SENSOR_CONFIG_OPCODES_DELAY", SENSOR_CONFIG_OPCODES_DELAY)
- ;
-
py::class_<hailo_i2c_slave_config_t>(m, "I2CSlaveConfig")
.def(py::init<>())
.def_readwrite("endianness", &hailo_i2c_slave_config_t::endianness)
.value("MIPI", HAILO_STREAM_INTERFACE_MIPI)
;
+ py::enum_<hailo_vstream_stats_flags_t>(m, "VStreamStatsFlags")
+ .value("NONE", hailo_vstream_stats_flags_t::HAILO_VSTREAM_STATS_NONE)
+ .value("MEASURE_FPS", hailo_vstream_stats_flags_t::HAILO_VSTREAM_STATS_MEASURE_FPS)
+ .value("MEASURE_LATENCY", hailo_vstream_stats_flags_t::HAILO_VSTREAM_STATS_MEASURE_LATENCY)
+ ;
+
+ py::enum_<hailo_pipeline_elem_stats_flags_t>(m, "PipelineElemStatsFlags")
+ .value("NONE", hailo_pipeline_elem_stats_flags_t::HAILO_PIPELINE_ELEM_STATS_NONE)
+ .value("MEASURE_FPS", hailo_pipeline_elem_stats_flags_t::HAILO_PIPELINE_ELEM_STATS_MEASURE_FPS)
+ .value("MEASURE_LATENCY", hailo_pipeline_elem_stats_flags_t::HAILO_PIPELINE_ELEM_STATS_MEASURE_LATENCY)
+ .value("MEASURE_QUEUE_SIZE", hailo_pipeline_elem_stats_flags_t::HAILO_PIPELINE_ELEM_STATS_MEASURE_QUEUE_SIZE)
+ ;
+
py::class_<hailo_vstream_params_t>(m, "VStreamParams")
.def(py::init<>())
.def_readwrite("user_buffer_format", &hailo_vstream_params_t::user_buffer_format)
.def_readwrite("timeout_ms", &hailo_vstream_params_t::timeout_ms)
.def_readwrite("queue_size", &hailo_vstream_params_t::queue_size)
+ .def_readonly("vstream_stats_flags", &hailo_vstream_params_t::vstream_stats_flags)
+ .def_readonly("pipeline_elements_stats_flags", &hailo_vstream_params_t::pipeline_elements_stats_flags)
+ .def(py::pickle(
+ [](const hailo_vstream_params_t &vstream_params) { // __getstate__
+ return py::make_tuple(
+ vstream_params.user_buffer_format,
+ vstream_params.timeout_ms,
+ vstream_params.queue_size,
+ vstream_params.vstream_stats_flags,
+ vstream_params.pipeline_elements_stats_flags);
+ },
+ [](py::tuple t) { // __setstate__
+ hailo_vstream_params_t vstream_params;
+ vstream_params.user_buffer_format = t[0].cast<hailo_format_t>();
+ vstream_params.timeout_ms = t[1].cast<uint32_t>();
+ vstream_params.queue_size = t[2].cast<uint32_t>();
+ vstream_params.vstream_stats_flags = t[3].cast<hailo_vstream_stats_flags_t>();
+ vstream_params.pipeline_elements_stats_flags = t[4].cast<hailo_pipeline_elem_stats_flags_t>();
+ return vstream_params;
+ }
+ ))
;
py::enum_<hailo_latency_measurement_flags_t>(m, "LatencyMeasurementFlags")
},
[](VDeviceParamsWrapper& params, const uint32_t& device_count) {
params.orig_params.device_count = device_count;
- }
+ }
)
.def_property("scheduling_algorithm",
[](const VDeviceParamsWrapper& params) -> uint32_t {
},
[](VDeviceParamsWrapper& params, hailo_scheduling_algorithm_t scheduling_algorithm) {
params.orig_params.scheduling_algorithm = scheduling_algorithm;
- }
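+ // The multi-process service is enabled exactly when a scheduling algorithm is selected: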
+ params.orig_params.multi_process_service = (HAILO_SCHEDULING_ALGORITHM_NONE != scheduling_algorithm);
+ }
)
.def_property("group_id",
[](const VDeviceParamsWrapper& params) -> py::str {
params.orig_params.group_id = params.group_id_str.c_str();
}
)
- .def_property("multi_process_service",
- [](const VDeviceParamsWrapper& params) -> uint32_t {
+ .def_property_readonly("multi_process_service",
+ [](const VDeviceParamsWrapper& params) -> bool {
return params.orig_params.multi_process_service;
- },
- [](VDeviceParamsWrapper& params, bool multi_process_service) {
- params.orig_params.multi_process_service = multi_process_service;
}
)
.def_static("default", []() {
VStream_api_initialize_python_module(m);
VDevice_api_initialize_python_module(m);
DeviceWrapper::add_to_python_module(m);
- hailort::net_flow::NetFlow_api_initialize_python_module(m);
- #if defined(__GNUC__)
- TrafficControlUtilWrapper::add_to_python_module(m);
- #endif
+ NetworkRateLimiter::add_to_python_module(m);
std::stringstream version;
version << HAILORT_MAJOR_VERSION << "." << HAILORT_MINOR_VERSION << "." << HAILORT_REVISION_VERSION;
#include "quantization_api.hpp"
#include "bindings_common.hpp"
+#include <iostream>
+
namespace hailort
{
static_cast<float32_t*>(dst_buffer.mutable_data()), shape_size, quant_info);
break;
default:
- LOGGER__ERROR("Output quantization isn't supported from src format type uint8 to dst format type = {}",
- HailoRTBindingsCommon::convert_format_type_to_string(dst_dtype));
+ std::cerr << "Output quantization isn't supported from src format type uint8 to dst format type = " << HailoRTBindingsCommon::convert_format_type_to_string(dst_dtype);
THROW_STATUS_ERROR(HAILO_INVALID_ARGUMENT);
break;
}
static_cast<float32_t*>(dst_buffer.mutable_data()), shape_size, quant_info);
break;
default:
- LOGGER__ERROR("Output quantization isn't supported from src dormat type uint16 to dst format type = {}",
- HailoRTBindingsCommon::convert_format_type_to_string(dst_dtype));
+ std::cerr << "Output quantization isn't supported from src dormat type uint16 to dst format type = " << HailoRTBindingsCommon::convert_format_type_to_string(dst_dtype);
THROW_STATUS_ERROR(HAILO_INVALID_ARGUMENT);
break;
}
static_cast<float32_t*>(dst_buffer.mutable_data()), shape_size, quant_info);
break;
default:
- LOGGER__ERROR("Output quantization isn't supported from src format type float32 to dst format type = {}",
- HailoRTBindingsCommon::convert_format_type_to_string(dst_dtype));
+ std::cerr << "Output quantization isn't supported from src format type float32 to dst format type = " << HailoRTBindingsCommon::convert_format_type_to_string(dst_dtype);
THROW_STATUS_ERROR(HAILO_INVALID_ARGUMENT);
break;
}
static_cast<float32_t*>(dst_buffer.mutable_data()), shape_size, quant_info);
break;
default:
- LOGGER__ERROR("Output quantization isn't supported from src format type uint8 to dst format type = {}",
- HailoRTBindingsCommon::convert_format_type_to_string(dst_dtype));
+ std::cerr << "Output quantization isn't supported from src format type uint8 to dst format type = " << HailoRTBindingsCommon::convert_format_type_to_string(dst_dtype);
THROW_STATUS_ERROR(HAILO_INVALID_ARGUMENT);
break;
}
static_cast<float32_t*>(dst_buffer.mutable_data()), shape_size, quant_info);
break;
default:
- LOGGER__ERROR("Output quantization isn't supported from src dormat type uint16 to dst format type = {}",
- HailoRTBindingsCommon::convert_format_type_to_string(dst_dtype));
+ std::cerr << "Output quantization isn't supported from src dormat type uint16 to dst format type = " << HailoRTBindingsCommon::convert_format_type_to_string(dst_dtype);
THROW_STATUS_ERROR(HAILO_INVALID_ARGUMENT);
break;
}
static_cast<float32_t*>(dst_buffer.mutable_data()), shape_size, quant_info);
break;
default:
- LOGGER__ERROR("Output quantization isn't supported from src format type float32 to dst format type = {}",
- HailoRTBindingsCommon::convert_format_type_to_string(dst_dtype));
+ std::cerr << "Output quantization isn't supported from src format type float32 to dst format type = " << HailoRTBindingsCommon::convert_format_type_to_string(dst_dtype);
THROW_STATUS_ERROR(HAILO_INVALID_ARGUMENT);
break;
}
QuantizationBindings::dequantize_output_buffer_from_float32_in_place(dst_buffer, dst_dtype, shape_size, quant_info);
break;
default:
- LOGGER__ERROR("Unsupported src format type = {}", HailoRTBindingsCommon::convert_format_type_to_string(dst_dtype));
+ std::cerr << "Unsupported src format type = " << HailoRTBindingsCommon::convert_format_type_to_string(dst_dtype);
THROW_STATUS_ERROR(HAILO_INVALID_ARGUMENT);
break;
}
QuantizationBindings::dequantize_output_buffer_from_float32(src_buffer, dst_buffer, dst_dtype, shape_size, quant_info);
break;
default:
- LOGGER__ERROR("Unsupported src format type = {}", HailoRTBindingsCommon::convert_format_type_to_string(dst_dtype));
+ std::cerr << "Unsupported src format type = " << HailoRTBindingsCommon::convert_format_type_to_string(dst_dtype);
THROW_STATUS_ERROR(HAILO_INVALID_ARGUMENT);
break;
}
static_cast<uint8_t*>(dst_buffer.mutable_data()), shape_size, quant_info);
break;
default:
- LOGGER__ERROR("Input quantization isn't supported from src format type uint8 to dst format type = {}", HailoRTBindingsCommon::convert_format_type_to_string(dst_dtype));
+ std::cerr << "Input quantization isn't supported from src format type uint8 to dst format type = " << HailoRTBindingsCommon::convert_format_type_to_string(dst_dtype);
THROW_STATUS_ERROR(HAILO_INVALID_ARGUMENT);
break;
}
static_cast<uint16_t*>(dst_buffer.mutable_data()), shape_size, quant_info);
break;
default:
- LOGGER__ERROR("Input quantization isn't supported from src format type uint16 to dst format type = {}",
- HailoRTBindingsCommon::convert_format_type_to_string(dst_dtype));
+ std::cerr << "Input quantization isn't supported from src format type uint16 to dst format type = " << HailoRTBindingsCommon::convert_format_type_to_string(dst_dtype);
THROW_STATUS_ERROR(HAILO_INVALID_ARGUMENT);
break;
}
static_cast<uint16_t*>(dst_buffer.mutable_data()), shape_size, quant_info);
break;
default:
- LOGGER__ERROR("Input quantization isn't supported from src format type float32 to dst format type = {}",
- HailoRTBindingsCommon::convert_format_type_to_string(dst_dtype));
+ std::cerr << "Input quantization isn't supported from src format type float32 to dst format type = " <<
+ HailoRTBindingsCommon::convert_format_type_to_string(dst_dtype);
THROW_STATUS_ERROR(HAILO_INVALID_ARGUMENT);
break;
}
QuantizationBindings::quantize_input_buffer_from_float32(src_buffer, dst_buffer, dst_dtype, shape_size, quant_info);
break;
default:
- LOGGER__ERROR("Input quantization isn't supported for src format type = {}", HailoRTBindingsCommon::convert_format_type_to_string(dst_dtype));
+ std::cerr << "Input quantization isn't supported for src format type = " << HailoRTBindingsCommon::convert_format_type_to_string(dst_dtype);
THROW_STATUS_ERROR(HAILO_INVALID_ARGUMENT);
break;
}
[](hailo_stream_parameters_t& self) -> const __property_type& \
{ \
if (__interface_value != self.stream_interface) { \
- LOGGER__ERROR("Stream params interface is not {}.", #__interface_value); \
+ std::cerr << "Stream params interface is not " << __interface_value << "."; \
THROW_STATUS_ERROR(HAILO_INVALID_OPERATION); \
} \
if (__direction_value != self.direction) { \
- LOGGER__ERROR("Stream params direction is not {}.", #__direction_value); \
+ std::cerr << "Stream params direction is not " << __direction_value << "."; \
THROW_STATUS_ERROR(HAILO_INVALID_OPERATION); \
} \
return self.__property_name; \
[](hailo_stream_parameters_t& self, const __property_type& value) \
{ \
if (__interface_value != self.stream_interface) { \
- LOGGER__ERROR("Stream params interface is not {}.", #__interface_value); \
+ std::cerr << "Stream params interface is not " << __interface_value << "."; \
THROW_STATUS_ERROR(HAILO_INVALID_OPERATION); \
} \
if (__direction_value != self.direction) { \
- LOGGER__ERROR("Stream params direction is not {}.", #__direction_value); \
+ std::cerr << "Stream params direction is not " << __direction_value << "."; \
THROW_STATUS_ERROR(HAILO_INVALID_OPERATION); \
} \
self.__property_name = value; \
#include "hailo/vdevice.hpp"
#include "hailo/hailort_common.hpp"
-#include "common/logger_macros.hpp"
-
-#ifdef HAILO_SUPPORT_MULTI_PROCESS
-#include "service/rpc_client_utils.hpp"
-#endif // HAILO_SUPPORT_MULTI_PROCESS
-
#include "utils.hpp"
-
+#include <iostream>
#include <pybind11/pybind11.h>
#include <pybind11/numpy.h>
#include <pybind11/detail/common.h>
static VDeviceWrapper create(const VDeviceParamsWrapper ¶ms, const std::vector<std::string> &device_ids)
{
if (params.orig_params.device_ids != nullptr && (!device_ids.empty())) {
- LOGGER__ERROR("VDevice device_ids can be set in params or device_ids argument. Both parameters were passed to the c'tor");
+ std::cerr << "VDevice device_ids can be set in params or device_ids argument. Both parameters were passed to the c'tor";
throw HailoRTStatusException(std::to_string(HAILO_INVALID_OPERATION));
}
auto modified_params = params;
void before_fork()
{
-#ifdef HAILO_SUPPORT_MULTI_PROCESS
if (m_vdevice != nullptr) {
auto status = m_vdevice->before_fork();
VALIDATE_STATUS(status);
}
-#endif // HAILO_SUPPORT_MULTI_PROCESS
}
void after_fork_in_parent()
{
-#ifdef HAILO_SUPPORT_MULTI_PROCESS
if (m_vdevice != nullptr) {
auto status = m_vdevice->after_fork_in_parent();
VALIDATE_STATUS(status);
}
-#endif // HAILO_SUPPORT_MULTI_PROCESS
}
void after_fork_in_child()
{
-#ifdef HAILO_SUPPORT_MULTI_PROCESS
if (m_vdevice != nullptr) {
auto status = m_vdevice->after_fork_in_child();
VALIDATE_STATUS(status);
}
-#endif // HAILO_SUPPORT_MULTI_PROCESS
}
private:
* @brief Implementation of binding to virtual stream usage over Python.
**/
-#include "common/logger_macros.hpp"
-#include "common/utils.hpp"
-
#include "vstream_api.hpp"
#include "bindings_common.hpp"
#include "utils.hpp"
+#include <iostream>
namespace hailort
std::unordered_map<std::string, std::shared_ptr<InputVStream>> input_vstreams;
for (auto &input : input_vstreams_expected.value()) {
auto input_name = input.name();
- input_vstreams.emplace(input_name, make_shared_nothrow<InputVStream>(std::move(input)));
+ input_vstreams.emplace(input_name, std::make_shared<InputVStream>(std::move(input)));
}
return InputVStreamsWrapper(input_vstreams);
}
{
auto input = m_input_vstreams.find(name);
if (m_input_vstreams.end() == input) {
- LOGGER__ERROR("Input virtual stream for name={} not found", name);
+ std::cerr << "Input virtual stream for name=" << name << " not found";
THROW_STATUS_ERROR(HAILO_NOT_FOUND);
}
// Note: The ownership of the buffer is transferred to Python wrapped as a py::array.
// When the py::array isn't referenced anymore in Python and is destructed, the py::capsule's dtor
// is called too (and it deletes the raw buffer)
- const auto unmanaged_addr = buffer.release().release();
+ auto unmanaged_addr_exp = buffer->storage().release();
+ VALIDATE_EXPECTED(unmanaged_addr_exp);
+ const auto unmanaged_addr = unmanaged_addr_exp.release();
return py::array(get_dtype(self), get_shape(self), unmanaged_addr,
py::capsule(unmanaged_addr, [](void *p) { delete reinterpret_cast<uint8_t*>(p); }));
})
std::unordered_map<std::string, std::shared_ptr<OutputVStream>> output_vstreams;
for (auto &output : output_vstreams_expected.value()) {
auto output_name = output.name();
- output_vstreams.emplace(output_name, make_shared_nothrow<OutputVStream>(std::move(output)));
+ output_vstreams.emplace(output_name, std::make_shared<OutputVStream>(std::move(output)));
}
return OutputVStreamsWrapper(output_vstreams);
}
{
auto output = m_output_vstreams.find(name);
if (m_output_vstreams.end() == output) {
- LOGGER__ERROR("Output virtual stream for name={} not found", name);
+ std::cerr << "Output virtual stream for name=" << name << " not found";
THROW_STATUS_ERROR(HAILO_NOT_FOUND);
}
{
auto infer_pipeline = InferVStreams::create(network_group, input_vstreams_params, output_vstreams_params);
VALIDATE_EXPECTED(infer_pipeline);
- auto infer_vstream_ptr = make_shared_nothrow<InferVStreams>(std::move(infer_pipeline.value()));
+ auto infer_vstream_ptr = std::make_shared<InferVStreams>(std::move(infer_pipeline.value()));
return InferVStreamsWrapper(infer_vstream_ptr);
}
return HailoRTBindingsCommon::get_pybind_shape(output->get().get_info(), output->get().get_user_buffer_format());
}
- LOGGER__ERROR("Stream {} not found", stream_name);
+ std::cerr << "Stream " << stream_name << " not found";
THROW_STATUS_ERROR(HAILO_NOT_FOUND);
}
#ifndef _VSTREAM_API_HPP_
#define _VSTREAM_API_HPP_
-#include "common/logger_macros.hpp"
-#include "common/utils.hpp"
#include "hailo/vstream.hpp"
#include "hailo/inference_pipeline.hpp"
#include "utils.hpp"
python_versions:
- version: '3.8'
installation: manual
- package_name: https://launchpad.net/ubuntu/+source/python3.8/3.8.2-1ubuntu1/+build/18834117/+files/libpython3.8-dev_3.8.2-1ubuntu1_arm64.deb
+ package_name: https://launchpad.net/ubuntu/+source/python3.8/3.8.2-1ubuntu1/+build/18834117/+files/libpython3.8-dev_3.8.2-1ubuntu1_arm64.deb
package_dest: /usr/include/aarch64-linux-gnu
- version: '3.9'
installation: manual
- package_name: https://launchpad.net/~deadsnakes/+archive/ubuntu/ppa/+build/24906233/+files/libpython3.9-dev_3.9.16-1+bionic1_arm64.deb
+ package_name: https://launchpad.net/~deadsnakes/+archive/ubuntu/ppa/+build/26280901/+files/libpython3.9-dev_3.9.17-1+focal1_arm64.deb
package_dest: /usr/include/aarch64-linux-gnu
- version: '3.10'
installation: manual
- package_name: https://launchpadlibrarian.net/569418529/libpython3.10-dev_3.10.0-5_arm64.deb
+ package_name: https://launchpadlibrarian.net/569418529/libpython3.10-dev_3.10.0-5_arm64.deb
package_dest: /usr/include/aarch64-linux-gnu
- name: linux.armv7l
required_packages:
project(hailort-examples)
+if(WIN32)
+ add_compile_options(/W4)
+elseif(UNIX)
+ if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID STREQUAL "QCC")
+ add_compile_options(-Wall -Wextra -Wconversion)
+ elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
+ add_compile_options(-Wall -Wextra -Wconversion -Wno-missing-braces)
+ endif()
+else()
+ message(FATAL_ERROR "Unexpeced host, stopping build")
+endif()
+
+if (HAILO_COMPILE_WARNING_AS_ERROR)
+ # Treat warnings as errors for all examples
+ if(WIN32)
+ add_compile_options(/WX)
+ elseif(UNIX)
+ add_compile_options(-Werror)
+ else()
+ message(FATAL_ERROR "Unexpeced host, stopping build")
+ endif()
+endif()
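+
+# For example, warnings-as-errors can be enabled at configure time with the standard option syntax:
+#   cmake .. -DHAILO_COMPILE_WARNING_AS_ERROR=ON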
+
add_subdirectory(cpp)
add_subdirectory(c)
# We add a custom target in order to compile all of the hailort examples
add_custom_target(hailort_examples)
-add_dependencies(hailort_examples c_hailort_examples cpp_hailort_examples)
\ No newline at end of file
+
+add_dependencies(hailort_examples c_hailort_examples cpp_hailort_examples)
- this example uses udp device.
- `raw_streams_example` - Basic inference of a shortcut network using raw stream api.
- The data is transformed before sent and after received in the same thread sending/receiving using the transformation api.
+ - `raw_async_streams_single_thread_example` - Basic inference of a shortcut network using raw stream async api with
+ a single thread.
+ - Each completed async read operation re-launches a new async read operation.
+ - Each completed async write operation re-launches a new async write operation.
+ - The main thread will stop the async operations by deactivating the network group.
- `notification_callback_example` - Demonstrates how to work with notification callbacks.
- C++ examples:
- `infer_pipeline_example` - Basic inference of a shortcut network using inference pipeline (blocking) api.
- same as `infer_pipeline_example` C example, uses HailoRT C++ api.
- `raw_streams_example` - Basic inference of a shortcut network, same as `raw_streams_example` C example, uses HailoRT C++ api.
- - `multi_process_example` - Demonstrates how to work with HailoRT as a service and using the HailoRT Model Scheduler for network groups switching.
+ - `raw_async_streams_single_thread_example` - Basic inference of a shortcut network using raw stream async api with
+ a single thread.
+ - Each completed async read operation re-launches a new async read operation.
+ - Each completed async write operation re-launches a new async write operation.
+ - The main thread will stop the async operations by deactivating the network group.
+ - `raw_async_streams_multi_thread_example` - Basic inference of a shortcut network using raw stream async api with
+ a thread for each stream.
+ - The threads will continuously initiate async read or write operations.
+ - The main thread will stop the async operations and the threads by deactivating the network group.
+ - `multi_process_example` - Demonstrates how to work with HailoRT multi-process service and using the HailoRT Model Scheduler for network groups switching.
Using the script `multi_process_example.sh` one can specify the number of processes to run each hef, see `multi_process_example.sh -h` for more information.
- `notification_callback_example` - Demonstrates how to work with notification callbacks, same as `notification_callback_example` C example.
+You can find more details about each example in the HailoRT user guide.
## Compiling with CMake
Examples are configured and compiled using the following commands:
```sh
## Running the examples
-Before running an example, download the HEFs using the [download script](../../scripts/download_hefs.sh):
+Before running an example, download the HEFs using the [download script](../../scripts/download_hefs.sh) from the scripts directory:
```sh
- ../../scripts/download_hefs.sh
+ cd ../../scripts
+ ./download_hefs.sh
```
To run an example, use (from this examples directory):
add_subdirectory(power_measurement_example)
add_subdirectory(notification_callback_example)
-add_custom_target(c_hailort_examples)
-add_dependencies(c_hailort_examples
+set(C_EXAMPLE_TARGETS
c_data_quantization_example
c_raw_streams_example
c_vstreams_example
c_switch_network_groups_manually_example
c_multi_device_example
c_power_measurement_example
- c_notification_callback_example)
\ No newline at end of file
+ c_notification_callback_example
+)
+
+if(NOT CMAKE_SYSTEM_NAME STREQUAL QNX)
+ # TODO: HRT-10956 support QNX async examples
+ add_subdirectory(raw_async_streams_single_thread_example)
+ set(C_EXAMPLE_TARGETS ${C_EXAMPLE_TARGETS} c_raw_async_streams_single_thread_example)
+endif()
+
+add_custom_target(c_hailort_examples)
+add_dependencies(c_hailort_examples ${C_EXAMPLE_TARGETS})
\ No newline at end of file
#define ARRAY_LENGTH(__array) (sizeof((__array)) / sizeof((__array)[0]))
-#define NSEC_IN_SEC (1e+9)
+
+#if defined(__unix__)
+#define hailo_sleep(seconds) sleep((seconds))
+#elif defined(_MSC_VER)
+#define hailo_sleep(seconds) Sleep((seconds) * 1000)
+#else /* defined(_MSC_VER) */
+#pragma error("sleep not supported")
+#endif
#endif /* _EXAMPLE_COMMON_H_ */
#include "hailo/hailort.h"
-#if defined(__unix__) || defined(__QNX__)
+#if defined(__unix__) || defined(__QNX__)
#include <pthread.h>
+#include <unistd.h>
+#include <stdatomic.h>
+
typedef pthread_t hailo_thread;
typedef void* thread_return_type;
+typedef atomic_int hailo_atomic_int;
+
+#define MICROSECONDS_PER_MILLISECOND (1000)
hailo_status hailo_create_thread(thread_return_type(*func_ptr)(void*), void* args, hailo_thread *thread_out)
{
return (hailo_status)results;
}
+void hailo_atomic_init(hailo_atomic_int *atomic, int value)
+{
+ atomic_init(atomic, value);
+}
+
+int hailo_atomic_load(hailo_atomic_int *atomic)
+{
+ return atomic_load(atomic);
+}
+
+int hailo_atomic_fetch_add(hailo_atomic_int *atomic, int value)
+{
+ return atomic_fetch_add(atomic, value);
+}
+
+void hailo_atomic_increment(hailo_atomic_int *atomic)
+{
+ atomic_fetch_add(atomic, 1);
+}
+
+void hailo_atomic_store(hailo_atomic_int *atomic, int value)
+{
+ atomic_store(atomic, value);
+}
+
#elif defined _MSC_VER // __unix__ || __QNX__
#include <windows.h>
typedef HANDLE hailo_thread;
typedef DWORD thread_return_type;
+typedef LONG hailo_atomic_int;
hailo_status hailo_create_thread(thread_return_type(func_ptr)(void*), void* args, hailo_thread *thread_out)
{
return (hailo_status)result;
}
+void hailo_atomic_init(hailo_atomic_int *atomic, int value)
+{
+ InterlockedExchange(atomic, (LONG)value);
+}
+
+int hailo_atomic_load(hailo_atomic_int *atomic)
+{
+ return InterlockedExchangeAdd(atomic, (LONG)0);
+}
+
+int hailo_atomic_fetch_add(hailo_atomic_int *atomic, int value)
+{
+ return InterlockedExchangeAdd(atomic, (LONG)value);
+}
+
+void hailo_atomic_increment(hailo_atomic_int *atomic)
+{
+ InterlockedIncrement(atomic);
+}
+
+void hailo_atomic_store(hailo_atomic_int *atomic, int value)
+{
+ InterlockedExchange(atomic, value);
+}
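+
+/* Usage sketch (hypothetical counter) - the same calls work with both the C11 and the
+ * Win32 implementations above:
+ *     hailo_atomic_int frames_done;
+ *     hailo_atomic_init(&frames_done, 0);
+ *     hailo_atomic_increment(&frames_done);
+ *     int done_so_far = hailo_atomic_load(&frames_done);
+ */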
+
+
#endif
#endif /* _HAILO_THREAD_H_ */
find_package(Threads REQUIRED)
set(THREADS_PREFER_PTHREAD_FLAG ON)
-find_package(HailoRT 4.13.0 EXACT REQUIRED)
+find_package(HailoRT 4.14.0 EXACT REQUIRED)
SET_SOURCE_FILES_PROPERTIES(data_quantization_example.c PROPERTIES LANGUAGE C)
status = hailo_create_hef_file(&hef, HEF_FILE);
REQUIRE_SUCCESS(status, l_release_vdevice, "Failed reading hef file");
- status = hailo_init_configure_params(hef, HAILO_STREAM_INTERFACE_PCIE, &config_params);
+ status = hailo_init_configure_params_by_vdevice(hef, vdevice, &config_params);
REQUIRE_SUCCESS(status, l_release_hef, "Failed initializing configure parameters");
status = hailo_configure_vdevice(vdevice, hef, &config_params, &network_group, &network_group_size);
l_release_vdevice:
(void) hailo_release_vdevice(vdevice);
l_exit:
- return status;
+ return (int)status;
}
find_package(Threads REQUIRED)
set(THREADS_PREFER_PTHREAD_FLAG ON)
-find_package(HailoRT 4.13.0 EXACT REQUIRED)
+find_package(HailoRT 4.14.0 EXACT REQUIRED)
SET_SOURCE_FILES_PROPERTIES(infer_pipeline_example.c PROPERTIES LANGUAGE C)
status = hailo_create_hef_file(&hef, HEF_FILE);
REQUIRE_SUCCESS(status, l_release_device, "Failed reading hef file");
- status = hailo_init_configure_params(hef, HAILO_STREAM_INTERFACE_ETH, &config_params);
+ status = hailo_init_configure_params_by_device(hef, device, &config_params);
REQUIRE_SUCCESS(status, l_release_hef, "Failed initializing configure parameters");
status = hailo_configure_device(device, hef, &config_params, &network_group, &network_group_size);
l_release_device:
(void) hailo_release_device(device);
l_exit:
- return status;
+ return (int)status;
}
find_package(Threads REQUIRED)
set(THREADS_PREFER_PTHREAD_FLAG ON)
-find_package(HailoRT 4.13.0 EXACT REQUIRED)
+find_package(HailoRT 4.14.0 EXACT REQUIRED)
SET_SOURCE_FILES_PROPERTIES(multi_device_example.c PROPERTIES LANGUAGE C)
#define INFER_FRAME_COUNT (100)
#define MAX_EDGE_LAYERS (16)
#define MAX_DEVICES (16)
+#define BATCH_SIZE (1)
#define HEF_FILE ("hefs/shortcut_net.hef")
hailo_vdevice_params_t params = {0};
hailo_hef hef = NULL;
hailo_configure_params_t config_params = {0};
+ uint16_t batch_size = BATCH_SIZE;
hailo_configured_network_group network_group = NULL;
size_t network_group_size = 1;
hailo_input_vstream_params_by_name_t input_vstream_params[MAX_EDGE_LAYERS] = {0};
status = hailo_scan_devices(NULL, device_ids, &actual_count);
REQUIRE_SUCCESS(status, l_exit, "Failed to scan devices");
+ printf("Found %zu devices\n", actual_count);
status = hailo_init_vdevice_params(¶ms);
REQUIRE_SUCCESS(status, l_exit, "Failed init vdevice_params");
status = hailo_create_hef_file(&hef, HEF_FILE);
REQUIRE_SUCCESS(status, l_release_vdevice, "Failed reading hef file");
- status = hailo_init_configure_params(hef, HAILO_STREAM_INTERFACE_PCIE, &config_params);
+ status = hailo_init_configure_params_by_vdevice(hef, vdevice, &config_params);
REQUIRE_SUCCESS(status, l_release_hef, "Failed initializing configure parameters");
+ // Modify batch_size and power_mode for each network group
+ for (size_t i = 0; i < config_params.network_group_params_count; i++) {
+ config_params.network_group_params[i].batch_size = batch_size;
+ config_params.network_group_params[i].power_mode = HAILO_POWER_MODE_ULTRA_PERFORMANCE;
+ }
+
status = hailo_configure_vdevice(vdevice, hef, &config_params, &network_group, &network_group_size);
REQUIRE_SUCCESS(status, l_release_hef, "Failed configure vdevcie from hef");
REQUIRE_ACTION(network_group_size == 1, status = HAILO_INVALID_ARGUMENT, l_release_hef,
l_release_vdevice:
(void) hailo_release_vdevice(vdevice);
l_exit:
- return status;
+ return (int)status;
}
find_package(Threads REQUIRED)
set(THREADS_PREFER_PTHREAD_FLAG ON)
-find_package(HailoRT 4.13.0 EXACT REQUIRED)
+find_package(HailoRT 4.14.0 EXACT REQUIRED)
SET_SOURCE_FILES_PROPERTIES(multi_network_vstream_example.c PROPERTIES LANGUAGE C)
status = hailo_create_hef_file(&hef, HEF_FILE);
REQUIRE_SUCCESS(status, l_release_vdevice, "Failed reading hef file");
- status = hailo_init_configure_params(hef, HAILO_STREAM_INTERFACE_PCIE, &config_params);
+ status = hailo_init_configure_params_by_vdevice(hef, vdevice, &config_params);
REQUIRE_SUCCESS(status, l_release_hef, "Failed initializing configure parameters");
// Modify batch_size for each network
l_release_vdevice:
(void) hailo_release_vdevice(vdevice);
l_exit:
- return status;
+ return (int)status;
}
find_package(Threads REQUIRED)
set(THREADS_PREFER_PTHREAD_FLAG ON)
-find_package(HailoRT 4.13.0 EXACT REQUIRED)
+find_package(HailoRT 4.14.0 EXACT REQUIRED)
SET_SOURCE_FILES_PROPERTIES(notification_callback_example.c PROPERTIES LANGUAGE C)
l_release_device:
(void) hailo_release_device(device);
l_exit:
- return status;
+ return (int)status;
}
cmake_minimum_required(VERSION 3.0.0)
-find_package(HailoRT 4.13.0 EXACT REQUIRED)
+find_package(HailoRT 4.14.0 EXACT REQUIRED)
SET_SOURCE_FILES_PROPERTIES(power_measurement_example.c PROPERTIES LANGUAGE C)
l_release_vdevice:
(void) hailo_release_vdevice(vdevice);
l_exit:
- return status;
+ return (int)status;
}
\ No newline at end of file
--- /dev/null
+cmake_minimum_required(VERSION 3.0.0)
+
+find_package(Threads REQUIRED)
+set(THREADS_PREFER_PTHREAD_FLAG ON)
+
+find_package(HailoRT 4.14.0 EXACT REQUIRED)
+
+SET_SOURCE_FILES_PROPERTIES(raw_async_streams_single_thread_example.c PROPERTIES LANGUAGE C)
+
+add_executable(c_raw_async_streams_single_thread_example raw_async_streams_single_thread_example.c)
+target_link_libraries(c_raw_async_streams_single_thread_example PRIVATE HailoRT::libhailort Threads::Threads)
+target_include_directories(c_raw_async_streams_single_thread_example PRIVATE "${CMAKE_CURRENT_LIST_DIR}/../common")
+
+if(WIN32)
+ target_compile_options(c_raw_async_streams_single_thread_example PRIVATE
+ /DWIN32_LEAN_AND_MEAN
+ /DNOMINMAX # NOMINMAX is required in order to play nice with std::min/std::max (otherwise Windows.h defines it's own)
+ /wd4201 /wd4251
+ )
+endif()
\ No newline at end of file
--- /dev/null
+/**
+ * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+ **/
+/**
+ * @file raw_async_streams_single_thread_example.c
+ * This example demonstrates basic usage of HailoRT async streaming api with a single thread.
+ **/
+
+#include "common.h"
+#include "hailo/hailort.h"
+
+#include <string.h>
+
+#if defined(__unix__)
+#include <sys/mman.h>
+#endif
+
+
+#define HEF_FILE ("hefs/shortcut_net.hef")
+#define MAX_EDGE_LAYERS_PER_DIR (16)
+#define MAX_EDGE_LAYERS (MAX_EDGE_LAYERS_PER_DIR * 2)
+#define MAX_ONGOING_TRANSFERS (16)
+#define INFER_TIME_SECONDS (5)
+
+#if defined(__unix__)
+#define INVALID_ADDR (MAP_FAILED)
+#define page_aligned_alloc(size) mmap(NULL, (size), PROT_WRITE | PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0)
+#define page_aligned_free(addr, size) munmap((addr), (size))
+#elif defined(_MSC_VER)
+#define INVALID_ADDR (NULL)
+#define page_aligned_alloc(size) VirtualAlloc(NULL, (size), MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE)
+#define page_aligned_free(addr, size) VirtualFree((addr), 0, MEM_RELEASE)
+#else /* defined(_MSC_VER) */
+#error "Aligned alloc not supported"
+#endif
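+
+// Note: munmap() (used by page_aligned_free on unix) requires the original allocation size, so each
+// buffer's size must be tracked alongside the buffer itself (see infer() below).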
+
+#ifndef MIN
+#define MIN(a, b) (((a) < (b)) ? (a) : (b))
+#endif
+
+
+static void output_done_callback(const hailo_stream_read_async_completion_info_t *completion_info)
+{
+ hailo_output_stream stream = (hailo_output_stream)completion_info->opaque;
+ hailo_status status = HAILO_UNINITIALIZED;
+
+ switch (completion_info->status) {
+ case HAILO_SUCCESS:
+ // Real applications can forward the buffer to post-process/display. Here we just re-launch new async reads.
+ status = hailo_stream_read_raw_buffer_async(stream, completion_info->buffer_addr, completion_info->buffer_size,
+ output_done_callback, stream);
+ if ((HAILO_SUCCESS != status) && (HAILO_STREAM_NOT_ACTIVATED != status)) {
+ fprintf(stderr, "Failed read async with status=%d\n", status);
+ }
+ break;
+ case HAILO_STREAM_ABORTED_BY_USER:
+ // Transfer was canceled, finish gracefully.
+ break;
+ default:
+ fprintf(stderr, "Got an unexpected status on callback. status=%d\n", completion_info->status);
+ }
+}
+
+static void input_done_callback(const hailo_stream_write_async_completion_info_t *completion_info)
+{
+ hailo_input_stream stream = (hailo_input_stream)completion_info->opaque;
+ hailo_status status = HAILO_UNINITIALIZED;
+
+ switch (completion_info->status) {
+ case HAILO_SUCCESS:
+ // Real applications may free the buffer and replace it with a new buffer ready to be sent. Here we just re-launch
+ // new async writes.
+ status = hailo_stream_write_raw_buffer_async(stream, completion_info->buffer_addr, completion_info->buffer_size,
+ input_done_callback, stream);
+ if ((HAILO_SUCCESS != status) && (HAILO_STREAM_NOT_ACTIVATED != status)) {
+ fprintf(stderr, "Failed write async with status=%d\n", status);
+ }
+ break;
+ case HAILO_STREAM_ABORTED_BY_USER:
+ // Transfer was canceled, finish gracefully.
+ break;
+ default:
+ fprintf(stderr, "Got an unexpected status on callback. status=%d\n", completion_info->status);
+ }
+}
+
+static hailo_status infer(hailo_configured_network_group network_group, size_t number_input_streams,
+ hailo_input_stream *input_streams, size_t number_output_streams, hailo_output_stream *output_streams,
+ size_t ongoing_transfers)
+{
+ hailo_status status = HAILO_UNINITIALIZED;
+ hailo_activated_network_group activated_network_group = NULL;
+ size_t i = 0;
+ size_t frame_index = 0;
+ size_t frame_size = 0;
+ size_t stream_index = 0;
+ void *current_buffer = NULL;
+ void *buffers[MAX_EDGE_LAYERS * MAX_ONGOING_TRANSFERS] = {0};
+ size_t buffer_sizes[MAX_EDGE_LAYERS * MAX_ONGOING_TRANSFERS] = {0}; // page_aligned_free needs each original size
+ size_t allocated_buffers = 0;
+
+ status = hailo_activate_network_group(network_group, NULL, &activated_network_group);
+ REQUIRE_SUCCESS(status, l_exit, "Failed to activate network group, status=%d", status);
+
+ // We launch "ongoing_transfers" async operations for both input and output streams. On each async callback, we launch
+ // some new operation with the same buffer.
+ for (stream_index = 0; stream_index < number_output_streams; stream_index++) {
+ frame_size = hailo_get_output_stream_frame_size(output_streams[stream_index]);
+
+ // ongoing_transfers is less than or equal to the stream's max async queue size, so we can start parallel reads.
+ for (frame_index = 0; frame_index < ongoing_transfers; frame_index++) {
+ // Buffers read from async operation must be page aligned.
+ current_buffer = page_aligned_alloc(frame_size);
+ REQUIRE_ACTION(INVALID_ADDR != current_buffer, status=HAILO_OUT_OF_HOST_MEMORY, l_deactivate, "allocation failed");
+ buffer_sizes[allocated_buffers] = frame_size;
+ buffers[allocated_buffers++] = current_buffer;
+
+ status = hailo_stream_read_raw_buffer_async(output_streams[stream_index], current_buffer, frame_size,
+ output_done_callback, output_streams[stream_index]);
+ REQUIRE_SUCCESS(status, l_deactivate, "Failed read async with status=%d", status);
+ }
+ }
+
+ for (stream_index = 0; stream_index < number_input_streams; stream_index++) {
+ frame_size = hailo_get_input_stream_frame_size(input_streams[stream_index]);
+
+ // ongoing_transfers is less than or equal to the stream's max async queue size, so we can start parallel writes.
+ for (frame_index = 0; frame_index < ongoing_transfers; frame_index++) {
+ // Buffers written to async operation must be page aligned.
+ current_buffer = page_aligned_alloc(frame_size);
+ REQUIRE_ACTION(INVALID_ADDR != current_buffer, status=HAILO_OUT_OF_HOST_MEMORY, l_deactivate, "allocation failed");
+ buffer_sizes[allocated_buffers] = frame_size;
+ buffers[allocated_buffers++] = current_buffer;
+
+ status = hailo_stream_write_raw_buffer_async(input_streams[stream_index], current_buffer, frame_size,
+ input_done_callback, input_streams[stream_index]);
+ REQUIRE_SUCCESS(status, l_deactivate, "Failed write async with status=%d", status);
+ }
+ }
+
+ // After all async operations are launched, the inference will continue until we deactivate the network.
+ hailo_sleep(INFER_TIME_SECONDS);
+
+ status = HAILO_SUCCESS;
+l_deactivate:
+ // Calling hailo_deactivate_network_group will make sure that all async operations are done. All pending async I/O
+ // operations will be canceled and their callbacks called with status=HAILO_STREAM_ABORTED_BY_USER.
+ (void) hailo_deactivate_network_group(activated_network_group);
+
+ // There are no async I/O operations ongoing so it is safe to free the buffers now.
+ for (i = 0; i < allocated_buffers; i++) page_aligned_free(buffers[i], frame_size);
+
+l_exit:
+ return status;
+}
+
+static hailo_status configure_device(hailo_device device, const char *hef_file,
+ hailo_configured_network_group *network_group)
+{
+ hailo_status status = HAILO_UNINITIALIZED;
+ hailo_hef hef = NULL;
+ hailo_configure_params_t configure_params = {0};
+ size_t i = 0;
+ size_t network_group_size = 1;
+
+ // Load HEF file.
+ status = hailo_create_hef_file(&hef, hef_file);
+ REQUIRE_SUCCESS(status, l_exit, "Failed reading hef file %s", hef_file);
+
+ // Create configure params
+ status = hailo_init_configure_params_by_device(hef, device, &configure_params);
+ REQUIRE_SUCCESS(status, l_exit, "Failed init configure params");
+ REQUIRE_ACTION(configure_params.network_group_params_count == 1, status=HAILO_INVALID_ARGUMENT, l_exit,
+ "Unexpected network group size");
+
+ // Set HAILO_STREAM_FLAGS_ASYNC for all streams in order to use async api.
+ for (i = 0; i < configure_params.network_group_params[0].stream_params_by_name_count; i++) {
+ configure_params.network_group_params[0].stream_params_by_name[i].stream_params.flags = HAILO_STREAM_FLAGS_ASYNC;
+ }
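+ // Note: streams configured with HAILO_STREAM_FLAGS_ASYNC are intended for the async API only
+ // (hailo_stream_read/write_raw_buffer_async); the blocking calls are not expected to work on them.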
+
+ status = hailo_configure_device(device, hef, &configure_params, network_group, &network_group_size);
+ REQUIRE_SUCCESS(status, l_release_hef, "Failed configuring device");
+
+ status = HAILO_SUCCESS;
+l_release_hef:
+ (void) hailo_release_hef(hef);
+l_exit:
+ return status;
+}
+
+int main()
+{
+ hailo_status status = HAILO_UNINITIALIZED;
+ hailo_device device = NULL;
+ hailo_configured_network_group network_group = NULL;
+ hailo_stream_info_t input_streams_info[MAX_EDGE_LAYERS_PER_DIR] = {0};
+ hailo_stream_info_t output_streams_info[MAX_EDGE_LAYERS_PER_DIR] = {0};
+ hailo_input_stream input_streams[MAX_EDGE_LAYERS_PER_DIR] = {NULL};
+ hailo_output_stream output_streams[MAX_EDGE_LAYERS_PER_DIR] = {NULL};
+ size_t number_input_streams = 0;
+ size_t number_output_streams = 0;
+ size_t index = 0;
+ size_t queue_size = 0;
+ size_t ongoing_transfers = MAX_ONGOING_TRANSFERS;
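+ // ongoing_transfers starts at the maximum and is clamped below to the smallest async queue size across
+ // all streams, since launching more transfers than a stream's queue can hold would fail.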
+
+ // Create device object.
+ status = hailo_create_device_by_id(NULL, &device);
+ REQUIRE_SUCCESS(status, l_exit, "Failed to create device");
+
+ // Configure device with HEF.
+ status = configure_device(device, HEF_FILE, &network_group);
+ REQUIRE_SUCCESS(status, l_release_device, "Failed configure_device");
+
+ // Get input/output stream objects.
+ status = hailo_network_group_get_input_stream_infos(network_group, input_streams_info, MAX_EDGE_LAYERS_PER_DIR,
+ &number_input_streams);
+ REQUIRE_SUCCESS(status, l_release_device, "Failed getting input streams infos");
+
+ status = hailo_network_group_get_output_stream_infos(network_group, output_streams_info, MAX_EDGE_LAYERS_PER_DIR,
+ &number_output_streams);
+ REQUIRE_SUCCESS(status, l_release_device, "Failed getting output streams infos");
+
+ for (index = 0; index < number_input_streams; index++) {
+ status = hailo_get_input_stream(network_group, input_streams_info[index].name, &input_streams[index]);
+ REQUIRE_SUCCESS(status, l_release_device, "Failed getting input stream %s", input_streams_info[index].name);
+
+ status = hailo_input_stream_get_async_max_queue_size(input_streams[index], &queue_size);
+ REQUIRE_SUCCESS(status, l_release_device, "Failed getting queue size");
+
+ ongoing_transfers = MIN(queue_size, ongoing_transfers);
+ }
+
+ for (index = 0; index < number_output_streams; index++) {
+ status = hailo_get_output_stream(network_group, output_streams_info[index].name, &output_streams[index]);
+ REQUIRE_SUCCESS(status, l_release_device, "Failed getting output stream %s", output_streams_info[index].name);
+
+ status = hailo_output_stream_get_async_max_queue_size(output_streams[index], &queue_size);
+ REQUIRE_SUCCESS(status, l_release_device, "Failed getting queue size");
+
+ ongoing_transfers = MIN(queue_size, ongoing_transfers);
+ }
+
+ // Run infer.
+ status = infer(network_group, number_input_streams, input_streams, number_output_streams, output_streams,
+ ongoing_transfers);
+ REQUIRE_SUCCESS(status, l_release_device, "Failed performing inference");
+
+ status = HAILO_SUCCESS;
+ printf("Inference ran successfully\n");
+
+l_release_device:
+ (void) hailo_release_device(device);
+l_exit:
+ return (int)status;
+}
find_package(Threads REQUIRED)
set(THREADS_PREFER_PTHREAD_FLAG ON)
-find_package(HailoRT 4.13.0 EXACT REQUIRED)
+find_package(HailoRT 4.14.0 EXACT REQUIRED)
SET_SOURCE_FILES_PROPERTIES(raw_streams_example.c PROPERTIES LANGUAGE C)
status = hailo_create_hef_file(&hef, HEF_FILE);
REQUIRE_SUCCESS(status, l_release_device, "Failed creating hef file %s", HEF_FILE);
- status = hailo_init_configure_params(hef, HAILO_STREAM_INTERFACE_PCIE, &configure_params);
+ status = hailo_init_configure_params_by_device(hef, device, &configure_params);
REQUIRE_SUCCESS(status, l_release_hef, "Failed init configure params");
status = hailo_configure_device(device, hef, &configure_params, &network_group, &network_group_size);
l_release_device:
(void) hailo_release_device(device);
l_exit:
- return status;
+ return (int)status;
}
find_package(Threads REQUIRED)
set(THREADS_PREFER_PTHREAD_FLAG ON)
-find_package(HailoRT 4.13.0 EXACT REQUIRED)
+find_package(HailoRT 4.14.0 EXACT REQUIRED)
SET_SOURCE_FILES_PROPERTIES(switch_network_groups_example.c PROPERTIES LANGUAGE C)
#define INFER_FRAME_COUNT (100)
#define HEF_COUNT (2)
#define DEVICE_COUNT (1)
+#define BATCH_SIZE_1 (1)
+#define BATCH_SIZE_2 (2)
#define SCHEDULER_TIMEOUT_MS (100)
#define SCHEDULER_THRESHOLD (3)
for (size_t frame_index = 0; frame_index < input_frame_sizes[i]; frame_index++) {
src_data[i][frame_index] = (uint8_t)(rand() % 256);
}
- }
+ }
for (size_t i = 0; i < output_vstream_size; i++) {
status = hailo_get_output_vstream_frame_size(output_vstreams[i], &output_frame_sizes[i]);
read_thread_args_t read_args[HEF_COUNT][MAX_EDGE_LAYERS];
char HEF_FILES[HEF_COUNT][MAX_HEF_PATH_LEN] = {"hefs/multi_network_shortcut_net.hef", "hefs/shortcut_net.hef"};
+ uint16_t batch_sizes[HEF_COUNT] = {BATCH_SIZE_1, BATCH_SIZE_2};
status = hailo_init_vdevice_params(&params);
REQUIRE_SUCCESS(status, l_exit, "Failed init vdevice_params");
status = hailo_create_hef_file(&hef[hef_index], HEF_FILES[hef_index]);
REQUIRE_SUCCESS(status, l_release_hef, "Failed creating hef file %s", HEF_FILES[hef_index]);
- status = hailo_init_configure_params(hef[hef_index], HAILO_STREAM_INTERFACE_PCIE, &configure_params);
+ status = hailo_init_configure_params_by_vdevice(hef[hef_index], vdevice, &configure_params);
REQUIRE_SUCCESS(status, l_release_hef, "Failed init configure params");
+ // Modify batch_size and power_mode for each network group
+ for (size_t i = 0; i < configure_params.network_group_params_count; i++) {
+ configure_params.network_group_params[i].batch_size = batch_sizes[hef_index];
+ configure_params.network_group_params[i].power_mode = HAILO_POWER_MODE_ULTRA_PERFORMANCE;
+ }
+
status = hailo_configure_vdevice(vdevice, hef[hef_index], &configure_params, &network_groups[hef_index], &network_groups_size);
REQUIRE_SUCCESS(status, l_release_hef, "Failed configuring vdevcie");
REQUIRE_ACTION(network_groups_size == 1, status = HAILO_INVALID_ARGUMENT, l_release_hef,
"Unexpected network group size");
- // Set scheduler's timeout and threshold for the first network group, in order to give priority to the second network group
if (0 == hef_index) {
+ // Set scheduler's timeout and threshold for the first network group; this gives priority to the second network group
status = hailo_set_scheduler_timeout(network_groups[hef_index], SCHEDULER_TIMEOUT_MS, NULL);
REQUIRE_SUCCESS(status, l_release_hef, "Failed setting scheduler timeout");
status = hailo_set_scheduler_threshold(network_groups[hef_index], SCHEDULER_THRESHOLD, NULL);
REQUIRE_SUCCESS(status, l_release_hef, "Failed setting scheduler threshold");
+
+ // Set a higher priority for the first network group directly.
+ // In practice this means the first network group becomes ready to run only once ``SCHEDULER_THRESHOLD`` send requests have been accumulated,
+ // or more than ``SCHEDULER_TIMEOUT_MS`` time has passed and at least one send request has been accumulated.
+ // However, when both network groups are ready to run, the first one is preferred over the second.
+ status = hailo_set_scheduler_priority(network_groups[hef_index], HAILO_SCHEDULER_PRIORITY_NORMAL+1, NULL);
+ REQUIRE_SUCCESS(status, l_release_hef, "Failed setting scheduler priority");
}
status = build_vstreams(network_groups[hef_index],
l_release_hef:
for (hef_index = 0; hef_index < HEF_COUNT; hef_index++) {
if (NULL != hef[hef_index]) {
- (void)hailo_release_hef(hef[hef_index]);
+ (void)hailo_release_hef(hef[hef_index]);
}
}
(void)hailo_release_vdevice(vdevice);
l_exit:
- return status;
+ return (int)status;
}
\ No newline at end of file
find_package(Threads REQUIRED)
set(THREADS_PREFER_PTHREAD_FLAG ON)
-find_package(HailoRT 4.13.0 EXACT REQUIRED)
+find_package(HailoRT 4.14.0 EXACT REQUIRED)
SET_SOURCE_FILES_PROPERTIES(switch_network_groups_manually_example.c PROPERTIES LANGUAGE C)
/**
* @file switch_network_groups_manually_example.c
* This example demonstrates basic usage of HailoRT streaming api over multiple network groups, using vstreams.
- * It loads several HEF networks with a single input and a single output into a Hailo VDevice and performs a inference on each one.
+ * It loads several HEF networks with a single input and a single output into a Hailo VDevice and performs an inference on each one.
* After inference is finished, the example switches to the next HEF and starts inference again.
**/
status = hailo_vstream_read_raw_buffer(output_vstreams[hef_index],
dst_data[hef_index], output_frame_size[hef_index]);
REQUIRE_SUCCESS(status, l_deactivate_network_group, "Failed reading output frame from device");
-
+
// Process data here
}
-
+
// Deactivate the network after finishing inference
status = hailo_deactivate_network_group(*(output_vstream_args->activated_network_group));
REQUIRE_SUCCESS(status, l_deactivate_network_group, "Failed Deactivating network");
-
+
// Don't activate on the last iteration
if (hef_index < HEF_COUNT - 1) {
// Activate next network so input thread can start sending again
status = hailo_create_hef_file(&hef[hef_index], HEF_FILES[hef_index]);
REQUIRE_SUCCESS(status, l_release_hef, "Failed creating hef file %s", HEF_FILES[hef_index]);
- status = hailo_init_configure_params(hef[hef_index], HAILO_STREAM_INTERFACE_PCIE, &configure_params);
+ status = hailo_init_configure_params_by_vdevice(hef[hef_index], vdevice, &configure_params);
REQUIRE_SUCCESS(status, l_release_hef, "Failed init configure params");
status = hailo_configure_vdevice(vdevice, hef[hef_index], &configure_params, &network_groups[hef_index], &network_groups_size);
REQUIRE_SUCCESS(status, l_release_hef, "Failed configuring vdevcie");
- REQUIRE_ACTION(network_groups_size == 1, status = HAILO_INVALID_ARGUMENT, l_release_hef,
+ REQUIRE_ACTION(network_groups_size == 1, status = HAILO_INVALID_ARGUMENT, l_release_hef,
"Unexpected network group size");
// Make sure each HEF has a single input and a single output
status = hailo_make_input_vstream_params(network_groups[hef_index], true, HAILO_FORMAT_TYPE_AUTO,
&input_vstream_params[hef_index], &input_vstream_size);
REQUIRE_SUCCESS(status, l_release_hef, "Failed making input virtual stream params");
- REQUIRE_ACTION(input_vstream_size == 1, status = HAILO_INVALID_ARGUMENT, l_release_hef,
+ REQUIRE_ACTION(input_vstream_size == 1, status = HAILO_INVALID_ARGUMENT, l_release_hef,
"INVALID HEF - Only hefs with single input vstream are allowed");
status = hailo_make_output_vstream_params(network_groups[hef_index], true, HAILO_FORMAT_TYPE_AUTO,
&output_vstream_params[hef_index], &output_vstream_size);
REQUIRE_SUCCESS(status, l_release_hef, "Failed making output virtual stream params");
- REQUIRE_ACTION(output_vstream_size == 1, status = HAILO_INVALID_ARGUMENT, l_release_hef,
+ REQUIRE_ACTION(output_vstream_size == 1, status = HAILO_INVALID_ARGUMENT, l_release_hef,
"INVALID HEF - Only hefs with single output vstream are allowed");
}
l_release_hef:
for (hef_index = 0; hef_index < HEF_COUNT; hef_index++) {
if (NULL != hef[hef_index]) {
- (void)hailo_release_hef(hef[hef_index]);
+ (void)hailo_release_hef(hef[hef_index]);
}
}
(void)hailo_release_vdevice(vdevice);
l_exit:
- return status;
+ return (int)status;
}
find_package(Threads REQUIRED)
set(THREADS_PREFER_PTHREAD_FLAG ON)
-find_package(HailoRT 4.13.0 EXACT REQUIRED)
+find_package(HailoRT 4.14.0 EXACT REQUIRED)
SET_SOURCE_FILES_PROPERTIES(vstreams_example.c PROPERTIES LANGUAGE C)
size_t output_vstreams_size = MAX_EDGE_LAYERS;
hailo_input_vstream input_vstreams[MAX_EDGE_LAYERS] = {NULL};
hailo_output_vstream output_vstreams[MAX_EDGE_LAYERS] = {NULL};
+ bool quantized = true;
status = hailo_create_vdevice(NULL, &vdevice);
REQUIRE_SUCCESS(status, l_exit, "Failed to create vdevice");
status = hailo_create_hef_file(&hef, HEF_FILE);
REQUIRE_SUCCESS(status, l_release_vdevice, "Failed reading hef file");
- status = hailo_init_configure_params(hef, HAILO_STREAM_INTERFACE_PCIE, &config_params);
+ status = hailo_init_configure_params_by_vdevice(hef, vdevice, &config_params);
REQUIRE_SUCCESS(status, l_release_hef, "Failed initializing configure parameters");
status = hailo_configure_vdevice(vdevice, hef, &config_params, &network_group, &network_group_size);
- REQUIRE_SUCCESS(status, l_release_hef, "Failed configure vdevcie from hef");
+ REQUIRE_SUCCESS(status, l_release_hef, "Failed configure vdevice from hef");
REQUIRE_ACTION(network_group_size == 1, status = HAILO_INVALID_ARGUMENT, l_release_hef,
"Invalid network group size");
- status = hailo_make_input_vstream_params(network_group, true, HAILO_FORMAT_TYPE_AUTO,
+
+ // Set input format type to auto, and mark the data as quantized - libhailort will not scale the data before writing to the HW
+ quantized = true;
+ status = hailo_make_input_vstream_params(network_group, quantized, HAILO_FORMAT_TYPE_AUTO,
input_vstream_params, &input_vstreams_size);
REQUIRE_SUCCESS(status, l_release_hef, "Failed making input virtual stream params");
- status = hailo_make_output_vstream_params(network_group, true, HAILO_FORMAT_TYPE_AUTO,
+ /* The input format order in the example HEF is NHWC on the user side (may be seen using 'hailortcli parse-hef <HEF_PATH>').
+ Here we override the user-side format order to be NCHW */
+ for (size_t i = 0 ; i < input_vstreams_size; i++) {
+ input_vstream_params[i].params.user_buffer_format.order = HAILO_FORMAT_ORDER_NCHW;
+ }
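+ // (With NCHW, the host-side buffer is channel-planar - all of channel 0, then channel 1, and so on -
+ // whereas NHWC interleaves the channels per pixel.)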
+
+ // Set output format type to float32, and mark the data as not quantized - libhailort will de-quantize the data after reading from the HW
+ // Note: this process might affect the overall performance
+ quantized = false;
+ status = hailo_make_output_vstream_params(network_group, quantized, HAILO_FORMAT_TYPE_FLOAT32,
output_vstream_params, &output_vstreams_size);
REQUIRE_SUCCESS(status, l_release_hef, "Failed making output virtual stream params");
l_release_vdevice:
(void) hailo_release_vdevice(vdevice);
l_exit:
- return status;
+ return (int)status;
}
add_subdirectory(vstreams_example)
add_subdirectory(infer_pipeline_example)
add_subdirectory(raw_streams_example)
-add_subdirectory(raw_async_streams_example)
add_subdirectory(multi_network_vstream_example)
add_subdirectory(switch_network_groups_example)
add_subdirectory(switch_network_groups_manually_example)
add_subdirectory(multi_process_example)
add_subdirectory(notification_callback_example)
-add_custom_target(cpp_hailort_examples)
-add_dependencies(cpp_hailort_examples
+
+set(CPP_EXAMPLE_TARGETS
cpp_vstreams_example
cpp_infer_pipeline_example
cpp_raw_streams_example
- cpp_raw_async_streams_example
cpp_multi_network_vstream_example
cpp_switch_network_groups_example
cpp_switch_network_groups_manually_example
cpp_multi_device_example
cpp_power_measurement_example
cpp_multi_process_example
- cpp_notification_callback_example)
\ No newline at end of file
+ cpp_notification_callback_example
+)
+
+if(NOT CMAKE_SYSTEM_NAME STREQUAL QNX)
+ # TODO: HRT-10956 support QNX async examples
+ add_subdirectory(raw_async_streams_multi_thread_example)
+ add_subdirectory(raw_async_streams_single_thread_example)
+ set(CPP_EXAMPLE_TARGETS ${CPP_EXAMPLE_TARGETS}
+ cpp_raw_async_streams_multi_thread_example
+ cpp_raw_async_streams_single_thread_example)
+endif()
+
+add_custom_target(cpp_hailort_examples)
+add_dependencies(cpp_hailort_examples ${CPP_EXAMPLE_TARGETS})
\ No newline at end of file
cmake_minimum_required(VERSION 3.0.0)
-find_package(HailoRT 4.13.0 EXACT REQUIRED)
+find_package(HailoRT 4.14.0 EXACT REQUIRED)
add_executable(cpp_infer_pipeline_example infer_pipeline_example.cpp)
target_link_libraries(cpp_infer_pipeline_example PRIVATE HailoRT::libhailort)
find_package(Threads REQUIRED)
set(THREADS_PREFER_PTHREAD_FLAG ON)
-find_package(HailoRT 4.13.0 EXACT REQUIRED)
+find_package(HailoRT 4.14.0 EXACT REQUIRED)
add_executable(cpp_multi_device_example multi_device_example.cpp)
target_link_libraries(cpp_multi_device_example PRIVATE HailoRT::libhailort Threads::Threads)
#define HEF_FILE ("hefs/shortcut_net.hef")
+constexpr size_t BATCH_SIZE = 1;
constexpr size_t FRAMES_COUNT = 100;
constexpr bool QUANTIZED = true;
constexpr hailo_format_type_t FORMAT_TYPE = HAILO_FORMAT_TYPE_AUTO;
using namespace hailort;
-Expected<std::shared_ptr<ConfiguredNetworkGroup>> configure_network_group(VDevice &vdevice)
+Expected<std::shared_ptr<ConfiguredNetworkGroup>> configure_network_group(VDevice &vdevice, Hef &hef, uint16_t batch_size)
{
- auto hef = Hef::create(HEF_FILE);
- if (!hef) {
- return make_unexpected(hef.status());
- }
-
- auto configure_params = vdevice.create_configure_params(hef.value());
+ auto configure_params = vdevice.create_configure_params(hef);
if (!configure_params) {
+ std::cerr << "Failed to create configure params" << std::endl;
return make_unexpected(configure_params.status());
}
- auto network_groups = vdevice.configure(hef.value(), configure_params.value());
+ // Modify batch_size and power_mode for each network group
+ for (auto& network_group_params : configure_params.value()) {
+ network_group_params.second.batch_size = batch_size;
+ network_group_params.second.power_mode = HAILO_POWER_MODE_ULTRA_PERFORMANCE;
+ }
+
+ auto network_groups = vdevice.configure(hef, configure_params.value());
if (!network_groups) {
+ std::cerr << "Failed to configure vdevice" << std::endl;
return make_unexpected(network_groups.status());
}
hailo_status infer(std::vector<InputVStream> &input_streams, std::vector<OutputVStream> &output_streams)
{
-
hailo_status status = HAILO_SUCCESS; // Success oriented
hailo_status input_status[MAX_LAYER_EDGES] = {HAILO_UNINITIALIZED};
hailo_status output_status[MAX_LAYER_EDGES] = {HAILO_UNINITIALIZED};
int main()
{
+ uint16_t batch_size = BATCH_SIZE;
+
auto scan_res = hailort::Device::scan();
if (!scan_res) {
std::cerr << "Failed to scan, status = " << scan_res.status() << std::endl;
return scan_res.status();
}
+ std::cout << "Found " << scan_res.value().size() << " devices" << std::endl;
hailo_vdevice_params_t params;
auto status = hailo_init_vdevice_params(&params);
return vdevice.status();
}
- auto network_group = configure_network_group(*vdevice.value());
+ auto hef = Hef::create(HEF_FILE);
+ if (!hef) {
+ std::cerr << "Failed to create hef: " << HEF_FILE << ", status = " << hef.status() << std::endl;
+ return hef.status();
+ }
+
+ auto network_group = configure_network_group(*vdevice.value(), hef.value(), batch_size);
if (!network_group) {
std::cerr << "Failed to configure network group " << HEF_FILE << std::endl;
return network_group.status();
find_package(Threads REQUIRED)
set(THREADS_PREFER_PTHREAD_FLAG ON)
-find_package(HailoRT 4.13.0 EXACT REQUIRED)
+find_package(HailoRT 4.14.0 EXACT REQUIRED)
add_executable(cpp_multi_network_vstream_example multi_network_vstream_example.cpp)
target_link_libraries(cpp_multi_network_vstream_example PRIVATE HailoRT::libhailort Threads::Threads)
find_package(Threads REQUIRED)
set(THREADS_PREFER_PTHREAD_FLAG ON)
-find_package(HailoRT 4.13.0 EXACT REQUIRED)
+find_package(HailoRT 4.14.0 EXACT REQUIRED)
add_executable(cpp_multi_process_example multi_process_example.cpp)
target_link_libraries(cpp_multi_process_example PRIVATE HailoRT::libhailort Threads::Threads)
**/
/**
* @file multi_process_example.cpp
- * This example demonstrates the basic usage of HailoRT as a service.
+ * This example demonstrates the basic usage of the HailoRT multi-process service.
* The program creates a virtual device with multi_process_service flag and uses the HailoRT API to run inference using VStreams.
* The network_groups switching is performed automatically by the HailoRT scheduler.
*
cmake_minimum_required(VERSION 3.0.0)
-find_package(HailoRT 4.13.0 EXACT REQUIRED)
+find_package(HailoRT 4.14.0 EXACT REQUIRED)
add_executable(cpp_notification_callback_example notification_callback_example.cpp)
target_link_libraries(cpp_notification_callback_example PRIVATE HailoRT::libhailort)
int main()
{
- auto device_ids = Device::scan();
- if (!device_ids) {
- std::cerr << "Failed to scan, status = " << device_ids.status() << std::endl;
- return device_ids.status();
- }
- if (device_ids->size() < 1){
- std::cerr << "Failed to find a connected hailo device." << std::endl;
- return HAILO_INVALID_OPERATION;
- }
- auto device = Device::create(device_ids->at(0));
+ auto device = Device::create();
if (!device) {
std::cerr << "Failed to create device " << device.status() << std::endl;
return device.status();
cmake_minimum_required(VERSION 3.0.0)
-find_package(HailoRT 4.13.0 EXACT REQUIRED)
+find_package(HailoRT 4.14.0 EXACT REQUIRED)
add_executable(cpp_power_measurement_example power_measurement_example.cpp)
target_link_libraries(cpp_power_measurement_example PRIVATE HailoRT::libhailort)
+++ /dev/null
-cmake_minimum_required(VERSION 3.0.0)
-
-find_package(Threads REQUIRED)
-set(THREADS_PREFER_PTHREAD_FLAG ON)
-
-find_package(HailoRT 4.13.0 EXACT REQUIRED)
-
-add_executable(cpp_raw_async_streams_example buffer_pool.cpp raw_async_streams_example.cpp)
-target_link_libraries(cpp_raw_async_streams_example PRIVATE HailoRT::libhailort Threads::Threads)
-
-if(WIN32)
- target_compile_options(cpp_raw_async_streams_example PRIVATE
- /DWIN32_LEAN_AND_MEAN
- /DNOMINMAX # NOMINMAX is required in order to play nice with std::min/std::max (otherwise Windows.h defines it's own)
- /wd4201 /wd4251
- )
-endif()
-
-set_target_properties(cpp_raw_async_streams_example PROPERTIES CXX_STANDARD 14)
\ No newline at end of file
+++ /dev/null
-/**\r
- * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved.\r
- * Distributed under the MIT license (https://opensource.org/licenses/MIT)\r
-**/\r
-/**\r
- * @file buffer_pool.cpp\r
- * @brief Implementation of vdma buffer pool\r
- **/\r
-\r
-#include "buffer_pool.hpp"\r
-#include "hailo/hailort.h"\r
-#include "hailo/expected.hpp"\r
-\r
-Expected<BufferPoolPtr> BufferPool::create(size_t num_buffers, size_t buffer_size,\r
- hailo_vdma_buffer_direction_flags_t data_direction_flags, Device &device)\r
-{\r
- std::queue<std::shared_ptr<DmaMappedBuffer>> queue;\r
- for (auto i = 0; i < num_buffers; i++) {\r
- auto mapped_buffer = DmaMappedBuffer::create(buffer_size, data_direction_flags, device);\r
- if (!mapped_buffer) {\r
- return make_unexpected(mapped_buffer.status());\r
- }\r
-\r
- auto mapped_buffer_ptr = std::make_shared<DmaMappedBuffer>(mapped_buffer.release());\r
- if (nullptr == mapped_buffer_ptr) {\r
- return make_unexpected(HAILO_OUT_OF_HOST_MEMORY);\r
- }\r
-\r
- queue.push(mapped_buffer_ptr);\r
- }\r
- \r
- auto result = std::make_shared<BufferPool>(num_buffers, std::move(queue));\r
- if (nullptr == result) {\r
- return make_unexpected(HAILO_OUT_OF_HOST_MEMORY);\r
- }\r
-\r
- return result;\r
-}\r
-\r
-BufferPool::BufferPool(size_t max_size, std::queue<std::shared_ptr<DmaMappedBuffer>> &&queue) :\r
- m_max_size(max_size),\r
- m_mutex(),\r
- m_cv(),\r
- m_queue(queue)\r
-{}\r
-\r
-BufferPool::~BufferPool()\r
-{\r
- m_cv.notify_all();\r
-}\r
-\r
-std::shared_ptr<DmaMappedBuffer> BufferPool::dequeue()\r
-{\r
- std::unique_lock<std::mutex> lock(m_mutex);\r
- m_cv.wait(lock, [this] { return m_queue.size() > 0; });\r
- auto buffer = m_queue.front();\r
- m_queue.pop();\r
-\r
- return buffer;\r
-}\r
-void BufferPool::enqueue(std::shared_ptr<DmaMappedBuffer> buffer)\r
-{\r
- {\r
- std::unique_lock<std::mutex> lock(m_mutex);\r
- m_cv.wait(lock, [this] { return m_max_size > m_queue.size(); });\r
- m_queue.push(buffer);\r
- }\r
-\r
- m_cv.notify_one();\r
-}\r
-\r
-void BufferPool::wait_for_pending_buffers()\r
-{\r
- std::unique_lock<std::mutex> lock(m_mutex);\r
- m_cv.wait(lock, [this] { return m_max_size == m_queue.size(); });\r
-}\r
+++ /dev/null
-/**\r
- * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved.\r
- * Distributed under the MIT license (https://opensource.org/licenses/MIT)\r
-**/\r
-/**\r
- * @file buffer_pool.hpp\r
- * @brief Pool of vdma mapped buffers, allowing FIFO queue access to buffers\r
- **/\r
-\r
-#ifndef _HAILO_BUFFER_POOL_HPP_\r
-#define _HAILO_BUFFER_POOL_HPP_\r
-\r
-#include "hailo/hailort.hpp"\r
-#include "hailo/expected.hpp"\r
-\r
-#include <memory>\r
-#include <mutex>\r
-#include <condition_variable>\r
-#include <queue>\r
-\r
-\r
-using namespace hailort;\r
-\r
-class BufferPool;\r
-using BufferPoolPtr = std::shared_ptr<BufferPool>;\r
-\r
-class BufferPool final\r
-{\r
-public:\r
- static Expected<BufferPoolPtr> create(size_t num_buffers, size_t buffer_size,\r
- hailo_vdma_buffer_direction_flags_t data_direction_flags, Device &device);\r
- BufferPool(size_t max_size, std::queue<std::shared_ptr<DmaMappedBuffer>> &&queue);\r
- BufferPool(BufferPool &&) = delete;\r
- BufferPool(const BufferPool &) = delete;\r
- BufferPool &operator=(BufferPool &&) = delete;\r
- BufferPool &operator=(const BufferPool &) = delete;\r
- ~BufferPool();\r
-\r
- std::shared_ptr<DmaMappedBuffer> dequeue();\r
- void enqueue(std::shared_ptr<DmaMappedBuffer> buffer);\r
- void wait_for_pending_buffers();\r
-\r
-private:\r
- const size_t m_max_size;\r
- std::mutex m_mutex;\r
- std::condition_variable m_cv;\r
- std::queue<std::shared_ptr<DmaMappedBuffer>> m_queue;\r
-};\r
-\r
-#endif /* _HAILO_BUFFER_POOL_HPP_ */\r
+++ /dev/null
-/**
- * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
- * Distributed under the MIT license (https://opensource.org/licenses/MIT)
- **/
-/**
- * @file raw_async_streams_example
- * This example demonstrates using low level async streams over c++
- **/
-
-#include "hailo/hailort.hpp"
-#include "buffer_pool.hpp"
-
-#include <thread>
-#include <iostream>
-
-
-constexpr size_t FRAMES_COUNT = 10000;
-constexpr size_t BUFFER_POOL_SIZE = 10;
-constexpr auto TIMEOUT = std::chrono::milliseconds(1000);
-
-using namespace hailort;
-
-Expected<std::shared_ptr<ConfiguredNetworkGroup>> configure_network_group(Device &device, const std::string &hef_path)
-{
- auto hef = Hef::create(hef_path);
- if (!hef) {
- return make_unexpected(hef.status());
- }
-
- auto configure_params = device.create_configure_params(hef.value());
- if (!configure_params) {
- return make_unexpected(configure_params.status());
- }
-
- // change stream_params here
- for (auto &ng_name_params_pair : *configure_params) {
- for (auto &stream_params_name_pair : ng_name_params_pair.second.stream_params_by_name) {
- stream_params_name_pair.second.flags = HAILO_STREAM_FLAGS_ASYNC;
- }
- }
-
- auto network_groups = device.configure(hef.value(), configure_params.value());
- if (!network_groups) {
- return make_unexpected(network_groups.status());
- }
-
- if (1 != network_groups->size()) {
- std::cerr << "Invalid amount of network groups" << std::endl;
- return make_unexpected(HAILO_INTERNAL_FAILURE);
- }
-
- return std::move(network_groups->at(0));
-}
-
-void read_all(OutputStream &output, BufferPoolPtr buffer_pool, size_t frames_to_read, hailo_status &status)
-{
- for (size_t i = 0; i < frames_to_read; i++) {
- status = output.wait_for_ready(output.get_frame_size(), TIMEOUT);
- if (HAILO_SUCCESS != status) {
- return;
- }
- status = output.read_async(buffer_pool->dequeue(),
- [buffer_pool](std::shared_ptr<DmaMappedBuffer> buffer, const hailo_async_transfer_completion_info_t &, void *) {
- buffer_pool->enqueue(buffer);
- });
- if (HAILO_SUCCESS != status) {
- return;
- }
- }
-}
-
-void write_all(InputStream &input, BufferPoolPtr buffer_pool, size_t frames_to_write, hailo_status &status)
-{
- for (size_t i = 0; i < frames_to_write; i++) {
- status = input.wait_for_ready(input.get_frame_size(), TIMEOUT);
- if (HAILO_SUCCESS != status) {
- return;
- }
- status = input.write_async(buffer_pool->dequeue(),
- [buffer_pool](std::shared_ptr<DmaMappedBuffer> buffer, const hailo_async_transfer_completion_info_t &, void *) {
- buffer_pool->enqueue(buffer);
- });
- if (HAILO_SUCCESS != status) {
- return;
- }
- }
-}
-
-int main()
-{
- auto device = Device::create();
- if (!device) {
- std::cerr << "Failed create device " << device.status() << std::endl;
- return device.status();
- }
-
- static const auto HEF_FILE = "hefs/shortcut_net.hef";
- auto network_group = configure_network_group(*device.value(), HEF_FILE);
- if (!network_group) {
- std::cerr << "Failed to configure network group" << HEF_FILE << std::endl;
- return network_group.status();
- }
-
- auto activated_network_group = network_group.value()->activate();
- if (!activated_network_group) {
- std::cerr << "Failed to activate network group " << activated_network_group.status() << std::endl;
- return activated_network_group.status();
- }
-
- // Assume one input and output
- auto output = network_group->get()->get_output_streams()[0];
- auto input = network_group->get()->get_input_streams()[0];
-
- auto output_buffer_pool = BufferPool::create(BUFFER_POOL_SIZE, output.get().get_frame_size(), HAILO_VDMA_BUFFER_DIRECTION_FLAGS_D2H, *device.value());
- if (!output_buffer_pool) {
- std::cerr << "Failed to create output buffer pool" << std::endl;
- return output_buffer_pool.status();
- }
- hailo_status output_status = HAILO_UNINITIALIZED;
- auto output_thread = std::make_unique<std::thread>(read_all, output, output_buffer_pool.value(), FRAMES_COUNT, std::ref(output_status));
-
- auto input_buffer_pool = BufferPool::create(BUFFER_POOL_SIZE, input.get().get_frame_size(), HAILO_VDMA_BUFFER_DIRECTION_FLAGS_H2D, *device.value());
- if (!input_buffer_pool) {
- std::cerr << "Failed to create input buffer pool" << std::endl;
- return input_buffer_pool.status();
- }
- hailo_status input_status = HAILO_UNINITIALIZED;
- auto input_thread = std::make_unique<std::thread>(write_all, input, input_buffer_pool.value(), FRAMES_COUNT, std::ref(input_status));
-
- // Join threads
- input_thread->join();
- output_thread->join();
- if (HAILO_SUCCESS != input_status) {
- return input_status;
- }
- if (HAILO_SUCCESS != output_status) {
- return output_status;
- }
-
- // The read/write threads have completed but the transfers issued by them haven't necessarily completed.
- // We'll wait for the output buffer queue to fill back up, since the callback we registered enqueues buffers
- // back to the pool + we issued the same number of reads as writes
- output_buffer_pool.value()->wait_for_pending_buffers();
-
- return HAILO_SUCCESS;
-}
--- /dev/null
+cmake_minimum_required(VERSION 3.0.0)
+
+find_package(Threads REQUIRED)
+set(THREADS_PREFER_PTHREAD_FLAG ON)
+
+find_package(HailoRT 4.14.0 EXACT REQUIRED)
+
+add_executable(cpp_raw_async_streams_multi_thread_example raw_async_streams_multi_thread_example.cpp)
+target_link_libraries(cpp_raw_async_streams_multi_thread_example PRIVATE HailoRT::libhailort Threads::Threads)
+
+if(WIN32)
+ target_compile_options(cpp_raw_async_streams_multi_thread_example PRIVATE
+ /DWIN32_LEAN_AND_MEAN
+ /DNOMINMAX # NOMINMAX is required in order to play nice with std::min/std::max (otherwise Windows.h defines its own)
+ /wd4201 /wd4251
+ )
+endif()
+
+set_target_properties(cpp_raw_async_streams_multi_thread_example PROPERTIES CXX_STANDARD 14)
\ No newline at end of file
--- /dev/null
+/**
+ * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+ **/
+/**
+ * @file raw_async_streams_multi_thread_example
+ * This example demonstrates using low-level async streams in C++.
+ **/
+
+#include "hailo/hailort.hpp"
+
+#include <thread>
+#include <iostream>
+
+#if defined(__unix__)
+#include <sys/mman.h>
+#endif
+
+constexpr auto TIMEOUT = std::chrono::milliseconds(1000);
+
+using namespace hailort;
+
+using AlignedBuffer = std::shared_ptr<uint8_t>;
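+// The shared_ptr's custom deleter (installed in page_aligned_alloc below) releases the mapping when the
+// last reference drops, so a buffer stays valid for as long as any pending transfer still holds it.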
+static AlignedBuffer page_aligned_alloc(size_t size)
+{
+#if defined(__unix__)
+ auto addr = mmap(NULL, size, PROT_WRITE | PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ if (MAP_FAILED == addr) throw std::bad_alloc();
+ return AlignedBuffer(reinterpret_cast<uint8_t*>(addr), [size](void *addr) { munmap(addr, size); });
+#elif defined(_MSC_VER)
+ auto addr = VirtualAlloc(NULL, size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
+ if (!addr) throw std::bad_alloc();
+ return AlignedBuffer(reinterpret_cast<uint8_t*>(addr), [](void *addr){ VirtualFree(addr, 0, MEM_RELEASE); });
+#else
+#error "Aligned alloc not supported"
+#endif
+}
+
+Expected<std::shared_ptr<ConfiguredNetworkGroup>> configure_network_group(Device &device, const std::string &hef_path)
+{
+ auto hef = Hef::create(hef_path);
+ if (!hef) {
+ return make_unexpected(hef.status());
+ }
+
+ auto configure_params = device.create_configure_params(hef.value());
+ if (!configure_params) {
+ return make_unexpected(configure_params.status());
+ }
+
+ // change stream_params to operate in async mode
+ for (auto &ng_name_params_pair : *configure_params) {
+ for (auto &stream_params_name_pair : ng_name_params_pair.second.stream_params_by_name) {
+ stream_params_name_pair.second.flags = HAILO_STREAM_FLAGS_ASYNC;
+ }
+ }
+
+ auto network_groups = device.configure(hef.value(), configure_params.value());
+ if (!network_groups) {
+ return make_unexpected(network_groups.status());
+ }
+
+ if (1 != network_groups->size()) {
+ std::cerr << "Invalid amount of network groups" << std::endl;
+ return make_unexpected(HAILO_INTERNAL_FAILURE);
+ }
+
+ return std::move(network_groups->at(0));
+}
+
+static void output_async_callback(const OutputStream::CompletionInfo &completion_info)
+{
+ // Real applications can free the buffer or forward it to post-process/display.
+ if ((HAILO_SUCCESS != completion_info.status) && (HAILO_STREAM_ABORTED_BY_USER != completion_info.status)) {
+ // We will get HAILO_STREAM_ABORTED_BY_USER when activated_network_group is destructed.
+ std::cerr << "Got an unexpected status on callback. status=" << completion_info.status << std::endl;
+ }
+}
+
+static void input_async_callback(const InputStream::CompletionInfo &completion_info)
+{
+ // Real applications can free the buffer or reuse it for next transfer.
+ if ((HAILO_SUCCESS != completion_info.status) && (HAILO_STREAM_ABORTED_BY_USER != completion_info.status)) {
+ // We will get HAILO_STREAM_ABORTED_BY_USER when activated_network_group is destructed.
+ std::cerr << "Got an unexpected status on callback. status=" << completion_info.status << std::endl;
+ }
+}
+
+int main()
+{
+ auto device = Device::create();
+ if (!device) {
+ std::cerr << "Failed create device " << device.status() << std::endl;
+ return EXIT_FAILURE;
+ }
+
+ static const auto HEF_FILE = "hefs/shortcut_net.hef";
+ auto network_group = configure_network_group(*device.value(), HEF_FILE);
+ if (!network_group) {
+ std::cerr << "Failed to configure network group " << HEF_FILE << std::endl;
+ return EXIT_FAILURE;
+ }
+
+ // Assume one input and output
+ auto &output = network_group->get()->get_output_streams()[0].get();
+ auto &input = network_group->get()->get_input_streams()[0].get();
+
+ // Allocate buffers. The buffers sent to the async API must be page aligned.
+ // For simplicity, in this example, we pass one buffer for each stream (this may be problematic for output, since
+ // the buffer will be overwritten on each read).
+ // Note - the buffers are allocated before we activate the network group. This ensures that they won't be freed
+ // until the network group becomes inactive.
+ auto output_buffer = page_aligned_alloc(output.get_frame_size());
+ auto input_buffer = page_aligned_alloc(input.get_frame_size());
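+ // (A more realistic setup would rotate several page-aligned buffers per stream, re-launching each transfer
+ // with the next free buffer from its callback, so completed frames are not overwritten.)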
+
+ // The destructor of activated_network_group will make sure that all async operations are done. All pending
+ // operations will be canceled and their callbacks will be called with status=HAILO_STREAM_ABORTED_BY_USER.
+ // Make sure that variables captured in the callbacks outlive the activated_network_group.
+ // Otherwise, the lambdas would access already-destructed data.
+ auto activated_network_group = network_group.value()->activate();
+ if (!activated_network_group) {
+ std::cerr << "Failed to activate network group " << activated_network_group.status() << std::endl;
+ return EXIT_FAILURE;
+ }
+
+ std::atomic<hailo_status> output_status(HAILO_UNINITIALIZED);
+ std::thread output_thread([&]() {
+ while (true) {
+ output_status = output.wait_for_async_ready(output.get_frame_size(), TIMEOUT);
+ if (HAILO_SUCCESS != output_status) { return; }
+
+ output_status = output.read_async(output_buffer.get(), output.get_frame_size(), output_async_callback);
+ if (HAILO_SUCCESS != output_status) { return; }
+ }
+ });
+
+ std::atomic<hailo_status> input_status(HAILO_UNINITIALIZED);
+ std::thread input_thread([&]() {
+ while (true) {
+ input_status = input.wait_for_async_ready(input.get_frame_size(), TIMEOUT);
+ if (HAILO_SUCCESS != input_status) { return; }
+
+ input_status = input.write_async(input_buffer.get(), input.get_frame_size(), input_async_callback);
+ if (HAILO_SUCCESS != input_status) { return; }
+ }
+ });
+
+ // After all async operations are launched, the inference is running.
+ std::this_thread::sleep_for(std::chrono::seconds(5));
+
+ // Make it stop. We explicitly destruct activated_network_group to stop all async I/O.
+ activated_network_group->reset();
+
+ // The threads should stop with HAILO_STREAM_NOT_ACTIVATED status.
+ output_thread.join();
+ input_thread.join();
+ if ((HAILO_STREAM_NOT_ACTIVATED != output_status) || (HAILO_STREAM_NOT_ACTIVATED != input_status)) {
+ std::cerr << "Got unexpected statues from thread: " << output_status << ", " << input_status << std::endl;
+ return EXIT_FAILURE;
+ }
+
+ std::cout << "Inference finished successfully" << std::endl;
+ return EXIT_SUCCESS;
+}
\ No newline at end of file
--- /dev/null
+cmake_minimum_required(VERSION 3.0.0)
+
+find_package(Threads REQUIRED)
+set(THREADS_PREFER_PTHREAD_FLAG ON)
+
+find_package(HailoRT 4.14.0 EXACT REQUIRED)
+
+add_executable(cpp_raw_async_streams_single_thread_example raw_async_streams_single_thread_example.cpp)
+target_link_libraries(cpp_raw_async_streams_single_thread_example PRIVATE HailoRT::libhailort Threads::Threads)
+
+if(WIN32)
+ target_compile_options(cpp_raw_async_streams_single_thread_example PRIVATE
+ /DWIN32_LEAN_AND_MEAN
+ /DNOMINMAX # NOMINMAX is required in order to play nice with std::min/std::max (otherwise Windows.h defines its own)
+ /wd4201 /wd4251
+ )
+endif()
+
+set_target_properties(cpp_raw_async_streams_single_thread_example PROPERTIES CXX_STANDARD 14)
\ No newline at end of file
--- /dev/null
+/**
+ * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+ **/
+/**
+ * @file raw_async_streams_single_thread_example
+ * This example demonstrates using low-level async streams with a single thread in C++.
+ **/
+
+#include "hailo/hailort.hpp"
+
+#include <thread>
+#include <iostream>
+#include <queue>
+#include <condition_variable>
+
+#if defined(__unix__)
+#include <sys/mman.h>
+#endif
+
+using namespace hailort;
+
+using AlignedBuffer = std::shared_ptr<uint8_t>;
+static AlignedBuffer page_aligned_alloc(size_t size)
+{
+#if defined(__unix__)
+ auto addr = mmap(NULL, size, PROT_WRITE | PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ if (MAP_FAILED == addr) throw std::bad_alloc();
+ return AlignedBuffer(reinterpret_cast<uint8_t*>(addr), [size](void *addr) { munmap(addr, size); });
+#elif defined(_MSC_VER)
+ auto addr = VirtualAlloc(NULL, size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
+ if (!addr) throw std::bad_alloc();
+ return AlignedBuffer(reinterpret_cast<uint8_t*>(addr), [](void *addr){ VirtualFree(addr, 0, MEM_RELEASE); });
+#else
+#error "Aligned alloc not supported"
+#endif
+}
+
+static hailo_status infer(ConfiguredNetworkGroup &network_group, InputStream &input, OutputStream &output)
+{
+ auto input_queue_size = input.get_async_max_queue_size();
+ auto output_queue_size = output.get_async_max_queue_size();
+ if (!input_queue_size || !output_queue_size) {
+ std::cerr << "Failed getting async queue size" << std::endl;
+ return HAILO_INTERNAL_FAILURE;
+ }
+
+ // We store the buffers vector here as a guard for the memory. The buffers will be freed only after
+ // activated_network_group is released.
+ std::vector<AlignedBuffer> buffer_guards;
+
+ OutputStream::TransferDoneCallback read_done = [&output, &read_done](const OutputStream::CompletionInfo &completion_info) {
+ hailo_status status = HAILO_UNINITIALIZED;
+ switch (completion_info.status) {
+ case HAILO_SUCCESS:
+ // Real applications can forward the buffer to post-process/display. Here we just re-launch a new async read.
+ status = output.read_async(completion_info.buffer_addr, completion_info.buffer_size, read_done);
+ if ((HAILO_SUCCESS != status) && (HAILO_STREAM_NOT_ACTIVATED != status)) {
+ std::cerr << "Failed read async with status=" << status << std::endl;
+ }
+ break;
+ case HAILO_STREAM_ABORTED_BY_USER:
+ // Transfer was canceled, finish gracefully.
+ break;
+ default:
+ std::cerr << "Got an unexpected status on callback. status=" << completion_info.status << std::endl;
+ }
+ };
+
+ InputStream::TransferDoneCallback write_done = [&input, &write_done](const InputStream::CompletionInfo &completion_info) {
+ hailo_status status = HAILO_UNINITIALIZED;
+ switch (completion_info.status) {
+ case HAILO_SUCCESS:
+ // Real applications may free the buffer and replace it with a new buffer ready to be sent. Here we just
+ // re-launch a new async write.
+ status = input.write_async(completion_info.buffer_addr, completion_info.buffer_size, write_done);
+ if ((HAILO_SUCCESS != status) && (HAILO_STREAM_NOT_ACTIVATED != status)) {
+ std::cerr << "Failed read async with status=" << status << std::endl;
+ }
+ break;
+ case HAILO_STREAM_ABORTED_BY_USER:
+ // Transfer was canceled, finish gracefully.
+ break;
+ default:
+ std::cerr << "Got an unexpected status on callback. status=" << completion_info.status << std::endl;
+ }
+ };
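+ // Note: read_done and write_done capture themselves by reference so that each callback can re-register on
+ // completion; this is safe here because the std::function objects outlive all in-flight transfers.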
+
+ // The destructor of activated_network_group will make sure that all async operations are done. All pending
+ // operations will be canceled and their callbacks will be called with status=HAILO_STREAM_ABORTED_BY_USER.
+ // Make sure that variables captured in the callbacks outlive the activated_network_group.
+ // Otherwise, the lambdas would access already-destructed data.
+ auto activated_network_group = network_group.activate();
+ if (!activated_network_group) {
+ std::cerr << "Failed to activate network group " << activated_network_group.status() << std::endl;
+ return activated_network_group.status();
+ }
+
+ // We launch "*output_queue_size" async read operation. On each async callback, we launch a new async read operation.
+ for (size_t i = 0; i < *output_queue_size; i++) {
+ // Buffers read from async operation must be page aligned.
+ auto buffer = page_aligned_alloc(output.get_frame_size());
+ auto status = output.read_async(buffer.get(), output.get_frame_size(), read_done);
+ if (HAILO_SUCCESS != status) {
+ std::cerr << "read_async failed with status=" << status << std::endl;
+ return status;
+ }
+
+ buffer_guards.emplace_back(buffer);
+ }
+
+ // We launch "*input_queue_size" async write operation. On each async callback, we launch a new async write operation.
+ for (size_t i = 0; i < *input_queue_size; i++) {
+ // Buffers written to async operation must be page aligned.
+ auto buffer = page_aligned_alloc(input.get_frame_size());
+ auto status = input.write_async(buffer.get(), input.get_frame_size(), write_done);
+ if (HAILO_SUCCESS != status) {
+ std::cerr << "write_async failed with status=" << status << std::endl;
+ return status;
+ }
+
+ buffer_guards.emplace_back(buffer);
+ }
+
+ // After all async operations are launched, the inference will continue until the activated_network_group
+ // destructor is called.
+ std::this_thread::sleep_for(std::chrono::seconds(5));
+
+ return HAILO_SUCCESS;
+}
+
+
+static Expected<std::shared_ptr<ConfiguredNetworkGroup>> configure_network_group(Device &device, const std::string &hef_path)
+{
+ auto hef = Hef::create(hef_path);
+ if (!hef) {
+ return make_unexpected(hef.status());
+ }
+
+ auto configure_params = device.create_configure_params(hef.value());
+ if (!configure_params) {
+ return make_unexpected(configure_params.status());
+ }
+
+ // change stream_params to operate in async mode
+ for (auto &ng_name_params_pair : *configure_params) {
+ for (auto &stream_params_name_pair : ng_name_params_pair.second.stream_params_by_name) {
+ stream_params_name_pair.second.flags = HAILO_STREAM_FLAGS_ASYNC;
+ }
+ }
+
+ auto network_groups = device.configure(hef.value(), configure_params.value());
+ if (!network_groups) {
+ return make_unexpected(network_groups.status());
+ }
+
+ if (1 != network_groups->size()) {
+ std::cerr << "Invalid amount of network groups" << std::endl;
+ return make_unexpected(HAILO_INTERNAL_FAILURE);
+ }
+
+ return std::move(network_groups->at(0));
+}
+
+int main()
+{
+ auto device = Device::create();
+ if (!device) {
+ std::cerr << "Failed to create device " << device.status() << std::endl;
+ return device.status();
+ }
+
+ static const auto HEF_FILE = "hefs/shortcut_net.hef";
+ auto network_group = configure_network_group(*device.value(), HEF_FILE);
+ if (!network_group) {
+ std::cerr << "Failed to configure network group" << HEF_FILE << std::endl;
+ return network_group.status();
+ }
+
+ // Assume one input and output
+ auto output = network_group->get()->get_output_streams()[0];
+ auto input = network_group->get()->get_input_streams()[0];
+
+ // Now start the inference
+ auto status = infer(*network_group.value(), input.get(), output.get());
+ if (HAILO_SUCCESS != status) {
+ std::cerr << "Inference failed with " << status << std::endl;
+ return status;
+ }
+
+ std::cout << "Inference finished successfully" << std::endl;
+ return HAILO_SUCCESS;
+}
find_package(Threads REQUIRED)
set(THREADS_PREFER_PTHREAD_FLAG ON)
-find_package(HailoRT 4.13.0 EXACT REQUIRED)
+find_package(HailoRT 4.14.0 EXACT REQUIRED)
add_executable(cpp_raw_streams_example raw_streams_example.cpp)
target_link_libraries(cpp_raw_streams_example PRIVATE HailoRT::libhailort Threads::Threads)
int main()
{
- auto device_ids = Device::scan();
- if (!device_ids) {
- std::cerr << "Failed to scan, status = " << device_ids.status() << std::endl;
- return device_ids.status();
- }
- if (device_ids->size() < 1){
- std::cerr << "Failed to find a connected hailo device." << std::endl;
- return HAILO_INVALID_OPERATION;
- }
- auto device = Device::create(device_ids->at(0));
+ auto device = Device::create();
if (!device) {
std::cerr << "Failed to create device " << device.status() << std::endl;
return device.status();
find_package(Threads REQUIRED)
set(THREADS_PREFER_PTHREAD_FLAG ON)
-find_package(HailoRT 4.13.0 EXACT REQUIRED)
+find_package(HailoRT 4.14.0 EXACT REQUIRED)
add_executable(cpp_switch_network_groups_example switch_network_groups_example.cpp)
target_link_libraries(cpp_switch_network_groups_example PRIVATE HailoRT::libhailort Threads::Threads)
constexpr hailo_format_type_t FORMAT_TYPE = HAILO_FORMAT_TYPE_AUTO;
constexpr size_t INFER_FRAME_COUNT = 100;
constexpr uint32_t DEVICE_COUNT = 1;
+constexpr size_t BATCH_SIZE_1 = 1;
+constexpr size_t BATCH_SIZE_2 = 2;
constexpr std::chrono::milliseconds SCHEDULER_TIMEOUT_MS(100);
constexpr uint32_t SCHEDULER_THRESHOLD = 3;
return VDevice::create(params);
}
-Expected<std::vector<std::shared_ptr<ConfiguredNetworkGroup>>> configure_hefs(VDevice &vdevice, std::vector<std::string> &hef_paths)
+Expected<std::vector<std::shared_ptr<ConfiguredNetworkGroup>>> configure_hefs(VDevice &vdevice, std::vector<std::string> &hef_paths,
+ const std::vector<uint16_t> &batch_sizes)
{
std::vector<std::shared_ptr<ConfiguredNetworkGroup>> results;
+ assert(hef_paths.size() == batch_sizes.size());
+ size_t i = 0;
for (const auto &path : hef_paths) {
auto hef_exp = Hef::create(path);
if (!hef_exp) {
}
auto hef = hef_exp.release();
+ auto configure_params = vdevice.create_configure_params(hef);
+ if (!configure_params) {
+ std::cerr << "Failed to create configure params" << std::endl;
+ return make_unexpected(configure_params.status());
+ }
+
+ // Modify batch_size for each network group
+ for (auto& network_group_params : configure_params.value()) {
+ network_group_params.second.batch_size = batch_sizes[i];
+ network_group_params.second.power_mode = HAILO_POWER_MODE_ULTRA_PERFORMANCE;
+ }
+ i++;
+
auto added_network_groups = vdevice.configure(hef, configure_params.value());
if (!added_network_groups) {
return make_unexpected(added_network_groups.status());
}
auto vdevice = vdevice_exp.release();
+ std::vector<uint16_t> batch_sizes { BATCH_SIZE_1, BATCH_SIZE_2 };
std::vector<std::string> hef_paths = {"hefs/multi_network_shortcut_net.hef", "hefs/shortcut_net.hef"};
- auto configured_network_groups_exp = configure_hefs(*vdevice, hef_paths);
+
+ auto configured_network_groups_exp = configure_hefs(*vdevice, hef_paths, batch_sizes);
if (!configured_network_groups_exp) {
std::cerr << "Failed to configure HEFs, status = " << configured_network_groups_exp.status() << std::endl;
return configured_network_groups_exp.status();
}
auto configured_network_groups = configured_network_groups_exp.release();
- // Set scheduler's timeout and threshold for the first network group, in order to give priority to the second network group
+ // Set scheduler's timeout and threshold for the first network group; this gives priority to the second network group
auto status = configured_network_groups[0]->set_scheduler_timeout(SCHEDULER_TIMEOUT_MS);
if (HAILO_SUCCESS != status) {
std::cerr << "Failed to set scheduler timeout, status = " << status << std::endl;
return status;
}
+ // Set a higher priority for the first network group directly.
+ // In practice this means the first network group becomes ready to run only once ``SCHEDULER_THRESHOLD`` send requests have been accumulated,
+ // or more than ``SCHEDULER_TIMEOUT_MS`` time has passed and at least one send request has been accumulated.
+ // However, when both network groups are ready to run, the first one is preferred over the second.
+ status = configured_network_groups[0]->set_scheduler_priority(HAILO_SCHEDULER_PRIORITY_NORMAL+1);
+ if (HAILO_SUCCESS != status) {
+ std::cerr << "Failed to set scheduler priority, status = " << status << std::endl;
+ return status;
+ }
+
auto vstreams_per_network_group_exp = build_vstreams(configured_network_groups);
if (!vstreams_per_network_group_exp) {
std::cerr << "Failed to create vstreams, status = " << vstreams_per_network_group_exp.status() << std::endl;
find_package(Threads REQUIRED)
set(THREADS_PREFER_PTHREAD_FLAG ON)
-find_package(HailoRT 4.13.0 EXACT REQUIRED)
+find_package(HailoRT 4.14.0 EXACT REQUIRED)
add_executable(cpp_switch_network_groups_manually_example switch_network_groups_manually_example.cpp)
target_link_libraries(cpp_switch_network_groups_manually_example PRIVATE HailoRT::libhailort Threads::Threads)
find_package(Threads REQUIRED)
set(THREADS_PREFER_PTHREAD_FLAG ON)
-find_package(HailoRT 4.13.0 EXACT REQUIRED)
+find_package(HailoRT 4.14.0 EXACT REQUIRED)
add_executable(cpp_vstreams_example vstreams_example.cpp)
target_link_libraries(cpp_vstreams_example PRIVATE HailoRT::libhailort Threads::Threads)
#define HEF_FILE ("hefs/shortcut_net.hef")
constexpr size_t FRAMES_COUNT = 100;
-constexpr bool QUANTIZED = true;
-constexpr hailo_format_type_t FORMAT_TYPE = HAILO_FORMAT_TYPE_AUTO;
constexpr size_t MAX_LAYER_EDGES = 16;
using namespace hailort;
return network_group.status();
}
- auto vstreams = VStreamsBuilder::create_vstreams(*network_group.value(), QUANTIZED, FORMAT_TYPE);
- if (!vstreams) {
- std::cerr << "Failed creating vstreams " << vstreams.status() << std::endl;
- return vstreams.status();
+ // Set input format type to auto, and mark the data as quantized - libhailort will not scale the data before writing to the HW
+ bool quantized = true;
+ auto input_vstream_params = network_group.value()->make_input_vstream_params(quantized, HAILO_FORMAT_TYPE_AUTO, HAILO_DEFAULT_VSTREAM_TIMEOUT_MS,
+ HAILO_DEFAULT_VSTREAM_QUEUE_SIZE);
+ if (!input_vstream_params) {
+ std::cerr << "Failed creating input vstreams params " << input_vstream_params.status() << std::endl;
+ return input_vstream_params.status();
}
- if (vstreams->first.size() > MAX_LAYER_EDGES || vstreams->second.size() > MAX_LAYER_EDGES) {
+ /* The input format order in the example HEF is NHWC on the user side (may be seen using 'hailortcli parse-hef <HEF_PATH>').
+ Here we override the user-side format order to be NCHW */
+ for (auto &params_pair : *input_vstream_params) {
+ params_pair.second.user_buffer_format.order = HAILO_FORMAT_ORDER_NCHW;
+ }
+
+ auto input_vstreams = VStreamsBuilder::create_input_vstreams(*network_group.value(), *input_vstream_params);
+ if (!input_vstreams) {
+ std::cerr << "Failed creating input vstreams " << input_vstreams.status() << std::endl;
+ return input_vstreams.status();
+ }
+
+ // Set output format type to float32, and mark the data as not quantized - libhailort will de-quantize the data after reading from the HW
+ // Note: this process might affect the overall performance
+ quantized = false;
+ auto output_vstream_params = network_group.value()->make_output_vstream_params(quantized, HAILO_FORMAT_TYPE_FLOAT32, HAILO_DEFAULT_VSTREAM_TIMEOUT_MS,
+ HAILO_DEFAULT_VSTREAM_QUEUE_SIZE);
+ if (!output_vstream_params) {
+ std::cerr << "Failed creating output vstreams params " << output_vstream_params.status() << std::endl;
+ return output_vstream_params.status();
+ }
+ auto output_vstreams = VStreamsBuilder::create_output_vstreams(*network_group.value(), *output_vstream_params);
+ if (!output_vstreams) {
+ std::cerr << "Failed creating output vstreams " << output_vstreams.status() << std::endl;
+ return output_vstreams.status();
+ }
+
+ if (input_vstreams->size() > MAX_LAYER_EDGES || output_vstreams->size() > MAX_LAYER_EDGES) {
std::cerr << "Trying to infer network with too many input/output virtual streams, Maximum amount is " <<
MAX_LAYER_EDGES << " (either change HEF or change the definition of MAX_LAYER_EDGES)"<< std::endl;
return HAILO_INVALID_OPERATION;
}
- auto status = infer(vstreams->first, vstreams->second);
+ auto status = infer(*input_vstreams, *output_vstreams);
if (HAILO_SUCCESS != status) {
std::cerr << "Inference failed " << status << std::endl;
return status;
HAILO_NET_FLOW_YOLOX_NMS = 15;
HAILO_NET_FLOW_SSD_NMS = 16;
HAILO_NET_FLOW_IOU_NMS = 17;
+ WRITE_DATA_BY_TYPE = 18;
+ NMS_OUTPUT_BURST = 19;
+ DUAL_DIRECTION_STREAM_INDEX = 20;
+ HAILO_NET_FLOW_ARGMAX = 21;
+ HAILO_NET_FLOW_SOFTMAX = 22;
+ ALIGNED_FORMAT_TYPE = 23;
+ OUTPUT_SCALE_PER_FEATURE = 25;
+ PERIPH_CALCULATION_IN_HAILORT = 26;
UNUSED = 0XFFFF;
}
uint32 cls_pad_index = 4;
};
+message ProtoHEFYoloxBboxDecoder {
+ // Pixel stride for the given bbox
+ uint32 stride = 1;
+
+ // Index of the pad connected to the encoded layer in the decoder (reg layer)
+ uint32 reg_pad_index = 2;
+
+ // Index of the pad connected to the classes scores layer in the decoder (cls layer)
+ uint32 cls_pad_index = 3;
+
+ // Index of the pad connected to the objectness scores layer in the decoder (objectness layer)
+ uint32 obj_pad_index = 4;
+};
+
+message ProtoHEFYoloxNmsOp {
+ // Input image dimensions
+ double image_height = 1;
+ double image_width = 2;
+
+ // List of bbox decoders (anchors) for the NMS layer. Each model has its own number of boxes per anchor
+ repeated ProtoHEFYoloxBboxDecoder bbox_decoders = 3;
+};
+
message ProtoHEFSSDNmsOp {
// Input image dimensions
double image_height = 1;
// Additional information needed for specific NMS types
oneof nms_op {
ProtoHEFYoloNmsOp yolo_nms_op = 7; // YOLOv5 post process
- ProtoHEFYoloNmsOp yolox_nms_op = 8; // YOLO-X post process (ignores bbox decoder coordinations)
+ ProtoHEFYoloxNmsOp yolox_nms_op = 8; // YOLO-X post process
ProtoHEFSSDNmsOp ssd_nms_op = 9; // SSD post process
ProtoHEFIOUNmsOp iou_op = 10; // IoU only
}
};
+enum ProtoHEFLogitsType {
+ PROTO_HEF_ARGMAX_TYPE = 0;
+ PROTO_HEF_SOFTMAX_TYPE = 1;
+}
+
+message ProtoHEFLogitsOp {
+ // Logits type (softmax/argmax)
+ ProtoHEFLogitsType logits_type = 1;
+};
+
enum ProtoHEFFormatOrder {
PROTO__FORMAT__ORDER__AUTO = 0;
PROTO__FORMAT__ORDER__NHWC = 1;
PROTO__UINT16 = 1;
};
+enum ProtoHEFFormatType {
+ PROTO__FORMAT__TYPE__AUTO = 0;
+ PROTO__FORMAT__TYPE__UINT8 = 1;
+ PROTO__FORMAT__TYPE__UINT16 = 2;
+ PROTO__FORMAT__TYPE__MAX_ENUM = 0XFFFF;
+};
+
message ProtoHEFTensorShape {
uint32 height = 1;
uint32 padded_height = 2;
string name = 2;
// Additional information describing the data going through this pad's interface
- ProtoHEFFormatOrder format = 3;
- ProtoHEFDataType data_bytes = 4;
+ ProtoHEFFormatOrder format_order = 3;
+ ProtoHEFDataType data_bytes = 4; // Unused (kept for compatibility). Should use format_type field
+ ProtoHEFFormatType format_type = 8;
ProtoHEFEdgeLayerNumericInfo numeric_info = 5;
oneof shape_info {
ProtoHEFTensorShape tensor_shape = 6;
// Op type for NMS post-process
ProtoHEFNmsOp nms_op = 5;
+
+ // Op type for Logits post-processing
+ ProtoHEFLogitsOp logits_op = 6;
}
};
ProtoHEFActionWaitForModuleConfigDone wait_for_module_config_done = 11;
ProtoHEFActionDebugSleep debug_sleep = 12;
ProtoHEFActionEnableNMS enable_nms = 13;
+ ProtoHEFActionWriteDataByType write_data_by_type = 14;
}
}
uint64 duration_in_usec = 1;
}
+enum ProtoHEFWriteDataType {
+ DATA_FROM_ACTION = 0;
+ BATCH_SIZE = 1;
+};
+
+message ProtoHEFActionWriteDataByType {
+ // The address to write the data
+ uint64 address = 1;
+
+ // Data type - determines which data is written
+ ProtoHEFWriteDataType data_type = 2;
+
+ // The data to be written when data_type=DATA_FROM_ACTION
+ bytes data = 3;
+
+ // The mask to use - ignored if data_type=DATA_FROM_ACTION and data size > 4
+ uint32 mask = 4;
+
+ // Network index
+ uint32 network_index = 5;
+
+ // Data shift
+ uint32 shift = 6;
+}
+
message InitialL3 {
// L3 cut index sequencer should start from
uint32 initial_l3_index = 1;
// Index of the network
uint32 network_index = 2;
+
+ // Number of classes
+ uint32 number_of_classes = 3;
+
+ // Burst size
+ uint32 burst_size = 4;
}
// None action - Do not do anything
ProtoHEFNmsInfo nms_info = 1;
}
+enum ProtoHEFNmsBurstType {
+ // No burst
+ PROTO__NMS_BURST_TYPE__NO_BURST = 0;
+ // No image delimiter, burst per class
+ PROTO__NMS_BURST_TYPE__H8_PER_CLASS = 1;
+ // Image delimiter and burst per class
+ PROTO__NMS_BURST_TYPE__H15_PER_CLASS = 2;
+ // Image delimiter and burst per image
+ PROTO__NMS_BURST_TYPE__H15_PER_FRAME = 3;
+}
+
// NMS specific parameters
message ProtoHEFNmsInfo {
uint32 type_index = 1;
bool is_defused = 5;
ProtoHEFNmsDefuseInfo defuse_info = 6;
uint64 input_division_factor = 7;
+ uint32 burst_size = 8;
+ ProtoHEFNmsBurstType burst_type = 9;
}
message ProtoHEFNmsDefuseInfo {
}
message ProtoHEFEdgeLayerNumericInfo {
- float qp_zp = 1;
- float qp_scale = 2;
+ float qp_zp = 1; // TODO: Remove, use vector
+ float qp_scale = 2; // TODO: Remove, use vector
float limvals_min = 3;
float limvals_max = 4;
+ repeated double qp_zps = 5; // zp per feature
+ repeated double qp_scales = 6; // scale per feature
}
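A short sketch of how the per-feature vectors would be consumed, assuming the same affine rule that applies to the scalar qp_zp/qp_scale fields (dequantized = scale * (quantized - zp)); the helper name is illustrative:

    #include <cstdint>
    #include <vector>

    // Illustrative helper: de-quantize one value of a given feature using per-feature
    // zero-points and scales (same affine rule as the scalar qp_zp/qp_scale fields).
    static float dequantize_per_feature(uint8_t q, size_t feature,
        const std::vector<double> &qp_zps, const std::vector<double> &qp_scales)
    {
        return static_cast<float>(qp_scales[feature] * (static_cast<double>(q) - qp_zps[feature]));
    }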
// An object that can be repeated in order to provide the order of the triggers.
uint32 dense_alignment_size = 1;
uint32 axi_width = 2;
uint32 memory_width = 3;
-}
\ No newline at end of file
+}
#define _HAILO_BUFFER_HPP_
#include "hailo/expected.hpp"
+#include "hailo/buffer_storage.hpp"
#include <memory>
#include <cstdint>
#include <cassert>
+/** hailort namespace */
namespace hailort
{
// Empty buffer (points to null, size is zero)
Buffer();
+ // Buffer backed by the storage param
+ Buffer(BufferStoragePtr storage);
~Buffer() = default;
Buffer(const Buffer& other) = delete;
* Create functions, may fail due to out of memory
*/
// Creates a buffer size bytes long, without setting the memory
- static Expected<Buffer> create(size_t size);
+ static Expected<Buffer> create(size_t size, const BufferStorageParams &params = {});
 // Creates a buffer size bytes long, setting the memory to default_value
- static Expected<Buffer> create(size_t size, uint8_t default_value);
+ static Expected<Buffer> create(size_t size, uint8_t default_value, const BufferStorageParams &params = {});
 // Creates a copy of the data pointed to by src, size bytes long
- static Expected<Buffer> create(const uint8_t *src, size_t size);
+ static Expected<Buffer> create(const uint8_t *src, size_t size, const BufferStorageParams &params = {});
 // Creates a new buffer with the contents of the initializer_list
- static Expected<Buffer> create(std::initializer_list<uint8_t> init);
-
+ static Expected<Buffer> create(std::initializer_list<uint8_t> init, const BufferStorageParams &params = {});
+
 // Creates a buffer size bytes long, without setting the memory
- static Expected<BufferPtr> create_shared(size_t size);
+ static Expected<BufferPtr> create_shared(size_t size, const BufferStorageParams &params = {});
 // Creates a buffer size bytes long, setting the memory to default_value
- static Expected<BufferPtr> create_shared(size_t size, uint8_t default_value);
+ static Expected<BufferPtr> create_shared(size_t size, uint8_t default_value, const BufferStorageParams &params = {});
+ // Creates a copy of the data pointed to by src, size bytes long
+ static Expected<BufferPtr> create_shared(const uint8_t *src, size_t size, const BufferStorageParams &params = {});
// Moves the data pointed to by other into the lvalue:
// * other is invalidated.
iterator begin();
iterator end();
+ BufferStorage &storage();
+
// Returns a pointer to the start of the buffer
uint8_t* data() noexcept;
const uint8_t* data() const noexcept;
// Returns the size of the buffer
size_t size() const noexcept;
-
- // Returns a pointer to the start of the buffer and releases the ownership
- // Free the returned pointer with `delete`
- uint8_t* release() noexcept;
// Casts the buffer to a string of length size().
// If there's a null char in the buffer, the string will terminate at the null char
T* as_pointer() const
{
assert(m_size >= sizeof(T));
- return reinterpret_cast<T*>(m_data.get());
+ return reinterpret_cast<T*>(m_data);
}
// Returns a copy of the data at the start of the buffer, cast to T
T as_type() const
{
assert(m_size >= sizeof(T));
- return *(reinterpret_cast<const T*>(m_data.get()));
+ return *(reinterpret_cast<const T*>(m_data));
}
// The following functions return a copy of the data at the start of the buffer, cast to uint16/32/64_t
- // Note: If this->size() is less than the size of the ineger type, then the copy will hold data
+ // Note: If this->size() is less than the size of the integer type, then the copy will hold data
// that isn't from the buffer!
uint16_t as_uint16() const;
uint32_t as_uint32() const;
}
// The following functions return references of the data at the start of the buffer, cast to uint16/32/64_t
- // Note: If this->size() is less than the size of the ineger type, then the copy will hold data
+ // Note: If this->size() is less than the size of the integer type, then the copy will hold data
// that isn't from the buffer!
uint16_t& as_uint16();
uint32_t& as_uint32();
uint64_t& as_uint64();
private:
- Buffer(std::unique_ptr<uint8_t[]> data, size_t size);
-
- std::unique_ptr<uint8_t[]> m_data;
+ // Initialization dependency
+ BufferStoragePtr m_storage;
+ uint8_t *m_data;
size_t m_size;
};
explicit MemoryView(Buffer &buffer);
MemoryView(void *data, size_t size);
~MemoryView() = default;
-
+
MemoryView& operator=(MemoryView&& other) = default;
MemoryView(const MemoryView &) = default;
MemoryView& operator=(MemoryView &) = default;
--- /dev/null
+/**
+ * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+**/
+/**
+ * @file buffer_storage.hpp
+ * @brief TODO: fill me (HRT-10026)
+ **/
+
+#ifndef _HAILO_BUFFER_STORAGE_HPP_
+#define _HAILO_BUFFER_STORAGE_HPP_
+
+#include "hailo/hailort.h"
+#include "hailo/expected.hpp"
+
+#include <memory>
+#include <cstdint>
+#include <functional>
+#include <vector>
+#include <unordered_map>
+#include <string>
+
+
+/** hailort namespace */
+namespace hailort
+{
+
+// Forward declarations
+class Device;
+class VDevice;
+class BufferStorage;
+class HeapStorage;
+class DmaStorage;
+class HailoRTDriver;
+
+namespace vdma {
+    class DmaAbleBuffer;
+    using DmaAbleBufferPtr = std::shared_ptr<DmaAbleBuffer>;
+
+    class MappedBuffer;
+    using MappedBufferPtr = std::shared_ptr<MappedBuffer>;
+}
+
+
+/*! Buffer storage parameters. Analogous to hailo_buffer_parameters_t */
+struct HAILORTAPI BufferStorageParams
+{
+public:
+    struct HeapParams
+    {
+    public:
+        HeapParams();
+    };
+
+    struct DmaMappingParams
+    {
+    public:
+        static Expected<DmaMappingParams> create(const hailo_buffer_dma_mapping_params_t &params);
+        // DmaMappingParams for a buffer to be mapped to device
+        DmaMappingParams(Device &device, hailo_dma_buffer_direction_t data_direction);
+        // DmaMappingParams for a buffer to be mapped to all the underlying devices held by vdevice
+        DmaMappingParams(VDevice &vdevice, hailo_dma_buffer_direction_t data_direction);
+        // DmaMappingParams for a buffer to be lazily mapped upon its first async transfer to a given device
+        DmaMappingParams();
+
+        // Note: We hold a pointer to a Device/VDevice/neither, since DmaMappingParams supports mapping to
+        // a device, to a vdevice, or lazy mapping
+        Device *device;
+        VDevice *vdevice;
+        hailo_dma_buffer_direction_t data_direction;
+
+    private:
+        DmaMappingParams(const hailo_buffer_dma_mapping_params_t &params);
+    };
+
+    static Expected<BufferStorageParams> create(const hailo_buffer_parameters_t &params);
+    // Dma buffer params for lazy mapping
+    static BufferStorageParams create_dma();
+    // Dma buffer params for mapping to device in data_direction
+    static BufferStorageParams create_dma(Device &device, hailo_dma_buffer_direction_t data_direction);
+    // Dma buffer params for mapping to vdevice in data_direction
+    static BufferStorageParams create_dma(VDevice &vdevice, hailo_dma_buffer_direction_t data_direction);
+
+    // Defaults to heap params
+    BufferStorageParams();
+
+    hailo_buffer_flags_t flags;
+    union {
+        HeapParams heap_params;
+        DmaMappingParams dma_mapping_params;
+    };
+};
+
+using BufferStoragePtr = std::shared_ptr<BufferStorage>;
+
+class HAILORTAPI BufferStorage
+{
+public:
+    enum class Type {
+        HEAP,
+        DMA
+    };
+
+    static Expected<BufferStoragePtr> create(size_t size, const BufferStorageParams &params);
+
+    BufferStorage(BufferStorage&& other) noexcept = default;
+    BufferStorage(const BufferStorage &) = delete;
+    BufferStorage &operator=(BufferStorage &&) = delete;
+    BufferStorage &operator=(const BufferStorage &) = delete;
+    virtual ~BufferStorage() = default;
+
+    Type type() const;
+    virtual size_t size() const = 0;
+    virtual void *user_address() = 0;
+    // Returns the pointer managed by this object and releases ownership
+    // TODO: Add a free function pointer? (HRT-10024)
+    // // Free the returned pointer with `delete`
+    // TODO: after release the containing buffer will hold pointers to values that were released.
+    // Document that this can happen? Disable this behavior somehow? (HRT-10024)
+    virtual Expected<void *> release() noexcept = 0;
+    // Maps the storage to device in data_direction.
+    // - If the mapping is new - true is returned.
+    // - If the mapping already exists - false is returned.
+    // - Otherwise - Unexpected with a failure status is returned.
+    virtual Expected<bool> dma_map(Device &device, hailo_dma_buffer_direction_t data_direction) = 0;
+    // Maps the backing buffer to a device (via the driver) in data_direction.
+    // - If the mapping is new - true is returned.
+    // - If the mapping already exists - false is returned.
+    // - Otherwise - Unexpected with a failure status is returned.
+    virtual Expected<bool> dma_map(HailoRTDriver &driver, hailo_dma_buffer_direction_t data_direction) = 0;
+
+    // Internal functions
+    virtual Expected<vdma::MappedBufferPtr> get_dma_mapped_buffer(const std::string &device_id) = 0;
+
+protected:
+    explicit BufferStorage(Type type);
+
+    const Type m_type;
+};
+
+using HeapStoragePtr = std::shared_ptr<HeapStorage>;
+
+class HAILORTAPI HeapStorage : public BufferStorage
+{
+public:
+    static Expected<HeapStoragePtr> create(size_t size);
+    HeapStorage(std::unique_ptr<uint8_t[]> data, size_t size);
+    HeapStorage(HeapStorage&& other) noexcept;
+    HeapStorage(const HeapStorage &) = delete;
+    HeapStorage &operator=(HeapStorage &&) = delete;
+    HeapStorage &operator=(const HeapStorage &) = delete;
+    virtual ~HeapStorage() = default;
+
+    virtual size_t size() const override;
+    virtual void *user_address() override;
+    virtual Expected<void *> release() noexcept override;
+    virtual Expected<bool> dma_map(Device &device, hailo_dma_buffer_direction_t data_direction) override;
+    virtual Expected<bool> dma_map(HailoRTDriver &driver, hailo_dma_buffer_direction_t data_direction) override;
+
+    // Internal functions
+    virtual Expected<vdma::MappedBufferPtr> get_dma_mapped_buffer(const std::string &device_id) override;
+
+private:
+    std::unique_ptr<uint8_t[]> m_data;
+    size_t m_size;
+};
+
+// ************************************* NOTE - START ************************************* //
+// DmaStorage isn't currently supported and is for internal use only                        //
+// **************************************************************************************** //
+using DmaStoragePtr = std::shared_ptr<DmaStorage>;
+
+// TODO: HRT-10026 doc this
+class HAILORTAPI DmaStorage : public BufferStorage
+{
+public:
+    // Creates a DmaStorage instance holding a dma-able buffer size bytes large.
+    // The buffer isn't mapped to dma until dma_map is called.
+    static Expected<DmaStoragePtr> create(size_t size);
+    // Creates a DmaStorage instance holding a dma-able buffer size bytes large.
+    // The buffer is mapped to device in data_direction.
+    static Expected<DmaStoragePtr> create(size_t size,
+        hailo_dma_buffer_direction_t data_direction, Device &device);
+    // Creates a DmaStorage instance holding a dma-able buffer size bytes large.
+    // The buffer is mapped to vdevice.get_physical_devices() in data_direction.
+    static Expected<DmaStoragePtr> create(size_t size,
+        hailo_dma_buffer_direction_t data_direction, VDevice &vdevice);
+
+    // TODO: doc that the addr needs to be on a new page and aligned to 64B (HRT-9559)
+    // probably best just to call mmap
+    // Creates a DmaStorage instance backed by the size bytes large buffer pointed to by user_address.
+    // The buffer isn't mapped to dma until dma_map is called.
+    static Expected<DmaStoragePtr> create_from_user_address(void *user_address, size_t size);
+    // Creates a DmaStorage instance backed by the size bytes large buffer pointed to by user_address.
+    // The buffer is mapped to device in data_direction.
+    static Expected<DmaStoragePtr> create_from_user_address(void *user_address, size_t size,
+        hailo_dma_buffer_direction_t data_direction, Device &device);
+    // Creates a DmaStorage instance backed by the size bytes large buffer pointed to by user_address.
+    // The buffer is mapped to vdevice.get_physical_devices() in data_direction.
+    static Expected<DmaStoragePtr> create_from_user_address(void *user_address, size_t size,
+        hailo_dma_buffer_direction_t data_direction, VDevice &vdevice);
+
+    DmaStorage(const DmaStorage &other) = delete;
+    DmaStorage &operator=(const DmaStorage &other) = delete;
+    DmaStorage(DmaStorage &&other) noexcept = default;
+    DmaStorage &operator=(DmaStorage &&other) = delete;
+    virtual ~DmaStorage() = default;
+
+    virtual size_t size() const override;
+    virtual void *user_address() override;
+    virtual Expected<void *> release() noexcept override;
+    // TODO: thread safety (HRT-10669)
+    virtual Expected<bool> dma_map(Device &device, hailo_dma_buffer_direction_t data_direction) override;
+    virtual Expected<bool> dma_map(HailoRTDriver &driver, hailo_dma_buffer_direction_t data_direction) override;
+
+    // Internal functions
+    DmaStorage(vdma::DmaAbleBufferPtr &&dma_able_buffer);
+    virtual Expected<vdma::MappedBufferPtr> get_dma_mapped_buffer(const std::string &device_id) override;
+
+private:
+    // Creates a backing dma-able buffer (either user or hailort allocated).
+    // Maps said buffer to physical_devices in data_direction.
+    // By default (if physical_devices is empty), no mapping will occur
+    static Expected<DmaStoragePtr> create(void *user_address, size_t size,
+        hailo_dma_buffer_direction_t data_direction = HAILO_DMA_BUFFER_DIRECTION_MAX_ENUM,
+        std::vector<std::reference_wrapper<Device>> &&physical_devices = {});
+
+    vdma::DmaAbleBufferPtr m_dma_able_buffer;
+
+    // For each device (key is device_id), we store some vdma mapping.
+    // TODO: use (device_id, direction) as key - HRT-10656
+    std::unordered_map<std::string, vdma::MappedBufferPtr> m_mappings;
+};
+// ************************************** NOTE - END ************************************** //
+// DmaStorage isn't currently supported and is for internal use only                        //
+// **************************************************************************************** //
+
+} /* namespace hailort */
+
+#endif /* _HAILO_BUFFER_STORAGE_HPP_ */
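A minimal usage sketch for the storage-aware buffer API above (heap storage is the default; the DMA path is marked internal-only above, and `device` here is a placeholder Device reference):

    // Heap-backed buffer (default BufferStorageParams).
    auto heap_buffer = hailort::Buffer::create(1024);

    // DMA-capable buffer, mapped to `device` in the H2D direction (internal-only per the note above).
    auto dma_params = hailort::BufferStorageParams::create_dma(device, HAILO_DMA_BUFFER_DIRECTION_H2D);
    auto dma_buffer = hailort::Buffer::create(1024, dma_params);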
#include <chrono>
+/** hailort namespace */
namespace hailort
{
std::chrono::milliseconds timeout);
/**
- * Creates a device if there is only one system device detected in the system.
- *
+ * Creates a device. If more than one device is detected in the system, an arbitrary device is returned.
+ *
* @return Upon success, returns Expected of a unique_ptr to Device object.
* Otherwise, returns Unexpected of ::hailo_status error.
*/
/**
* Creates a device by the given device id.
- *
+ *
* @param[in] device_id Device id string, can represent several device types:
* [-] for pcie devices - pcie bdf (XXXX:XX:XX.X)
* [-] for ethernet devices - ip address (xxx.xxx.xxx.xxx)
- *
+ *
* @return Upon success, returns Expected of a unique_ptr to Device object.
* Otherwise, returns Unexpected of ::hailo_status error.
*/
static Expected<std::unique_ptr<Device>> create(const std::string &device_id);
/**
- * Creates pcie device if there is only one pcie device connected
- *
+ * Creates a PCIe device. If more than one PCIe device is detected in the system, an arbitrary
+ * PCIe device is returned.
+ *
* @return Upon success, returns Expected of a unique_ptr to Device object.
* Otherwise, returns Unexpected of ::hailo_status error.
*/
/**
* Creates a PCIe device by the given info.
- *
+ *
* @param[in] device_info Information about the device to open.
* @return Upon success, returns Expected of a unique_ptr to Device object.
* Otherwise, returns Unexpected of ::hailo_status error.
/**
* Creates an ethernet device by the given info.
- *
+ *
* @param[in] device_info Information about the device to open.
* @return Upon success, returns Expected of a unique_ptr to Device object.
* Otherwise, returns Unexpected of ::hailo_status error.
/**
* Creates an ethernet device by IP address.
- *
+ *
* @param[in] ip_addr The device IP address.
* @return Upon success, returns Expected of a unique_ptr to Device object.
* Otherwise, returns Unexpected of ::hailo_status error.
*/
static Expected<std::unique_ptr<Device>> create_eth(const std::string &ip_addr);
+ /**
+ * Creates an ethernet device by IP address, port number, timeout duration, and max number of attempts.
+ *
+ * @param[in] device_address The device IP address.
+ * @param[in] port The port number that the device will use for the Ethernet communication.
+ * @param[in] timeout_milliseconds The time in milliseconds to scan devices.
+ * @param[in] max_number_of_attempts The number of attempts to find a device.
+ * @return Upon success, returns Expected of a unique_ptr to Device object.
+ * Otherwise, returns Unexpected of ::hailo_status error.
+ */
+ static Expected<std::unique_ptr<Device>> create_eth(const std::string &device_address, uint16_t port, uint32_t timeout_milliseconds, uint8_t max_number_of_attempts);
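For illustration, a call using this overload (all values are placeholders):

    auto device = hailort::Device::create_eth("10.0.0.1", /*port=*/22401,
        /*timeout_milliseconds=*/1000, /*max_number_of_attempts=*/3);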
+
/**
* Parse PCIe device BDF string into hailo device info structure.
- *
+ *
* @param[in] device_info_str BDF device info, format [\<domain\>].\<bus\>.\<device\>.\<func\>, same format as in lspci.
* @return Upon success, returns Expected of ::hailo_pcie_device_info_t containing the information.
* Otherwise, returns Unexpected of ::hailo_status error.
/**
* Returns a string of pcie device info.
- *
+ *
* @param[in] device_info A ::hailo_pcie_device_info_t containing the pcie device information.
* @return Upon success, returns Expected of a string containing the information.
* Otherwise, returns Unexpected of ::hailo_status error.
/**
* Returns the device type of the given device id string.
- *
+ *
* @param[in] device_id A std::string device id to check.
* @return Upon success, returns Expected of the device type.
* Otherwise, returns Unexpected of ::hailo_status error.
*/
static Expected<Type> get_device_type(const std::string &device_id);
+ /**
+ * Checks whether two device ids represent the same device.
+ *
+ * @param[in] first A std::string first device id to check.
+ * @param[in] second A std::string second device id to check.
+ * @return true if the device ids represent the same device.
+ */
+ static bool device_ids_equal(const std::string &first, const std::string &second);
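For example, two spellings of the same PCIe BDF (with and without the domain, as described for device ids above) are expected to compare equal:

    bool same = hailort::Device::device_ids_equal("0000:01:00.0", "01:00.0");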
+
/**
* Create the default configure params from an hef.
*
+++ /dev/null
-/**
- * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
- * Distributed under the MIT license (https://opensource.org/licenses/MIT)
- **/
-/**
- * @file dma_mapped_buffer.hpp
- * @brief The mapped buffer that is continuous in virtual memory, but not on physical memory.
- * We map the buffer to the IOMMU.
- *
- * The buffer can be used only with the help of a descriptors list that contains pointers to a physical
- * continuous "dma pages".
- *
- * There are 2 options to allocated the buffer:
- * 1. User mode allocation - the user mode calls `malloc` or `mmap` to allocate the buffer, then
- * using HailoRTDriver we map the driver to the IOMMU (and pin the pages to avoid pagigs).
- * This is the default option
- * 2. Kernel mode allocation - on some systems, the user mode doesn't allocate the memory in a "dma-able" address,
- * so we need to allocate the pages in driver.
- **/
-
-#ifndef _HAILO_DMA_MAPPED_BUFFER_HPP_
-#define _HAILO_DMA_MAPPED_BUFFER_HPP_
-
-#include "hailo/expected.hpp"
-#include "hailo/device.hpp"
-
-
-namespace hailort {
-
-// Forward deceleration across namespaces
-namespace vdma {
- class DescriptorList;
- class MappedBufferFactory;
- class BufferedChannel;
-}
-
-// ******************************************** NOTE ******************************************** //
-// Async Stream API and DmaMappedBuffer are currently not supported and are for internal use only //
-// ********************************************************************************************** //
-class HAILORTAPI DmaMappedBuffer final
-{
-public:
- static Expected<DmaMappedBuffer> create(size_t size,
- hailo_vdma_buffer_direction_flags_t data_direction_flags, Device &device);
- // TODO: doc that the addr needs to be on a new page and aligned to 64B (HRT-9559)
- // probably best just to call mmap
- static Expected<DmaMappedBuffer> create_from_user_address(void *user_address, size_t size,
- hailo_vdma_buffer_direction_flags_t data_direction_flags, Device &device);
-
- DmaMappedBuffer(const DmaMappedBuffer &other) = delete;
- DmaMappedBuffer &operator=(const DmaMappedBuffer &other) = delete;
- DmaMappedBuffer(DmaMappedBuffer &&other) noexcept;
- DmaMappedBuffer &operator=(DmaMappedBuffer &&other) = delete;
- ~DmaMappedBuffer();
-
- void *user_address();
- size_t size() const;
- hailo_status synchronize();
-
-private:
- static Expected<DmaMappedBuffer> create(void *user_address, size_t size,
- hailo_vdma_buffer_direction_flags_t data_direction_flags, Device &device);
-
- // Need access to pimpl
- friend class vdma::DescriptorList;
- friend class vdma::MappedBufferFactory;
- friend class vdma::BufferedChannel;
-
- class Impl;
- explicit DmaMappedBuffer(std::unique_ptr<Impl> pimpl);
- std::unique_ptr<Impl> pimpl;
-};
-
-} /* namespace hailort */
-
-#endif /* _HAILO_DMA_MAPPED_BUFFER_HPP_ */
\ No newline at end of file
}
#endif // defined (__QNX__)
+/** hailort namespace */
namespace hailort
{
class HAILORTAPI Waitable
{
-public:
+public:
explicit Waitable(underlying_waitable_handle_t handle);
virtual ~Waitable();
Waitable(Waitable&& other);
Waitable& operator=(Waitable&&) = delete;
// Blocks the current thread until the waitable is signaled
- // * If this->is_auto_reset(), then the Waitable is reset after wait returns with HAILO_SUCCESS
+ // * If this->is_auto_reset(), then the Waitable is reset after wait returns with HAILO_SUCCESS
// * Otherwise, the Waitable is not reset
- virtual hailo_status wait(std::chrono::milliseconds timeout) = 0;
+ virtual hailo_status wait(std::chrono::milliseconds timeout);
virtual hailo_status signal() = 0;
virtual bool is_auto_reset() = 0;
underlying_waitable_handle_t get_underlying_handle();
-#if defined(__QNX__)
- virtual void post_wait() = 0;
-#endif // defined (__QNX__)
static constexpr auto INIFINITE_TIMEOUT() { return std::chrono::milliseconds(HAILO_INFINITE); }
protected:
- #if defined(_MSC_VER) || defined(__QNX__)
+ virtual hailo_status post_wait() = 0;
+
static hailo_status wait_for_single_object(underlying_waitable_handle_t handle, std::chrono::milliseconds timeout);
- #else
+
+#if defined(__linux__)
// Waits on the fd until the waitable is signaled
static hailo_status eventfd_poll(underlying_waitable_handle_t fd, std::chrono::milliseconds timeout);
// Expected to be called after eventfd_poll returns HAILO_SUCCESS
static hailo_status eventfd_read(underlying_waitable_handle_t fd);
static hailo_status eventfd_write(underlying_waitable_handle_t fd);
- #endif
+#endif
underlying_waitable_handle_t m_handle;
+
+ friend class WaitableGroup;
};
class Event;
static Expected<Event> create(const State& initial_state);
static EventPtr create_shared(const State& initial_state);
- virtual hailo_status wait(std::chrono::milliseconds timeout) override;
virtual hailo_status signal() override;
virtual bool is_auto_reset() override;
hailo_status reset();
-#if defined(__QNX__)
- virtual void post_wait() override;
-#endif // defined (__QNX__)
+
+protected:
+ virtual hailo_status post_wait() override { return HAILO_SUCCESS; }
private:
+
static underlying_waitable_handle_t open_event_handle(const State& initial_state);
};
static Expected<Semaphore> create(uint32_t initial_count);
static SemaphorePtr create_shared(uint32_t initial_count);
- virtual hailo_status wait(std::chrono::milliseconds timeout) override;
virtual hailo_status signal() override;
virtual bool is_auto_reset() override;
+
#if defined(__QNX__)
Semaphore(underlying_waitable_handle_t handle, uint32_t initial_count);
Semaphore(Semaphore&& other);
- virtual void post_wait() override;
#endif // defined (__QNX__)
+protected:
+ virtual hailo_status post_wait() override;
+
private:
static underlying_waitable_handle_t open_semaphore_handle(uint32_t initial_count);
#if defined (__QNX__)
#include <type_traits>
+/** hailort namespace */
namespace hailort
{
#define HAILO_DEFAULT_INIT_AVERAGING_FACTOR (HAILO_AVERAGE_FACTOR_256)
#define HAILO_DEFAULT_BUFFERS_THRESHOLD (0)
#define HAILO_DEFAULT_MAX_ETHERNET_BANDWIDTH_BYTES_PER_SEC (106300000)
-#define HAILO_MAX_STREAMS_COUNT (32)
+#define HAILO_MAX_STREAMS_COUNT (40)
#define HAILO_DEFAULT_BATCH_SIZE (0)
#define HAILO_MAX_NETWORK_GROUPS (8)
#define HAILO_MAX_NETWORK_GROUP_NAME_SIZE (HAILO_MAX_NAME_SIZE)
HAILO_STATUS__X(77, HAILO_RPC_FAILED /*!< RPC failed */)\
HAILO_STATUS__X(78, HAILO_INVALID_SERVICE_VERSION /*!< Invalid service version */)\
HAILO_STATUS__X(79, HAILO_NOT_SUPPORTED /*!< Not supported operation */)\
+ HAILO_STATUS__X(80, HAILO_NMS_BURST_INVALID_DATA /*!< Invalid data in NMS burst */)\
+ HAILO_STATUS__X(81, HAILO_OUT_OF_HOST_CMA_MEMORY /*!< Cannot allocate more CMA memory at host */)\
+ HAILO_STATUS__X(82, HAILO_QUEUE_IS_FULL /*!< Cannot push more items into the queue */)\
+ HAILO_STATUS__X(83, HAILO_DMA_MAPPING_ALREADY_EXISTS /*!< DMA mapping already exists */)\
typedef enum {
#define HAILO_STATUS__X(value, name) name = value,
/** Must be last! */
HAILO_STATUS_COUNT,
-
+
/** Max enum value to maintain ABI Integrity */
HAILO_STATUS_MAX_ENUM = HAILO_MAX_ENUM
} hailo_status;
HAILO_STREAM_DIRECTION_MAX_ENUM = HAILO_MAX_ENUM
} hailo_stream_direction_t;
-// ******************************************** NOTE ******************************************** //
-// Async Stream API and DmaMappedBuffer are currently not supported and are for internal use only //
-// ********************************************************************************************** //
/** Stream flags */
typedef enum {
HAILO_STREAM_FLAGS_NONE = 0, /*!< No flags */
HAILO_STREAM_FLAGS_MAX_ENUM = HAILO_MAX_ENUM
} hailo_stream_flags_t;
-/** Hailo vdma buffer direction */
+// ************************************* NOTE - START ************************************* //
+// Dma buffer allocation isn't currently supported and is for internal use only //
+// **************************************************************************************** //
+/** Hailo dma buffer direction */
+typedef enum {
+ HAILO_DMA_BUFFER_DIRECTION_H2D = 0,
+ HAILO_DMA_BUFFER_DIRECTION_D2H = 1,
+ HAILO_DMA_BUFFER_DIRECTION_BOTH = 2,
+
+ /** Max enum value to maintain ABI Integrity */
+ HAILO_DMA_BUFFER_DIRECTION_MAX_ENUM = HAILO_MAX_ENUM
+} hailo_dma_buffer_direction_t;
+
+/** Hailo buffer flags */
typedef enum {
- HAILO_VDMA_BUFFER_DIRECTION_FLAGS_NONE = 0,
- HAILO_VDMA_BUFFER_DIRECTION_FLAGS_H2D = 1 << 0,
- HAILO_VDMA_BUFFER_DIRECTION_FLAGS_D2H = 1 << 1,
+ HAILO_BUFFER_FLAGS_NONE = 0, /*!< No flags - heap allocated buffer */
+ HAILO_BUFFER_FLAGS_DMA = 1 << 0, /*!< Buffer is mapped to DMA (will be page aligned implicitly) */
/** Max enum value to maintain ABI Integrity */
- HAILO_VDMA_BUFFER_DIRECTION_FLAGS_MAX_ENUM = HAILO_MAX_ENUM
-} hailo_vdma_buffer_direction_flags_t;
+ HAILO_BUFFER_FLAGS_MAX_ENUM = HAILO_MAX_ENUM
+} hailo_buffer_flags_t;
+
+/** Hailo buffer heap parameters */
+typedef struct {
+ EMPTY_STRUCT_PLACEHOLDER
+} hailo_buffer_heap_params_t;
+
+// Hailo buffer dma mapping parameters.
+// - If device is not NULL, the resulting buffer created by hailo_allocate_buffer will be mapped to the device.
+// - If vdevice is not NULL, the resulting buffer created by hailo_allocate_buffer will be mapped to all the
+// underlying devices held by vdevice.
+// - If both device and vdevice are null, the resulting buffer created by hailo_allocate_buffer will be lazily
+// mapped upon the first async transfer (i.e. when the buffer is passed to hailo_stream_read_raw_buffer_async
+// or hailo_stream_write_raw_buffer_async).
+typedef struct {
+ hailo_device device;
+ hailo_vdevice vdevice;
+ hailo_dma_buffer_direction_t direction;
+} hailo_buffer_dma_mapping_params_t;
+
+/** Hailo buffer parameters */
+typedef struct {
+ hailo_buffer_flags_t flags;
+ union {
+ hailo_buffer_heap_params_t heap_params;
+ hailo_buffer_dma_mapping_params_t dma_mapping_params;
+ };
+} hailo_buffer_parameters_t;
+// ************************************** NOTE - END ************************************** //
+// Dma buffer allocation isn't currently supported and is for internal use only //
+// **************************************************************************************** //
/** Input or output data transform parameters */
typedef struct {
char original_name[HAILO_MAX_STREAM_NAME_SIZE];
} hailo_nms_defuse_info_t;
+typedef enum {
+ HAILO_BURST_TYPE_NO_BURST = 0,
+ HAILO_BURST_TYPE_H8_PER_CLASS = 1,
+ HAILO_BURST_TYPE_H15_PER_CLASS = 2,
+ HAILO_BURST_TYPE_H15_PER_FRAME = 3
+} hailo_nms_burst_type_t;
+
/** NMS Internal HW Info */
typedef struct {
/** Amount of NMS classes */
uint32_t chunks_per_frame;
bool is_defused;
hailo_nms_defuse_info_t defuse_info;
+ /** Size of NMS burst in bytes */
+ uint32_t burst_size;
+ /** NMS burst type */
+ hailo_nms_burst_type_t burst_type;
} hailo_nms_info_t;
/** NMS Fuse Input */
} hailo_bbox_float32_t;
#pragma pack(pop)
+/**
+ * Completion info struct passed to the ::hailo_stream_write_async_callback_t after the async operation is
+ * done or has failed.
+ */
+typedef struct {
+ /**
+ * Status of the async transfer:
+ * - ::HAILO_SUCCESS - The transfer is complete.
+ * - ::HAILO_STREAM_ABORTED_BY_USER - The transfer was canceled (can happen after network deactivation).
+ * - Any other ::hailo_status on unexpected errors.
+ */
+ hailo_status status;
+
+ /** Address of the buffer passed to the async operation */
+ const void *buffer_addr;
+
+ /** Size of the buffer passed to the async operation. */
+ size_t buffer_size;
+
+ /** User specific data. Can be used as a context for the callback. */
+ void *opaque;
+} hailo_stream_write_async_completion_info_t;
+
+/**
+ * Async stream write complete callback prototype.
+ */
+typedef void (*hailo_stream_write_async_callback_t)(const hailo_stream_write_async_completion_info_t *info);
+
+/**
+ * Completion info struct passed to the ::hailo_stream_read_async_callback_t after the async operation is
+ * done or has failed.
+ */
typedef struct {
/**
- * - HAILO_SUCCESS when transfer is complete
- * - HAILO_STREAM_NOT_ACTIVATED due to stream deactivation
+ * Status of the async transfer:
+ * - ::HAILO_SUCCESS - The transfer is complete.
+ * - ::HAILO_STREAM_ABORTED_BY_USER - The transfer was canceled (can happen after network deactivation).
+ * - Any other ::hailo_status on unexpected errors.
*/
hailo_status status;
-} hailo_async_transfer_completion_info_t;
+
+ /** Address of the buffer passed to the async operation */
+ void *buffer_addr;
+
+ /** Size of the buffer passed to the async operation. */
+ size_t buffer_size;
+
+ /** User specific data. Can be used as a context for the callback. */
+ void *opaque;
+} hailo_stream_read_async_completion_info_t;
+
+/**
+ * Async stream read complete callback prototype.
+ */
+typedef void (*hailo_stream_read_async_callback_t)(const hailo_stream_read_async_completion_info_t *info);
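A minimal callback sketch matching the prototype above (names are illustrative):

    void on_read_done(const hailo_stream_read_async_completion_info_t *info)
    {
        if (HAILO_SUCCESS == info->status) {
            // info->buffer_addr now holds info->buffer_size bytes of output data.
        } else if (HAILO_STREAM_ABORTED_BY_USER != info->status) {
            // Unexpected failure; info->opaque can carry user context for error reporting.
        }
    }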
/**
* Input or output stream information. In case of multiple inputs or outputs, each one has
HAILO_NOTIFICATION_ID_CONTEXT_SWITCH_BREAKPOINT_REACHED,
/** Matches hailo_notification_message_parameters_t::health_monitor_clock_changed_notification */
HAILO_NOTIFICATION_ID_HEALTH_MONITOR_CLOCK_CHANGED_EVENT,
+ /** Matches hailo_notification_message_parameters_t::hailo_hw_infer_manager_infer_done_notification */
+ HAILO_NOTIFICATION_ID_HW_INFER_MANAGER_INFER_DONE,
/** Must be last! */
HAILO_NOTIFICATION_ID_COUNT,
uint32_t current_clock;
} hailo_health_monitor_clock_changed_notification_message_t;
+typedef struct {
+ uint32_t infer_cycles;
+} hailo_hw_infer_manager_infer_done_notification_message_t;
+
/** Union of all notification messages parameters. See ::hailo_notification_t */
typedef union {
/** Ethernet rx error */
hailo_context_switch_breakpoint_reached_message_t context_switch_breakpoint_reached_notification;
/** Neural network core clock changed due to health monitor event */
hailo_health_monitor_clock_changed_notification_message_t health_monitor_clock_changed_notification;
+ /** HW infer manager finished infer notification */
+ hailo_hw_infer_manager_infer_done_notification_message_t hw_infer_manager_infer_done_notification;
} hailo_notification_message_parameters_t;
/** Notification data that will be passed to the callback passed in ::hailo_notification_callback */
* device scanned.
* @note ethernet devices are not considered "devices in the system", so they are not scanned in this function.
* use :hailo_scan_ethernet_devices for ethernet devices.
- *
+ *
* @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error.
*/
HAILORTAPI hailo_status hailo_scan_devices(hailo_scan_devices_params_t *params, hailo_device_id_t *device_ids,
/**
* Creates a device by the given device id.
- *
+ *
* @param[in] device_id Device id, can represent several device types:
* [-] for pcie devices - pcie bdf (XXXX:XX:XX.X or XX:XX.X)
* [-] for ethernet devices - ip address (xxx.xxx.xxx.xxx)
- * If NULL is given and there is only one available system device, use this device.
+ * If NULL is given, uses an arbitrary device found on the system.
* @param[out] device A pointer to a ::hailo_device that receives the allocated PCIe device.
* @return Upon success, returns Expected of a unique_ptr to Device object.
* Otherwise, returns Unexpected of ::hailo_status error.
- *
+ *
* @note To release a device, call the ::hailo_release_device function with the returned ::hailo_device.
*/
HAILORTAPI hailo_status hailo_create_device_by_id(const hailo_device_id_t *device_id, hailo_device *device);
/**
* Parse PCIe device BDF string into hailo device info structure.
- *
+ *
* @param[in] device_info_str BDF device info, format [\<domain\>].\<bus\>.\<device\>.\<func\>, same format as in lspci.
* @param[out] device_info A pointer to a ::hailo_pcie_device_info_t that receives the parsed device info.
* @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns an ::hailo_status error.
/**
* Creates a PCIe device.
- *
- * @param[in] device_info Information about the device to open. If NULL is given and there is only
- * one available PCIe device, use this device.
+ *
+ * @param[in] device_info Information about the device to open. If NULL is given, uses an arbitrary device found on
+ * the system.
* @param[out] device A pointer to a ::hailo_device that receives the allocated PCIe device.
* @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns an ::hailo_status error.
* @note To release a device, call the ::hailo_release_device function with the returned ::hailo_device.
/**
* Returns information on all available ethernet devices in the system.
- *
+ *
* @param[in] interface_name The name of the network interface to scan.
* @param[out] eth_device_infos A pointer to a buffer of ::hailo_eth_device_info_t that receives the
* information.
/**
* Creates an ethernet device.
- *
+ *
* @param[in] device_info Information about the device to open.
* @param[out] device A pointer to a ::hailo_device that receives the allocated ethernet device corresponding to
* the given information.
/**
* Release an open device.
- *
+ *
* @param[in] device A ::hailo_device object to be released.
* @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error.
*/
/**
* Returns the device type of the given device id string.
- *
+ *
* @param[in] device_id A :hailo_device_id_t device id to check.
* @param[out] device_type A :hailo_device_type_t returned device type.
* @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error.
HAILORTAPI hailo_status hailo_init_configure_params(hailo_hef hef, hailo_stream_interface_t stream_interface,
hailo_configure_params_t *params);
+/**
+ * Init configure params with default values for a given hef by virtual device.
+ *
+ * @param[in] hef A ::hailo_hef object to configure the @a vdevice by.
+ * @param[in] vdevice A @a hailo_vdevice for which the params are initialized.
+ * @param[out] params A @a hailo_configure_params_t to be filled.
+ * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error.
+ */
+HAILORTAPI hailo_status hailo_init_configure_params_by_vdevice(hailo_hef hef, hailo_vdevice vdevice,
+ hailo_configure_params_t *params);
+
+/**
+ * Init configure params with default values for a given hef by device.
+ *
+ * @param[in] hef A ::hailo_hef object to configure the @a device by.
+ * @param[in] device A @a hailo_device for which the params are initialized.
+ * @param[out] params A @a hailo_configure_params_t to be filled.
+ * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error.
+ */
+HAILORTAPI hailo_status hailo_init_configure_params_by_device(hailo_hef hef, hailo_device device,
+ hailo_configure_params_t *params);
+
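A minimal usage sketch for the vdevice variant (assuming valid `hef` and `vdevice` handles):

    hailo_configure_params_t params{};
    hailo_status status = hailo_init_configure_params_by_vdevice(hef, vdevice, &params);
    // On success, the filled params may be tweaked (e.g. batch size) before configuring.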
/**
* Init configure params with default values for a given hef, where all input_streams_params are init to be MIPI type.
*
/** @} */ // end of group_network_group_functions
+/** @defgroup group_buffer_functions Buffer functions
+ * @{
+ */
+// ************************************* NOTE - START ************************************* //
+// Dma buffer allocation isn't currently supported and is for internal use only //
+// **************************************************************************************** //
+// Free the returned buffer via hailo_free_buffer
+HAILORTAPI hailo_status hailo_allocate_buffer(size_t size, const hailo_buffer_parameters_t *allocation_params, void **buffer_out);
+HAILORTAPI hailo_status hailo_free_buffer(void *buffer);
+// Maps buffer to dma. Free mapping by calling hailo_dma_unmap_buffer_from_device and then free buffer as needed
+// If buffer has already been mapped to device, then HAILO_DMA_MAPPING_ALREADY_EXISTS shall be returned
+HAILORTAPI hailo_status hailo_dma_map_buffer_to_device(void *buffer, size_t size, hailo_device device, hailo_dma_buffer_direction_t direction);
+HAILORTAPI hailo_status hailo_dma_unmap_buffer_from_device(void *buffer, hailo_device device, hailo_dma_buffer_direction_t direction);
+// ************************************** NOTE - END ************************************** //
+// Dma buffer allocation isn't currently supported and is for internal use only //
+// **************************************************************************************** //
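A minimal heap allocation sketch using the functions above (DMA mapping is internal-only per the note):

    hailo_buffer_parameters_t alloc_params{};
    alloc_params.flags = HAILO_BUFFER_FLAGS_NONE;  // heap-backed buffer
    void *buffer = nullptr;
    if (HAILO_SUCCESS == hailo_allocate_buffer(4096, &alloc_params, &buffer)) {
        // ... use the buffer ...
        hailo_free_buffer(buffer);
    }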
+/** @} */ // end of group_buffer_functions
+
/** @defgroup group_stream_functions Stream functions
* @{
*/
/**
* Synchronously reads data from a stream.
- *
+ *
* @param[in] stream A ::hailo_output_stream object.
* @param[in] buffer A pointer to a buffer that receives the data read from @a stream.
* @param[in] size The amount of bytes to read, should be the frame size.
- *
+ *
* @note The output buffer format comes from the \e format field inside ::hailo_stream_info_t and the shape comes from
* the \e hw_shape field inside ::hailo_stream_info_t.
+ * @note @a size is expected to be stream_info.hw_frame_size.
*
- * @note @a size is expected to be a product of stream_info.hw_frame_size (i.e. more than one frame may be read)
- *
* @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error.
*/
HAILORTAPI hailo_status hailo_stream_read_raw_buffer(hailo_output_stream stream, void *buffer, size_t size);
/**
* Synchronously writes all data to a stream.
- *
+ *
* @param[in] stream A ::hailo_input_stream object.
* @param[in] buffer A pointer to a buffer that contains the data to be written to @a stream.
* @param[in] size The amount of bytes to write.
- *
+ *
* @note The input buffer format comes from the \e format field inside ::hailo_stream_info_t and the shape comes from
* the \e hw_shape field inside ::hailo_stream_info_t.
+ * @note @a size is expected to be stream_info.hw_frame_size.
*
- * @note @a size is expected to be a product of stream_info.hw_frame_size (i.e. more than one frame may be read)
- *
* @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error.
*/
HAILORTAPI hailo_status hailo_stream_write_raw_buffer(hailo_input_stream stream, const void *buffer, size_t size);
+/**
+ * Waits until the stream is ready to launch a new ::hailo_stream_read_raw_buffer_async operation. Each stream has a
+ * limited-size queue for ongoing transfers. You can retrieve the queue size for the given stream by calling
+ * ::hailo_output_stream_get_async_max_queue_size.
+ *
+ * @param[in] stream A ::hailo_output_stream object.
+ * @param[in] transfer_size Must be the result of ::hailo_get_output_stream_frame_size for the given stream.
+ * @param[in] timeout_ms Amount of time to wait until the stream is ready in milliseconds.
+ *
+ * @return Upon success, returns ::HAILO_SUCCESS. Otherwise:
+ * - If @a timeout_ms has passed and the stream is not ready, returns ::HAILO_TIMEOUT.
+ * - In any other error case, returns ::hailo_status error.
+ */
+HAILORTAPI hailo_status hailo_stream_wait_for_async_output_ready(hailo_output_stream stream, size_t transfer_size,
+ uint32_t timeout_ms);
+
+/**
+ * Waits until the stream is ready to launch a new ::hailo_stream_write_raw_buffer_async operation. Each stream has a
+ * limited-size queue for ongoing transfers. You can retrieve the queue size for the given stream by calling
+ * ::hailo_input_stream_get_async_max_queue_size.
+ *
+ * @param[in] stream A ::hailo_input_stream object.
+ * @param[in] transfer_size Must be the result of ::hailo_get_input_stream_frame_size for the given stream.
+ * @param[in] timeout_ms Amount of time to wait until the stream is ready in milliseconds.
+ *
+ * @return Upon success, returns ::HAILO_SUCCESS. Otherwise:
+ * - If @a timeout_ms has passed and the stream is not ready, returns ::HAILO_TIMEOUT.
+ * - In any other error case, returns ::hailo_status error.
+ */
+HAILORTAPI hailo_status hailo_stream_wait_for_async_input_ready(hailo_input_stream stream, size_t transfer_size,
+ uint32_t timeout_ms);
+
+/**
+ * Returns the maximum amount of frames that can be simultaneously read from the stream (by
+ * ::hailo_stream_read_raw_buffer_async calls) before any one of the read operations is complete, as signified by
+ * @a user_callback being called.
+ *
+ * @param[in] stream A ::hailo_output_stream object.
+ * @param[out] queue_size Receives the maximum async queue size of the stream.
+ *
+ * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error.
+ */
+HAILORTAPI hailo_status hailo_output_stream_get_async_max_queue_size(hailo_output_stream stream, size_t *queue_size);
+
+/**
+ * Returns the maximum amount of frames that can be simultaneously written to the stream (by
+ * ::hailo_stream_write_raw_buffer_async calls) before any one of the write operations is complete, as signified by
+ * @a user_callback being called.
+ *
+ * @param[in] stream A ::hailo_input_stream object.
+ * @param[out] queue_size Receives the maximum async queue size of the stream.
+ *
+ * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error.
+ */
+HAILORTAPI hailo_status hailo_input_stream_get_async_max_queue_size(hailo_input_stream stream, size_t *queue_size);
+
+/**
+ * Reads into @a buffer from the stream asynchronously, initiating a deferred operation that will be completed
+ * later.
+ * - If the function call succeeds (i.e., ::hailo_stream_read_raw_buffer_async returns ::HAILO_SUCCESS), the deferred
+ * operation has been initiated. Until @a user_callback is called, the user cannot change or delete @a buffer.
+ * - If the function call fails (i.e., ::hailo_stream_read_raw_buffer_async returns a status other than
+ * ::HAILO_SUCCESS), the deferred operation will not be initiated and @a user_callback will not be invoked. The user
+ * is free to change or delete @a buffer.
+ * - @a user_callback is triggered upon successful completion or failure of the deferred operation.
+ * The callback receives a ::hailo_stream_read_async_completion_info_t object containing a pointer to the transferred
+ * buffer (@a buffer_addr) and the transfer status (@a status). If the operation has completed successfully, the
+ * contents of @a buffer will have been updated by the read operation.
+ *
+ * @param[in] stream A ::hailo_output_stream object.
+ * @param[in] buffer The buffer to be read into.
+ * The buffer must be aligned to the system page size.
+ * @param[in] size The size of the given buffer, expected to be the result of
+ * ::hailo_get_output_stream_frame_size.
+ * @param[in] user_callback The callback that will be called when the transfer is complete or has failed.
+ * @param[in] opaque Optional pointer to user-defined context (may be NULL if not desired).
+ *
+ * @return Upon success, returns ::HAILO_SUCCESS. Otherwise:
+ * - If the stream queue is full, returns ::HAILO_QUEUE_IS_FULL.
+ * In this case, please wait until @a user_callback is called on previous
+ * reads, or call ::hailo_stream_wait_for_async_output_ready. The size of the queue can be
+ * determined by calling ::hailo_output_stream_get_async_max_queue_size.
+ * - In any other error case, returns a ::hailo_status error.
+ *
+ * @note @a user_callback should execute as quickly as possible.
+ * @note The output buffer format comes from the \e format field inside ::hailo_stream_info_t and the shape comes from
+ * the \e hw_shape field inside ::hailo_stream_info_t.
+ * @note The address provided must be aligned to the system's page size, and the rest of the page should not be in
+ * use by any other part of the program to ensure proper functioning of the DMA operation. Memory for the
+ * provided address can be allocated using `mmap` on Unix-like systems or `VirtualAlloc` on Windows.
+ */
+HAILORTAPI hailo_status hailo_stream_read_raw_buffer_async(hailo_output_stream stream, void *buffer, size_t size,
+ hailo_stream_read_async_callback_t user_callback, void *opaque);
+
+/**
+ * Writes the contents of @a buffer to the stream asynchronously, initiating a deferred operation that will be
+ * completed later.
+ * - If the function call succeeds (i.e., ::hailo_stream_write_raw_buffer_async returns ::HAILO_SUCCESS), the deferred
+ * operation has been initiated. Until @a user_callback is called, the user cannot change or delete @a buffer.
+ * - If the function call fails (i.e., ::hailo_stream_write_raw_buffer_async returns a status other than
+ * ::HAILO_SUCCESS), the deferred operation will not be initiated and @a user_callback will not be invoked. The user
+ * is free to change or delete @a buffer.
+ * - @a user_callback is triggered upon successful completion or failure of the deferred operation. The callback
+ * receives a ::hailo_stream_write_async_completion_info_t object containing a pointer to the transferred buffer
+ * (@a buffer_addr) and the transfer status (@a status).
+ *
+ * @param[in] stream A ::hailo_input_stream object.
+ * @param[in] buffer The buffer to be written.
+ * The buffer must be aligned to the system page size.
+ * @param[in] size The size of the given buffer, expected to be the result of
+ * ::hailo_get_input_stream_frame_size.
+ * @param[in] user_callback The callback that will be called when the transfer is complete
+ * or has failed.
+ * @param[in] opaque Optional pointer to user-defined context (may be NULL if not desired).
+ *
+ * @return Upon success, returns ::HAILO_SUCCESS. Otherwise:
+ * - If the stream queue is full, returns ::HAILO_QUEUE_IS_FULL. In this case please wait
+ * until @a user_callback is called on previous writes, or call ::hailo_stream_wait_for_async_input_ready.
+ * The size of the queue can be determined by calling ::hailo_input_stream_get_async_max_queue_size.
+ * - In any other error case, returns a ::hailo_status error.
+ *
+ * @note @a user_callback should run as quickly as possible.
+ * @note The input buffer format comes from the \e format field inside ::hailo_stream_info_t and the shape comes from
+ * the \e hw_shape field inside ::hailo_stream_info_t.
+ * @note The address provided must be aligned to the system's page size, and the rest of the page should not be in
+ * use by any other part of the program to ensure proper functioning of the DMA operation. Memory for the
+ * provided address can be allocated using `mmap` on Unix-like systems or `VirtualAlloc` on Windows.
+ */
+HAILORTAPI hailo_status hailo_stream_write_raw_buffer_async(hailo_input_stream stream, const void *buffer, size_t size,
+ hailo_stream_write_async_callback_t user_callback, void *opaque);
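+/*
+ * Usage sketch (illustrative, not part of the API): one async write with
+ * back-pressure handling. `stream`, the page-aligned `buffer` holding one input
+ * frame, `on_write_done` and the wait-call argument form are assumptions; error
+ * handling is omitted.
+ *
+ *     size_t frame_size = 0;
+ *     hailo_get_input_stream_frame_size(stream, &frame_size);
+ *     hailo_status status = hailo_stream_write_raw_buffer_async(stream, buffer, frame_size, on_write_done, NULL);
+ *     if (HAILO_QUEUE_IS_FULL == status) {
+ *         // A previous transfer must complete first; block until the stream is ready, then retry.
+ *         hailo_stream_wait_for_async_input_ready(stream, 1000); // timeout_ms (assumed signature)
+ *     }
+ */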
+
/**
* Gets the size of a stream's frame on the host side in bytes
* (the size could be affected by the format type - for example using UINT16, or by the data not being quantized yet)
HAILORTAPI hailo_status hailo_release_output_demuxer(hailo_output_demuxer demuxer);
/**
- * Demultiplexing an output frame pointed to by @a src directly to the buffer pointed to by @a dst.
+ * Demultiplexing an output frame pointed to by @a src directly to the buffers pointed to by @a raw_buffers.
*
* @param[in] demuxer A ::hailo_output_demuxer object used for the demuxing.
* @param[in] src A pointer to a buffer to be demultiplexed.
* demultiplexed data read from the @a stream.
* @param[in] raw_buffers_count The number of ::hailo_stream_raw_buffer_t elements in the array pointed to by
* @a raw_buffers.
+ * @note The order of @a raw_buffers should be the same as returned by ::hailo_get_mux_infos_by_output_demuxer.
* @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error.
*/
HAILORTAPI hailo_status hailo_demux_raw_frame_by_output_demuxer(hailo_output_demuxer demuxer, const void *src,
size_t src_size, hailo_stream_raw_buffer_t *raw_buffers, size_t raw_buffers_count);
+/**
+ * Demultiplexing an output frame pointed to by @a src directly to the buffers pointed to by @a raw_buffers_by_name.
+ *
+ * @param[in] demuxer A ::hailo_output_demuxer object used for the demuxing.
+ * @param[in] src A pointer to a buffer to be demultiplexed.
+ * @param[in] src_size The number of bytes to be demultiplexed. This number must be equal to the
+ * hw_frame_size, and less than or equal to the size of @a src buffer.
+ * @param[in,out] raw_buffers_by_name A pointer to an array of ::hailo_stream_raw_buffer_by_name_t that receives the
+ * demultiplexed data read from the @a stream. hailo_stream_raw_buffer_by_name_t::name should
+ * be filled with the names of the demuxed streams.
+ * @param[in] raw_buffers_count The number of ::hailo_stream_raw_buffer_by_name_t elements in the array pointed to by
+ * @a raw_buffers_by_name.
+ * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error.
+ */
+HAILORTAPI hailo_status hailo_demux_by_name_raw_frame_by_output_demuxer(hailo_output_demuxer demuxer, const void *src,
+ size_t src_size, hailo_stream_raw_buffer_by_name_t *raw_buffers_by_name, size_t raw_buffers_count);
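+/*
+ * Usage sketch (illustrative, not part of the API): demuxing by name. The field
+ * layout of ::hailo_stream_raw_buffer_by_name_t (a `name` string plus a nested
+ * ::hailo_stream_raw_buffer_t) and all variable names are assumptions; error
+ * handling is omitted.
+ *
+ *     hailo_stream_raw_buffer_by_name_t raw_buffers_by_name[2] = {0};
+ *     strncpy(raw_buffers_by_name[0].name, "demux0", sizeof(raw_buffers_by_name[0].name) - 1);
+ *     raw_buffers_by_name[0].raw_buffer.buffer = demux0_data;
+ *     raw_buffers_by_name[0].raw_buffer.size = demux0_size;
+ *     // ... fill raw_buffers_by_name[1] the same way ...
+ *     hailo_demux_by_name_raw_frame_by_output_demuxer(demuxer, src, src_size, raw_buffers_by_name, 2);
+ */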
+
/**
* Gets all multiplexed stream infos.
*
#include "hailo/runtime_statistics.hpp"
#include "hailo/network_rate_calculator.hpp"
#include "hailo/quantization.hpp"
-#include "hailo/dma_mapped_buffer.hpp"
#include "hailo/hailort_defaults.hpp"
#endif /* _HAILORT_HPP_ */
#include <vector>
+/** hailort namespace */
namespace hailort
{
static const uint32_t BBOX_PARAMS = sizeof(hailo_bbox_t) / sizeof(uint16_t);
static const uint32_t MAX_DEFUSED_LAYER_COUNT = 9;
static const size_t HW_DATA_ALIGNMENT = 8;
- static const uint64_t NMS_DELIMITER = 0xFFFFFFFFFFFFFFFF;
- static const uint64_t NMS_DUMMY_DELIMITER = 0xFFFFFFFFFFFFFFFE;
static const uint32_t MUX_INFO_COUNT = 32;
static const uint32_t MAX_MUX_PREDECESSORS = 4;
static const uint16_t ETH_INPUT_BASE_PORT = 32401;
static constexpr uint32_t get_nms_hw_frame_size(const hailo_nms_info_t &nms_info)
{
const uint32_t size_per_class = static_cast<uint32_t>(sizeof(nms_bbox_counter_t)) +
- nms_info.bbox_size * nms_info.max_bboxes_per_class;
+ nms_info.bbox_size * std::max(nms_info.burst_size, nms_info.max_bboxes_per_class);
const uint32_t size_per_chunk = nms_info.number_of_classes * size_per_class;
- // 1 delimiter for an entire frame (since we are reading delimiters directly into the buffer and replacing them)
- return nms_info.bbox_size + (nms_info.chunks_per_frame * size_per_chunk);
+ // Extra burst size per frame (since bursts may be read directly into the buffer and replaced)
+ const uint32_t size_for_extra_burst = nms_info.bbox_size * nms_info.burst_size;
+ return (nms_info.chunks_per_frame * size_per_chunk) + size_for_extra_burst;
}
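// Worked example (illustrative values only): with number_of_classes = 80,
// max_bboxes_per_class = 50, burst_size = 8, chunks_per_frame = 1, bbox_size = 10
// and a 2-byte nms_bbox_counter_t, size_per_class = 2 + 10 * max(8, 50) = 502,
// size_per_chunk = 80 * 502 = 40160, the extra burst adds 10 * 8 = 80, and the
// resulting hw frame size is 40240 bytes.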
/**
return a;
}
+inline constexpr hailo_format_flags_t operator&(hailo_format_flags_t a, hailo_format_flags_t b)
+{
+ return static_cast<hailo_format_flags_t>(static_cast<int>(a) & static_cast<int>(b));
+}
+
+inline constexpr hailo_format_flags_t& operator&=(hailo_format_flags_t &a, hailo_format_flags_t b)
+{
+ a = a & b;
+ return a;
+}
+
+inline constexpr hailo_format_flags_t operator~(hailo_format_flags_t a)
+{
+ return static_cast<hailo_format_flags_t>(~(static_cast<int>(a)));
+}
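+// Usage sketch: with the operators above, format flags behave like plain bitmasks.
+// For example, clearing a flag (HAILO_FORMAT_FLAGS_QUANTIZED is assumed to be one
+// of the hailo_format_flags_t enumerators):
+//     format.flags &= ~HAILO_FORMAT_FLAGS_QUANTIZED;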
+
inline constexpr hailo_vstream_stats_flags_t operator|(hailo_vstream_stats_flags_t a, hailo_vstream_stats_flags_t b)
{
return static_cast<hailo_vstream_stats_flags_t>(static_cast<int>(a) | static_cast<int>(b));
#include "hailo/expected.hpp"
#include "hailo/network_group.hpp"
-
+/** hailort namespace */
namespace hailort
{
#include <memory>
#include <map>
-
+/** hailort namespace */
namespace hailort
{
*/
std::string hash() const;
- Expected<std::string> get_hef_description(bool stream_infos, bool vstream_infos);
+ Expected<std::string> get_description(bool stream_infos, bool vstream_infos);
~Hef();
Hef(Hef &&);
#include "hailo/vstream.hpp"
+/** hailort namespace */
namespace hailort
{
#include <map>
#include <unordered_map>
-
+/** hailort namespace */
namespace hailort
{
struct LatencyMeasurementResult {
std::chrono::nanoseconds avg_hw_latency;
};
+
+struct HwInferResults {
+ uint16_t batch_count;
+ size_t total_transfer_size;
+ size_t total_frames_passed;
+ float32_t time_sec;
+ float32_t fps;
+ float32_t BW_Gbps;
+};
/*@}*/
using src_context_t = uint8_t;
virtual const std::string &get_network_group_name() const = 0;
virtual Expected<Buffer> get_intermediate_buffer(const IntermediateBufferKey &key) = 0;
-
+ // TODO HRT-10799: remove once the batch switch flow is enabled for hailo15
virtual hailo_status set_keep_nn_config_during_reset(const bool keep_nn_config_during_reset) = 0;
/**
virtual Expected<std::vector<InputVStream>> create_input_vstreams(const std::map<std::string, hailo_vstream_params_t> &inputs_params) = 0;
virtual Expected<std::vector<OutputVStream>> create_output_vstreams(const std::map<std::string, hailo_vstream_params_t> &outputs_params) = 0;
+ virtual Expected<HwInferResults> run_hw_infer_estimator() = 0;
+
virtual hailo_status before_fork() { return HAILO_SUCCESS; }
virtual hailo_status after_fork_in_parent() { return HAILO_SUCCESS; }
virtual hailo_status after_fork_in_child() { return HAILO_SUCCESS; }
+ virtual Expected<std::vector<std::string>> get_sorted_output_names() = 0;
+ virtual Expected<std::vector<std::string>> get_stream_names_from_vstream_name(const std::string &vstream_name) = 0;
+ virtual Expected<std::vector<std::string>> get_vstream_names_from_stream_name(const std::string &stream_name) = 0;
+
+ static Expected<std::shared_ptr<ConfiguredNetworkGroup>> duplicate_network_group_client(uint32_t handle, const std::string &network_group_name);
+ virtual Expected<uint32_t> get_client_handle() const;
protected:
ConfiguredNetworkGroup() = default;
#include <vector>
+/** hailort namespace */
namespace hailort
{
Expected<std::map<uint16_t, uint32_t>> get_udp_ports_rates_dict(
std::vector<std::reference_wrapper<InputStream>> &udp_input_streams,
uint32_t fps, uint32_t max_supported_bandwidth = HAILO_DEFAULT_MAX_ETHERNET_BANDWIDTH_BYTES_PER_SEC);
+
+ // Undocumented, exported here for pyhailort usage
+ static hailo_status set_rate_limit(const std::string &ip, uint16_t port, uint32_t rate_bytes_per_sec);
+ static hailo_status reset_rate_limit(const std::string &ip, uint16_t port);
+ static Expected<std::string> get_interface_name(const std::string &ip);
};
} /* namespace hailort */
#include <math.h>
#include <fenv.h>
+#ifdef _MSC_VER
+#include <immintrin.h>
+#endif
+/** hailort namespace */
namespace hailort
{
+inline float bankers_round(float x)
+{
+#ifdef _MSC_VER
+ // These instructions are intrinsics that the Microsoft C/C++ compiler supports when x86 is targeted
+ __m128 xmm = _mm_set_ss(x);
+ xmm = _mm_round_ss(xmm, xmm, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+ return _mm_cvtss_f32(xmm);
+#else
+ return rintf(x);
+#endif
+}
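+// Illustrative behavior: banker's rounding breaks ties toward the even integer,
+// e.g. bankers_round(2.5f) == 2.0f while bankers_round(3.5f) == 4.0f. This matches
+// rintf() under the default FE_TONEAREST mode; the MSVC path uses SSE4.1 rounding
+// so the result does not depend on the runtime FP environment.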
+
class RoundingToNearestGuard final
{
public:
dst_ptr[i] = (T)(src_ptr[i]);
}
} else {
- auto rounding_tonearest_guard = RoundingToNearestGuard();
for (uint32_t i = 0; i < buffer_elements_count; i++) {
dst_ptr[i] = dequantize_output<T, Q>(src_ptr[i], quant_info);
}
/**
* De-quantize in place the output buffer pointed by @a dst_ptr from data type @a Q to data type @a T.
- *
+ *
* @param[inout] dst_ptr A pointer to the buffer to be de-quantized.
* @param[in] buffer_elements_count The number of elements in @a dst_ptr array.
* @param[in] quant_info Quantization info.
template <typename T, typename Q>
static void dequantize_output_buffer_in_place(T *dst_ptr, uint32_t buffer_elements_count, hailo_quant_info_t quant_info)
{
- if (is_identity_qp(quant_info)) {
+ dequantize_output_buffer_in_place<T, Q>(dst_ptr, 0, buffer_elements_count, quant_info.qp_zp, quant_info.qp_scale);
+ }
+
+ /**
+ * De-quantize in place the output buffer pointed by @a dst_ptr starting from @a offset from data type @a Q to data type @a T.
+ *
+ * @param[inout] dst_ptr A pointer to the buffer to be de-quantized.
+ * @param[in] offset The offset in @a dst_ptr array to start from.
+ * @param[in] buffer_elements_count The number of elements in @a dst_ptr array.
+ * @param[in] qp_zp Quantization zero point.
+ * @param[in] qp_scale Quantization scale.
+ */
+ template <typename T, typename Q>
+ static void dequantize_output_buffer_in_place(T *dst_ptr, uint32_t offset, uint32_t buffer_elements_count, float32_t qp_zp, float32_t qp_scale)
+ {
+ if (is_identity_qp(qp_zp, qp_scale)) {
for (int32_t i = (int32_t)buffer_elements_count - 1; i >= 0; i--) {
- dst_ptr[i] = (T)(*((Q*)dst_ptr + i));
+ dst_ptr[offset + i] = (T)(*((Q*)dst_ptr + offset + i));
}
} else {
- auto rounding_tonearest_guard = RoundingToNearestGuard();
for (int32_t i = (int32_t)buffer_elements_count - 1; i >= 0; i--) {
- dst_ptr[i] = dequantize_output<T, Q>(*((Q*)dst_ptr + i), quant_info);
+ dst_ptr[offset + i] = dequantize_output<T, Q>(*((Q*)dst_ptr + offset + i), qp_zp, qp_scale);
}
}
}
auto rounding_tonearest_guard = RoundingToNearestGuard();
if (is_identity_qp(quant_info)) {
for (uint32_t i = 0; i < buffer_elements_count; i++) {
- dst_ptr[i] = (Q)rintf(src_ptr[i]);
+ dst_ptr[i] = (Q)bankers_round(src_ptr[i]);
}
} else {
for (uint32_t i = 0; i < buffer_elements_count; i++) {
*/
static inline bool is_identity_qp(const hailo_quant_info_t &quant_info)
{
- return ((1 == quant_info.qp_scale) && (0 == quant_info.qp_zp));
+ return is_identity_qp(quant_info.qp_zp, quant_info.qp_scale);
+ }
+
+ /**
+ * Indicates whether @a qp_zp and @a qp_scale form the identity scale.
+ * If true there is no need to fix the data's scale.
+ */
+ static inline bool is_identity_qp(float32_t qp_zp, float32_t qp_scale)
+ {
+ return ((1 == qp_scale) && (0 == qp_zp));
}
/**
template <typename T, typename Q>
static inline T dequantize_output(Q number, hailo_quant_info_t quant_info)
{
- return (T)((number - quant_info.qp_zp) * quant_info.qp_scale);
+ return dequantize_output<T, Q>(number, quant_info.qp_zp, quant_info.qp_scale);
+ }
+
+ /**
+ * De-quantize @a number from data type @a Q to data type @a T and fix its scale according to @a qp_zp and @a qp_scale.
+ *
+ * @param[in] number The value to be de-quantized.
+ * @param[in] qp_zp Quantization zero point.
+ * @param[in] qp_scale Quantization scale.
+ *
+ * @return Returns the dequantized value of @a number.
+ *
+ */
+ template <typename T, typename Q>
+ static inline T dequantize_output(Q number, float32_t qp_zp, float32_t qp_scale)
+ {
+ return (T)((number - qp_zp) * qp_scale);
}
static inline float32_t clip(float32_t n, float32_t limval_min, float32_t limval_max)
static inline Q quantize_input(T number, hailo_quant_info_t quant_info)
{
float32_t clipped_number = clip((float32_t)number, quant_info.limvals_min, quant_info.limvals_max);
- return (Q)rintf((clipped_number / quant_info.qp_scale) + quant_info.qp_zp);
+ return (Q)bankers_round((clipped_number / quant_info.qp_scale) + quant_info.qp_zp);
}
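// Worked example (illustrative values only): quantizing 10.3f with qp_zp = 128,
// qp_scale = 0.5 and limvals [-64.0, 63.5] leaves the value unclipped, then
// bankers_round((10.3 / 0.5) + 128) = bankers_round(148.6) = 149.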
};
#include <type_traits>
#include <memory>
-
+/** hailort namespace */
namespace hailort
{
#include <functional>
+/** hailort namespace */
namespace hailort
{
// Forward declaration
struct LayerInfo;
-class DmaMappedBuffer;
-
-using TransferDoneCallback = std::function<void(std::shared_ptr<DmaMappedBuffer> buffer,
- const hailo_async_transfer_completion_info_t &status,
- void *opaque)>;
/*! Input (host to device) stream representation */
InputStream(const InputStream&) = delete;
InputStream& operator=(const InputStream&) = delete;
+ /** Context passed to the \ref TransferDoneCallback after the async operation is done or has failed. */
+ struct CompletionInfo
+ {
+ /**
+ * Status of the async transfer.
+ * - ::HAILO_SUCCESS - When transfer is complete successfully.
+ * - ::HAILO_STREAM_ABORTED_BY_USER - The transfer was canceled (can happen after network deactivation).
+ * - Any other ::hailo_status on unexpected errors.
+ */
+ hailo_status status;
+
+ const void *buffer_addr; /* Points to the transferred buffer. */
+ size_t buffer_size; /* Size of the transferred buffer. */
+ };
+
+ /** Async transfer complete callback prototype. */
+ using TransferDoneCallback = std::function<void(const CompletionInfo &completion_info)>;
+
/**
* Set new timeout value to the input stream
*
/**
* Aborting the stream.
- *
+ *
* @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error.
*/
virtual hailo_status abort() = 0;
/**
* Clearing the aborted state of the stream.
- *
+ *
* @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error.
*/
virtual hailo_status clear_abort() = 0;
/**
* Writes all pending data to the underlying stream.
- *
+ *
* @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error.
*/
virtual hailo_status flush();
virtual bool is_scheduled() = 0;
/**
- * Writes the entire buffer to the stream without transformations
+ * Writes the entire buffer to the stream without transformations.
*
* @param[in] buffer The buffer to be written.
* @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns an ::hailo_status error.
+ *
* @note @a buffer is expected to be in the format dictated by this.stream_info.format
- * @note @a size is expected to be a product of this.stream_info.hw_frame_size (i.e. more than one frame may be written)
+ * @note @a buffer.size() is expected to be get_frame_size().
*/
virtual hailo_status write(const MemoryView &buffer);
- // ******************************************** NOTE ******************************************** //
- // Async Stream API and DmaMappedBuffer are currently not supported and are for internal use only //
- // ********************************************************************************************** //
- virtual hailo_status wait_for_ready(size_t transfer_size, std::chrono::milliseconds timeout); // Internal use only
- virtual hailo_status write_async(std::shared_ptr<DmaMappedBuffer> buffer, const TransferDoneCallback &user_callback,
- void *opaque = nullptr); // Internal use only
+ /**
+ * Writes the entire buffer to the stream without transformations.
+ *
+ * @param[in] buffer The buffer to be written.
+ * @param[in] size The size of the buffer given.
+ * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns an ::hailo_status error.
+ *
+ * @note @a buffer is expected to be in the format dictated by this.stream_info.format
+ * @note @a size is expected to be get_frame_size().
+ */
+ virtual hailo_status write(const void *buffer, size_t size);
+
+ /**
+ * Waits until the stream is ready to launch a new write_async() operation. Each stream has a limited-size
+ * queue for ongoing transfers. Calling get_async_max_queue_size() returns the queue size of the current stream.
+ *
+ * @param[in] transfer_size Must be get_frame_size().
+ * @param[in] timeout Amount of time to wait until the stream is ready in milliseconds.
+ *
+ * @return Upon success, returns ::HAILO_SUCCESS. Otherwise:
+ * - If @a timeout has passed and the stream is not ready, returns ::HAILO_TIMEOUT.
+ * - In any other error case, returns ::hailo_status error.
+ */
+ virtual hailo_status wait_for_async_ready(size_t transfer_size, std::chrono::milliseconds timeout);
+
+ /**
+ * Returns the maximum number of frames that can be simultaneously written to the stream (via write_async() calls)
+ * before any one of the write operations is complete, as signified by @a user_callback being called.
+ *
+ * @return Upon success, returns Expected of the queue size.
+ * Otherwise, returns Unexpected of ::hailo_status error.
+ */
+ virtual Expected<size_t> get_async_max_queue_size() const;
+
+ /**
+ * Writes the contents of @a buffer to the stream asynchronously, initiating a deferred operation that will be
+ * completed later.
+ * - If the function call succeeds (i.e., write_async() returns ::HAILO_SUCCESS), the deferred operation has been
+ * initiated. Until @a user_callback is called, the user cannot change or delete @a buffer.
+ * - If the function call fails (i.e., write_async() returns a status other than ::HAILO_SUCCESS), the deferred
+ * operation will not be initiated and @a user_callback will not be invoked. The user is free to change or delete
+ * @a buffer.
+ * - @a user_callback is triggered upon successful completion or failure of the deferred operation. The callback
+ * receives a \ref CompletionInfo object containing a pointer to the transferred buffer (@a buffer_addr) and the
+ * transfer status (@a status).
+ *
+ * @param[in] buffer The buffer to be written.
+ * The buffer must be aligned to the system page size.
+ * @param[in] user_callback The callback that will be called when the transfer is complete
+ * or has failed.
+ *
+ * @return Upon success, returns ::HAILO_SUCCESS. Otherwise:
+ * - If the stream queue is full, returns ::HAILO_QUEUE_IS_FULL. In this case, please wait
+ * until @a user_callback is called on previous writes, or call wait_for_async_ready().
+ * The size of the queue can be determined by calling get_async_max_queue_size().
+ * - In any other error case, returns a ::hailo_status error.
+ *
+ * @note @a user_callback should run as quickly as possible.
+ * @note The buffer's format comes from the @a format field inside get_info() and the shape comes from
+ * the @a hw_shape field inside get_info().
+ * @note The address provided must be aligned to the system's page size, and the rest of the page should not be in
+ * use by any other part of the program to ensure proper functioning of the DMA operation. Memory for the
+ * provided address can be allocated using `mmap` on Unix-like systems or `VirtualAlloc` on Windows.
+ */
+ virtual hailo_status write_async(const MemoryView &buffer, const TransferDoneCallback &user_callback) = 0;
+
+ /**
+ * Writes the contents of @a buffer to the stream asynchronously, initiating a deferred operation that will be
+ * completed later.
+ * - If the function call succeeds (i.e., write_async() returns ::HAILO_SUCCESS), the deferred operation has been
+ * initiated. Until @a user_callback is called, the user cannot change or delete @a buffer.
+ * - If the function call fails (i.e., write_async() returns a status other than ::HAILO_SUCCESS), the deferred
+ * operation will not be initiated and @a user_callback will not be invoked. The user is free to change or delete
+ * @a buffer.
+ * - @a user_callback is triggered upon successful completion or failure of the deferred operation. The callback
+ * receives a \ref CompletionInfo object containing a pointer to the transferred buffer (@a buffer_addr) and the
+ * transfer status (@a status).
+ *
+ * @param[in] buffer The buffer to be written.
+ * The buffer must be aligned to the system page size.
+ * @param[in] size The size of the given buffer, expected to be get_frame_size().
+ * @param[in] user_callback The callback that will be called when the transfer is complete
+ * or has failed.
+ *
+ * @return Upon success, returns ::HAILO_SUCCESS. Otherwise:
+ * - If the stream queue is full, returns ::HAILO_QUEUE_IS_FULL. In this case, please wait
+ * until @a user_callback is called on previous writes, or call wait_for_async_ready().
+ * The size of the queue can be determined by calling get_async_max_queue_size().
+ * - In any other error case, returns a ::hailo_status error.
+ *
+ * @note @a user_callback should run as quickly as possible.
+ * @note The buffer's format comes from the @a format field inside get_info() and the shape comes from
+ * the @a hw_shape field inside get_info().
+ * @note The address provided must be aligned to the system's page size, and the rest of the page should not be in
+ * use by any other part of the program to ensure proper functioning of the DMA operation. Memory for the
+ * provided address can be allocated using `mmap` on Unix-like systems or `VirtualAlloc` on Windows.
+ */
+ virtual hailo_status write_async(const void *buffer, size_t size, const TransferDoneCallback &user_callback) = 0;
+
+ // The usage of BufferPtr for async API isn't currently supported and is for internal use only.
+ virtual hailo_status write_async(BufferPtr buffer, const TransferDoneCallback &user_callback) = 0;
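+ // Usage sketch (illustrative, not normative): one write_async() call with
+ // back-pressure handling. `input_stream` (an activated InputStream) and the
+ // page-aligned `buffer` are assumptions; error handling is omitted.
+ //
+ //     auto frame_size = input_stream.get_frame_size();
+ //     auto status = input_stream.write_async(MemoryView(buffer, frame_size),
+ //         [](const InputStream::CompletionInfo &info) {
+ //             (void)info; // Keep the callback short - see the note above.
+ //         });
+ //     if (HAILO_QUEUE_IS_FULL == status) {
+ //         input_stream.wait_for_async_ready(frame_size, std::chrono::milliseconds(1000));
+ //     }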
/**
* @returns A ::hailo_stream_info_t object containing the stream's info.
// get_network_group_activated_event is same as this function
virtual EventPtr &get_core_op_activated_event() = 0;
+
protected:
InputStream() = default;
InputStream(InputStream &&) = delete;
- // Note: Implement sync_write_all_raw_buffer_no_transform_impl for the actual stream interaction in sub classes
- virtual hailo_status sync_write_all_raw_buffer_no_transform_impl(void *buffer, size_t offset, size_t size) = 0;
+ // Note: Implement write_impl for the actual stream interaction in sub classes
+ virtual hailo_status write_impl(const MemoryView &buffer) = 0;
virtual hailo_status activate_stream(uint16_t dynamic_batch_size, bool resume_pending_stream_transfers) = 0;
virtual hailo_status deactivate_stream() = 0;
- virtual Expected<size_t> sync_write_raw_buffer(const MemoryView &buffer) = 0;
-
hailo_stream_info_t m_stream_info;
uint8_t m_dataflow_manager_id;
OutputStream(const OutputStream&) = delete;
OutputStream& operator=(const OutputStream&) = delete;
+ /** Context passed to the \ref TransferDoneCallback after the async operation is done or has failed. */
+ struct CompletionInfo
+ {
+ /**
+ * Status of the async transfer.
+ * - ::HAILO_SUCCESS - When transfer is complete successfully.
+ * - ::HAILO_STREAM_ABORTED_BY_USER - The transfer was canceled (can happen after network deactivation).
+ * - Any other ::hailo_status on unexpected errors.
+ */
+ hailo_status status;
+
+ void *buffer_addr; /* Points to the transferred buffer. */
+ size_t buffer_size; /* Size of the transferred buffer. */
+ };
+
+ /** Async transfer complete callback prototype. */
+ using TransferDoneCallback = std::function<void(const CompletionInfo &completion_info)>;
+
/**
* Set new timeout value to the output stream
*
* @return returns the output stream's timeout in milliseconds.
*/
virtual std::chrono::milliseconds get_timeout() const = 0;
-
+
/**
* @return returns the output stream's interface.
*/
/**
* Aborting the stream.
- *
+ *
* @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error.
*/
virtual hailo_status abort() = 0;
/**
* Clearing the abort flag of the stream.
- *
+ *
* @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error.
*/
virtual hailo_status clear_abort() = 0;
/**
* Reads the entire buffer from the stream without transformations
*
- * @param[out] buffer A pointer to a buffer that receives the data read from the stream.
+ * @param[in] buffer A buffer that receives the data read from the stream.
* @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns an ::hailo_status error.
- * @note Upon return, @a buffer is expected to be in the format dictated by this.stream_info.format
- * @note @a size is expected to be a product of this.stream_info.hw_frame_size (i.e. more than one frame may be read)
+ * @note Upon return, @a buffer is expected to be in the format dictated by this.get_info().format
+ * @note @a size is expected to be get_frame_size().
*/
virtual hailo_status read(MemoryView buffer);
- // ******************************************** NOTE ******************************************** //
- // Async Stream API and DmaMappedBuffer are currently not supported and are for internal use only //
- // ********************************************************************************************** //
- virtual hailo_status wait_for_ready(size_t transfer_size, std::chrono::milliseconds timeout); // Internal use only
- virtual hailo_status read_async(std::shared_ptr<DmaMappedBuffer> buffer, const TransferDoneCallback &user_callback,
- void *opaque = nullptr); // Internal use only
+ /**
+ * Reads the entire buffer from the stream without transformations.
+ *
+ * @param[in] buffer A pointer to a buffer that receives the data read from the stream.
+ * @param[in] size The size of the given buffer.
+ *
+ * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns an ::hailo_status error.
+ *
+ * @note Upon return, @a buffer is expected to be in the format dictated by this.get_info().format
+ * @note @a size is expected to be get_frame_size().
+ */
+ virtual hailo_status read(void *buffer, size_t size);
+
+ /**
+ * Waits until the stream is ready to launch a new read_async() operation. Each stream has a limited-size
+ * queue for ongoing transfers. Calling get_async_max_queue_size() returns the queue size of the current stream.
+ *
+ * @param[in] transfer_size Must be get_frame_size().
+ * @param[in] timeout Amount of time to wait until the stream is ready in milliseconds.
+ *
+ * @return Upon success, returns ::HAILO_SUCCESS. Otherwise:
+ * - If @a timeout has passed and the stream is not ready, returns ::HAILO_TIMEOUT.
+ * - In any other error case, returns ::hailo_status error.
+ */
+ virtual hailo_status wait_for_async_ready(size_t transfer_size, std::chrono::milliseconds timeout);
+
+ /**
+ * Returns the maximum number of frames that can be simultaneously read from the stream (via read_async() calls)
+ * before any one of the read operations is complete, as signified by @a user_callback being called.
+ *
+ * @return Upon success, returns Expected of the queue size.
+ * Otherwise, returns Unexpected of ::hailo_status error.
+ */
+ virtual Expected<size_t> get_async_max_queue_size() const;
+
+ /**
+ * Reads into @a buffer from the stream asynchronously, initiating a deferred operation that will be completed
+ * later.
+ * - If the function call succeeds (i.e., read_async() returns ::HAILO_SUCCESS), the deferred operation has been
+ * initiated. Until @a user_callback is called, the user cannot change or delete @a buffer.
+ * - If the function call fails (i.e., read_async() returns a status other than ::HAILO_SUCCESS), the deferred
+ * operation will not be initiated and @a user_callback will not be invoked. The user is free to change or
+ * delete @a buffer.
+ * - @a user_callback is triggered upon successful completion or failure of the deferred operation.
+ * The callback receives a \ref CompletionInfo object containing a pointer to the transferred buffer
+ * (@a buffer_addr) and the transfer status (@a status). If the operation has completed successfully, the contents
+ * of @a buffer will have been updated by the read operation.
+ *
+ * @param[in] buffer The buffer to be read into.
+ * The buffer must be aligned to the system page size.
+ * @param[in] user_callback The callback that will be called when the transfer is complete or has failed.
+ *
+ * @return Upon success, returns ::HAILO_SUCCESS. Otherwise:
+ * - If the stream queue is full, returns ::HAILO_QUEUE_IS_FULL.
+ * In this case, please wait until @a user_callback is called on previous
+ * reads, or call wait_for_async_ready(). The size of the queue can be
+ * determined by calling get_async_max_queue_size().
+ * - In any other error case, returns a ::hailo_status error.
+ * @note @a user_callback should execute as quickly as possible.
+ * @note The buffer's format is determined by the @a format field inside get_info(),
+ * and the shape is determined by the @a hw_shape field inside get_info().
+ * @note The address provided must be aligned to the system's page size, and the rest of the page should not be in
+ * use by any other part of the program to ensure proper functioning of the DMA operation. Memory for the
+ * provided address can be allocated using `mmap` on Unix-like systems or `VirtualAlloc` on Windows.
+ */
+ virtual hailo_status read_async(MemoryView buffer, const TransferDoneCallback &user_callback) = 0;
+
+ /**
+ * Reads into @a buffer from the stream asynchronously, initiating a deferred operation that will be completed
+ * later.
+ * - If the function call succeeds (i.e., read_async() returns ::HAILO_SUCCESS), the deferred operation has been
+ * initiated. Until @a user_callback is called, the user cannot change or delete @a buffer.
+ * - If the function call fails (i.e., read_async() returns a status other than ::HAILO_SUCCESS), the deferred
+ * operation will not be initiated and @a user_callback will not be invoked. The user is free to change or
+ * delete @a buffer.
+ * - @a user_callback is triggered upon successful completion or failure of the deferred operation.
+ * The callback receives a \ref CompletionInfo object containing a pointer to the transferred buffer
+ * (@a buffer_addr) and the transfer status (@a status). If the operation has completed successfully, the contents
+ * of @a buffer will have been updated by the read operation.
+ *
+ * @param[in] buffer The buffer to be read into.
+ * The buffer must be aligned to the system page size.
+ * @param[in] size The size of the given buffer, expected to be get_frame_size().
+ * @param[in] user_callback The callback that will be called when the transfer is complete or has failed.
+ *
+ * @return Upon success, returns ::HAILO_SUCCESS. Otherwise:
+ * - If the stream queue is full, returns ::HAILO_QUEUE_IS_FULL.
+ * In this case, please wait until @a user_callback is called on previous
+ * reads, or call wait_for_async_ready(). The size of the queue can be
+ * determined by calling get_async_max_queue_size().
+ * - In any other error case, returns a ::hailo_status error.
+ * @note @a user_callback should execute as quickly as possible.
+ * @note The buffer's format is determined by the @a format field inside get_info(),
+ * and the shape is determined by the @a hw_shape field inside get_info().
+ * @note The address provided must be aligned to the system's page size, and the rest of the page should not be in
+ * use by any other part of the program to ensure proper functioning of the DMA operation. Memory for the
+ * provided address can be allocated using `mmap` on Unix-like systems or `VirtualAlloc` on Windows.
+ */
+ virtual hailo_status read_async(void *buffer, size_t size, const TransferDoneCallback &user_callback) = 0;
+
+ // The usage of BufferPtr for async API isn't currently supported and is for internal use only.
+ virtual hailo_status read_async(BufferPtr buffer, const TransferDoneCallback &user_callback) = 0;
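+ // Usage sketch (illustrative, not normative): mirroring the write side, one
+ // read_async() call on an activated OutputStream. `output_stream` and the
+ // page-aligned `buffer` are assumptions.
+ //
+ //     auto status = output_stream.read_async(MemoryView(buffer, output_stream.get_frame_size()),
+ //         [](const OutputStream::CompletionInfo &info) {
+ //             if (HAILO_SUCCESS == info.status) {
+ //                 // info.buffer_addr now holds one full frame.
+ //             }
+ //         });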
// get_network_group_activated_event is same as this function
virtual EventPtr &get_core_op_activated_event() = 0;
virtual hailo_status activate_stream(uint16_t dynamic_batch_size, bool resume_pending_stream_transfers) = 0;
virtual hailo_status deactivate_stream() = 0;
- virtual hailo_status read_all(MemoryView &buffer) = 0;
-
- virtual Expected<size_t> sync_read_raw_buffer(MemoryView &buffer) = 0;
+ virtual hailo_status read_impl(MemoryView &buffer) = 0;
hailo_stream_info_t m_stream_info;
uint8_t m_dataflow_manager_id;
std::atomic<uint32_t> m_invalid_frames_count;
+protected:
+ hailo_status read_nms(void *buffer, size_t offset, size_t size);
+
private:
virtual const LayerInfo& get_layer_info() = 0;
- hailo_status read_nms(void *buffer, size_t offset, size_t size);
void increase_invalid_frames_count(uint32_t value);
friend class HefConfigurator;
friend class HwReadElement;
friend class OutputDemuxer;
friend class CoreOp;
+ friend class NMSStreamReader;
};
} /* namespace hailort */
#include <map>
#include <vector>
-
+/** hailort namespace */
namespace hailort
{
* @param[in] src A buffer to be demultiplexed.
* @param[out] raw_buffers A vector of buffers that receives the demultiplexed data read from the stream.
* The order of @a raw_buffers vector will remain as is.
+ * @note The order of @a raw_buffers should be the same as returned by get_edges_stream_info().
* @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error.
*/
virtual hailo_status transform_demux(const MemoryView src, std::vector<MemoryView> &raw_buffers) = 0;
#include "hailo/device.hpp"
+/** hailort namespace */
namespace hailort
{
#include "hailo/network_group.hpp"
#include "hailo/runtime_statistics.hpp"
+/** hailort namespace */
namespace hailort
{
hailo_status before_fork();
hailo_status after_fork_in_parent();
hailo_status after_fork_in_child();
+ bool is_aborted();
+
+ // Added to match the same API as InputStream. Will be filled in once the async API is implemented for vstreams.
+ using TransferDoneCallback = void(*);
protected:
explicit InputVStream(std::shared_ptr<InputVStreamInternal> vstream);
std::shared_ptr<InputVStreamInternal> m_vstream;
friend class VStreamsBuilderUtils;
+ friend class HailoRtRpcService;
};
class HAILORTAPI OutputVStream
hailo_status before_fork();
hailo_status after_fork_in_parent();
hailo_status after_fork_in_child();
+ bool is_aborted();
+
+ // Added to match the same API as InputStream. Will be filled in once the async API is implemented for vstreams.
+ using TransferDoneCallback = void(*);
protected:
explicit OutputVStream(std::shared_ptr<OutputVStreamInternal> vstream);
friend class VStreamsBuilderUtils;
friend class VDeviceCoreOp;
+ friend class HailoRtRpcService;
};
/*! Contains the virtual streams creation functions */
add_subdirectory(core_op)
add_subdirectory(net_flow)
-
-set(HAILORT_CPP_SOURCES "${HAILORT_CPP_SOURCES}" "${HAILORT_OPS_CPP_SOURCES}")
-
if(HAILO_BUILD_SERVICE)
add_subdirectory(service)
endif()
set(HAILO_OS_DIR ${HAILO_OS_DIR} CACHE INTERNAL "Absolute path of os-dir")
set(HAILO_FULL_OS_DIR ${HAILO_FULL_OS_DIR} CACHE INTERNAL "Absolute Full path of os-dir")
set(HAILORT_CPP_SOURCES ${HAILORT_CPP_SOURCES} CACHE INTERNAL "Absolute paths of hailort's cpp source files")
-set(HAILORT_CPP_OS_SOURCES ${HAILORT_CPP_OS_SOURCES} CACHE INTERNAL "Absolute paths of os-related source files")
set(COMMON_C_SOURCES ${COMMON_C_SOURCES} CACHE INTERNAL "Absolute paths of common source files")
-set(HAILORT_SRCS_ABS ${HAILORT_CPP_SOURCES} ${HAILORT_CPP_OS_SOURCES} ${HAILORT_COMMON_CPP_SOURCES} ${COMMON_C_SOURCES} CACHE INTERNAL "All absolute paths of hailort's source files")
-set(HAILORT_OPS_CPP_SOURCES ${HAILORT_OPS_CPP_SOURCES} PARENT_SCOPE)
+set(HAILORT_SRCS_ABS ${HAILORT_CPP_SOURCES} ${HAILORT_COMMON_CPP_SOURCES} ${COMMON_C_SOURCES} CACHE INTERNAL "All absolute paths of hailort's source files")
SET_SOURCE_FILES_PROPERTIES(${C_SOURCES} PROPERTIES LANGUAGE CXX)
add_library(libhailort SHARED ${HAILORT_SRCS_ABS})
${HAILORT_INC_DIR}/hailo/platform.h
${HAILORT_INC_DIR}/hailo/hailort.hpp
+ ${HAILORT_INC_DIR}/hailo/buffer_storage.hpp
${HAILORT_INC_DIR}/hailo/buffer.hpp
${HAILORT_INC_DIR}/hailo/device.hpp
${HAILORT_INC_DIR}/hailo/event.hpp
${HAILORT_INC_DIR}/hailo/network_rate_calculator.hpp
${HAILORT_INC_DIR}/hailo/vdevice.hpp
${HAILORT_INC_DIR}/hailo/quantization.hpp
- ${HAILORT_INC_DIR}/hailo/dma_mapped_buffer.hpp
${HAILORT_INC_DIR}/hailo/hailort_defaults.hpp
)
set(SRC_FILES
${CMAKE_CURRENT_SOURCE_DIR}/core_op.cpp
-
+
${CMAKE_CURRENT_SOURCE_DIR}/resource_manager/resource_manager.cpp
${CMAKE_CURRENT_SOURCE_DIR}/resource_manager/resource_manager_builder.cpp
${CMAKE_CURRENT_SOURCE_DIR}/resource_manager/config_buffer.cpp
- ${CMAKE_CURRENT_SOURCE_DIR}/resource_manager/inter_context_buffer.cpp
- ${CMAKE_CURRENT_SOURCE_DIR}/resource_manager/ddr_channels_pair.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/resource_manager/intermediate_buffer.cpp
${CMAKE_CURRENT_SOURCE_DIR}/resource_manager/channel_allocator.cpp
${CMAKE_CURRENT_SOURCE_DIR}/resource_manager/context_switch_buffer_builder.cpp
)
-set(HAILORT_CPP_SOURCES ${HAILORT_CPP_SOURCES} ${SRC_FILES} ${HAILORT_OPS_CPP_SOURCES} PARENT_SCOPE)
+set(HAILORT_CPP_SOURCES ${HAILORT_CPP_SOURCES} ${SRC_FILES} PARENT_SCOPE)
return result;
}
-Expected<OutputStreamWithParamsVector> CoreOp::get_output_streams_from_vstream_names(
- const std::map<std::string, hailo_vstream_params_t> &outputs_params)
-{
- OutputStreamWithParamsVector results;
- std::unordered_map<std::string, hailo_vstream_params_t> outputs_edges_params;
- for (auto &name_params_pair : outputs_params) {
- auto stream_names = m_metadata->get_stream_names_from_vstream_name(name_params_pair.first);
- CHECK_EXPECTED(stream_names);
-
- for (auto &stream_name : stream_names.value()) {
- CHECK_AS_EXPECTED(contains(m_output_streams, stream_name), HAILO_NOT_FOUND);
- auto output_stream = m_output_streams.at(stream_name);
- if (output_stream->get_info().is_mux) {
- outputs_edges_params.emplace(name_params_pair);
- }
- else {
- NameToVStreamParamsMap name_to_params = {name_params_pair};
- results.emplace_back(output_stream, name_to_params);
- }
- }
- }
- // Add non mux streams to result
- hailo_status status = add_mux_streams_by_edges_names(results, outputs_edges_params);
- CHECK_SUCCESS_AS_EXPECTED(status);
-
- return results;
-}
-
-// This function adds to results the OutputStreams that correspond to the edges in outputs_edges_params.
-// If an edge name appears in outputs_edges_params then all of its predecessors must appear in outputs_edges_params as well, Otherwise, an error is returned.
-// We use the set seen_edges in order to mark the edges already evaluated by one of its' predecessor.
-hailo_status CoreOp::add_mux_streams_by_edges_names(OutputStreamWithParamsVector &results,
- const std::unordered_map<std::string, hailo_vstream_params_t> &outputs_edges_params)
-{
- std::unordered_set<std::string> seen_edges;
- for (auto &name_params_pair : outputs_edges_params) {
- if (seen_edges.end() != seen_edges.find(name_params_pair.first)) {
- // Edge has already been seen by one of its predecessors
- continue;
- }
- auto output_streams = get_output_streams_by_vstream_name(name_params_pair.first);
- CHECK_EXPECTED_AS_STATUS(output_streams);
- CHECK(output_streams->size() == 1, HAILO_INVALID_ARGUMENT,
- "mux streams cannot be separated into multiple streams");
- auto output_stream = output_streams.release()[0];
-
- // TODO: Find a better way to get the mux edges without creating OutputDemuxer
- auto expected_demuxer = OutputDemuxer::create(*output_stream);
- CHECK_EXPECTED_AS_STATUS(expected_demuxer);
-
- NameToVStreamParamsMap name_to_params;
- for (auto &edge : expected_demuxer.value()->get_edges_stream_info()) {
- auto edge_name_params_pair = outputs_edges_params.find(edge.name);
- CHECK(edge_name_params_pair != outputs_edges_params.end(), HAILO_INVALID_ARGUMENT,
- "All edges of stream {} must be in output vstream params. edge {} is missing.",
- name_params_pair.first, edge.name);
- seen_edges.insert(edge.name);
- name_to_params.insert(*edge_name_params_pair);
- }
- results.emplace_back(output_stream, name_to_params);
- }
- return HAILO_SUCCESS;
-}
-
-Expected<OutputStreamPtrVector> CoreOp::get_output_streams_by_vstream_name(const std::string &name)
-{
- auto stream_names = m_metadata->get_stream_names_from_vstream_name(name);
- CHECK_EXPECTED(stream_names);
-
- OutputStreamPtrVector output_streams;
- output_streams.reserve(stream_names->size());
- for (const auto &stream_name : stream_names.value()) {
- CHECK_AS_EXPECTED(contains(m_output_streams, stream_name), HAILO_NOT_FOUND);
- output_streams.emplace_back(m_output_streams.at(stream_name));
- }
-
- return output_streams;
-}
-
Expected<LayerInfo> CoreOp::get_layer_info(const std::string &stream_name)
{
for (auto layer_info : m_metadata->get_all_layer_infos()) {
return status;
}
-Expected<std::vector<std::string>> CoreOp::get_vstream_names_from_stream_name(const std::string &stream_name)
-{
- return m_metadata->get_vstream_names_from_stream_name(stream_name);
-}
-
const SupportedFeatures &CoreOp::get_supported_features()
{
return m_metadata->supported_features();
return m_core_op_activated_event->wait(timeout);
}
-Expected<std::vector<std::vector<std::string>>> CoreOp::get_output_vstream_groups()
-{
- std::vector<std::vector<std::string>> results;
-
- for (auto output_stream : get_output_streams()) {
- auto vstreams_group = get_vstream_names_from_stream_name(output_stream.get().name());
- CHECK_EXPECTED(vstreams_group);
- results.push_back(vstreams_group.release());
- }
-
- return results;
-}
-
-Expected<std::vector<std::map<std::string, hailo_vstream_params_t>>> CoreOp::make_output_vstream_params_groups(
- bool quantized, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size)
-{
- auto params = make_output_vstream_params(quantized, format_type, timeout_ms, queue_size);
- CHECK_EXPECTED(params);
-
- auto groups = get_output_vstream_groups();
- CHECK_EXPECTED(groups);
-
- std::vector<std::map<std::string, hailo_vstream_params_t>> results(groups->size(), std::map<std::string, hailo_vstream_params_t>());
-
- size_t pipeline_group_index = 0;
- for (const auto &group : groups.release()) {
- for (const auto &name_pair : params.value()) {
- if (contains(group, name_pair.first)) {
- results[pipeline_group_index].insert(name_pair);
- }
- }
- pipeline_group_index++;
- }
-
- return results;
-}
-
-Expected<std::map<std::string, hailo_vstream_params_t>> CoreOp::make_input_vstream_params(
- bool quantized, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size,
- const std::string &network_name)
-{
- auto input_vstream_infos = m_metadata->get_input_vstream_infos(network_name);
- CHECK_EXPECTED(input_vstream_infos);
-
- std::map<std::string, hailo_vstream_params_t> res;
- auto status = Hef::Impl::fill_missing_vstream_params_with_default(res, input_vstream_infos.value(), quantized,
- format_type, timeout_ms, queue_size);
- CHECK_SUCCESS_AS_EXPECTED(status);
- return res;
-}
-
-Expected<std::map<std::string, hailo_vstream_params_t>> CoreOp::make_output_vstream_params(
- bool quantized, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size,
- const std::string &network_name)
-{
- auto output_vstream_infos = m_metadata->get_output_vstream_infos(network_name);
- CHECK_EXPECTED(output_vstream_infos);
- std::map<std::string, hailo_vstream_params_t> res;
- auto status = Hef::Impl::fill_missing_vstream_params_with_default(res, output_vstream_infos.value(), quantized,
- format_type, timeout_ms, queue_size);
- CHECK_SUCCESS_AS_EXPECTED(status);
- return res;
-}
-
-Expected<std::vector<hailo_network_info_t>> CoreOp::get_network_infos() const
-{
- return m_metadata->get_network_infos();
-}
-
Expected<std::vector<hailo_stream_info_t>> CoreOp::get_all_stream_infos(
const std::string &network_name) const
{
return m_metadata->get_all_stream_infos(network_name);
}
-Expected<std::vector<hailo_vstream_info_t>> CoreOp::get_input_vstream_infos(
- const std::string &network_name) const
-{
- return m_metadata->get_input_vstream_infos(network_name);
-}
-
-Expected<std::vector<hailo_vstream_info_t>> CoreOp::get_output_vstream_infos(
- const std::string &network_name) const
-{
- return m_metadata->get_output_vstream_infos(network_name);
-}
-
-Expected<std::vector<hailo_vstream_info_t>> CoreOp::get_all_vstream_infos(
- const std::string &network_name) const
-{
- return m_metadata->get_all_vstream_infos(network_name);
-}
-
AccumulatorPtr CoreOp::get_activation_time_accumulator() const
{
return m_activation_time_accumulator;
virtual std::vector<std::reference_wrapper<OutputStream>> get_output_streams_by_interface(hailo_stream_interface_t stream_interface);
virtual ExpectedRef<InputStream> get_input_stream_by_name(const std::string& name);
virtual ExpectedRef<OutputStream> get_output_stream_by_name(const std::string& name);
- virtual Expected<OutputStreamWithParamsVector> get_output_streams_from_vstream_names(
- const std::map<std::string, hailo_vstream_params_t> &outputs_params);
virtual Expected<LatencyMeasurementResult> get_latency_measurement(const std::string &network_name="");
- // TODO: HRT-9546 - Remove func, should be only in CNG
- virtual Expected<std::map<std::string, hailo_vstream_params_t>> make_input_vstream_params(
- bool quantized, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size,
- const std::string &network_name="");
- // TODO: HRT-9546 - Remove func, should be only in CNG
- virtual Expected<std::map<std::string, hailo_vstream_params_t>> make_output_vstream_params(
- bool quantized, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size,
- const std::string &network_name="");
- // TODO: HRT-9546 - Remove func, should be only in CNG
- virtual Expected<std::vector<std::map<std::string, hailo_vstream_params_t>>> make_output_vstream_params_groups(
- bool quantized, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size);
-
- // TODO: HRT-9546 - Remove func, should be only in CNG
- virtual Expected<std::vector<std::vector<std::string>>> get_output_vstream_groups();
-
- // TODO: HRT-9546 - Remove func, should be only in CNG
- Expected<std::vector<std::string>> get_vstream_names_from_stream_name(const std::string &stream_name);
virtual hailo_status activate_impl(uint16_t dynamic_batch_size, bool resume_pending_stream_transfers = false) = 0;
virtual hailo_status deactivate_impl(bool keep_nn_config_during_reset = false) = 0;
- virtual Expected<std::vector<hailo_network_info_t>> get_network_infos() const;
virtual Expected<std::vector<hailo_stream_info_t>> get_all_stream_infos(const std::string &network_name="") const;
- virtual Expected<std::vector<hailo_vstream_info_t>> get_input_vstream_infos(const std::string &network_name="") const;
- virtual Expected<std::vector<hailo_vstream_info_t>> get_output_vstream_infos(const std::string &network_name="") const;
- virtual Expected<std::vector<hailo_vstream_info_t>> get_all_vstream_infos(const std::string &network_name="") const;
+
virtual AccumulatorPtr get_activation_time_accumulator() const;
virtual AccumulatorPtr get_deactivation_time_accumulator() const;
hailo_status create_streams_from_config_params(Device &device);
virtual bool is_multi_context() const;
virtual const ConfigureNetworkParams get_config_params() const;
-
+ virtual Expected<HwInferResults> run_hw_infer_estimator() = 0;
const SupportedFeatures &get_supported_features();
Expected<uint16_t> get_stream_batch_size(const std::string &stream_name);
const hailo_stream_parameters_t &stream_params, const std::string &stream_name);
hailo_status create_input_stream_from_config_params(Device &device,
const hailo_stream_parameters_t &stream_params, const std::string &stream_name);
- hailo_status add_mux_streams_by_edges_names(OutputStreamWithParamsVector &result,
- const std::unordered_map<std::string, hailo_vstream_params_t> &outputs_edges_params);
- Expected<OutputStreamPtrVector> get_output_streams_by_vstream_name(const std::string &name);
hailo_status activate_low_level_streams(uint16_t dynamic_batch_size, bool resume_pending_stream_transfers);
hailo_status deactivate_low_level_streams();
#include "core_op/resource_manager/config_buffer.hpp"
#include "vdma/memory/sg_buffer.hpp"
#include "vdma/memory/continuous_buffer.hpp"
+#include "vdma/memory/buffer_requirements.hpp"
#include <numeric>
namespace hailort {
-Expected<ConfigBuffer> ConfigBuffer::create(HailoRTDriver &driver, vdma::ChannelId channel_id,
- const std::vector<uint32_t> &cfg_sizes)
+Expected<std::unique_ptr<vdma::VdmaBuffer>> ConfigBuffer::create_buffer(HailoRTDriver &driver, vdma::ChannelId channel_id,
+ const std::vector<uint32_t> &cfg_sizes, const uint32_t buffer_size)
{
- const auto buffer_size = std::accumulate(cfg_sizes.begin(), cfg_sizes.end(), 0);
-
auto buffer_ptr = should_use_ccb(driver) ?
create_ccb_buffer(driver, buffer_size) :
create_sg_buffer(driver, channel_id, cfg_sizes);
+ if (should_use_ccb(driver) && (HAILO_OUT_OF_HOST_CMA_MEMORY == buffer_ptr.status())) {
+ /* Try to use sg buffer instead */
+ return create_sg_buffer(driver, channel_id, cfg_sizes);
+ } else {
+ return buffer_ptr;
+ }
+}
+
+Expected<ConfigBuffer> ConfigBuffer::create(HailoRTDriver &driver, vdma::ChannelId channel_id,
+ const std::vector<uint32_t> &cfg_sizes)
+{
+ const auto buffer_size = std::accumulate(cfg_sizes.begin(), cfg_sizes.end(), 0);
+ CHECK_AS_EXPECTED(IS_FIT_IN_UINT32(buffer_size), HAILO_INTERNAL_FAILURE, "config buffer size exceeded UINT32 range limit");
+ auto buffer_ptr = create_buffer(driver, channel_id, cfg_sizes, static_cast<uint32_t>(buffer_size));
CHECK_EXPECTED(buffer_ptr);
return ConfigBuffer(buffer_ptr.release(), channel_id, buffer_size);
{
// TODO HRT-9657: remove DEVICE interrupts
auto descriptors_count =
- m_buffer->program_descriptors(m_acc_buffer_offset, vdma::InterruptsDomain::DEVICE, m_acc_desc_count, false);
+ m_buffer->program_descriptors(m_acc_buffer_offset, vdma::InterruptsDomain::DEVICE, m_acc_desc_count);
CHECK_EXPECTED(descriptors_count);
m_acc_desc_count += descriptors_count.value();
Expected<std::unique_ptr<vdma::VdmaBuffer>> ConfigBuffer::create_sg_buffer(HailoRTDriver &driver,
vdma::ChannelId channel_id, const std::vector<uint32_t> &cfg_sizes)
{
- auto desc_sizes_pair = vdma::DescriptorList::get_desc_buffer_sizes_for_multiple_transfers(driver, 1, cfg_sizes);
- CHECK_EXPECTED(desc_sizes_pair);
- const auto page_size = desc_sizes_pair->first;
- const auto descs_count = desc_sizes_pair->second;
-
- size_t buffer_size = 0;
- for (const auto cfg_size : cfg_sizes) {
- const auto descs_count_for_cfg = DIV_ROUND_UP(cfg_size, page_size);
- buffer_size += descs_count_for_cfg * page_size;
- }
-
- auto buffer = vdma::SgBuffer::create(driver, buffer_size, descs_count, page_size, HailoRTDriver::DmaDirection::H2D,
- channel_id);
+ static const bool NOT_CIRCULAR = false;
+ // For config channels (in Hailo15), the page size must be a multiple of the host default page size.
+ // Therefore we use the force_default_page_size flag for these buffers.
+ auto const FORCE_DEFAULT_PAGE_SIZE = true;
+ auto buffer_size_requirements = vdma::BufferSizesRequirements::get_sg_buffer_requirements_multiple_transfers(
+ driver.desc_max_page_size(), 1, cfg_sizes, NOT_CIRCULAR, FORCE_DEFAULT_PAGE_SIZE);
+ CHECK_EXPECTED(buffer_size_requirements);
+ const auto page_size = buffer_size_requirements->desc_page_size();
+ const auto descs_count = buffer_size_requirements->descs_count();
+ const auto buffer_size = buffer_size_requirements->buffer_size();
+
+ auto buffer = vdma::SgBuffer::create(driver, buffer_size, descs_count, page_size, NOT_CIRCULAR,
+ HailoRTDriver::DmaDirection::H2D, channel_id);
CHECK_EXPECTED(buffer);
auto buffer_ptr = make_unique_nothrow<vdma::SgBuffer>(buffer.release());
Expected<std::unique_ptr<vdma::VdmaBuffer>> ConfigBuffer::create_ccb_buffer(HailoRTDriver &driver,
uint32_t buffer_size)
{
- buffer_size = vdma::ContinuousBuffer::get_buffer_size(buffer_size);
- auto buffer = vdma::ContinuousBuffer::create(buffer_size, driver);
- CHECK_EXPECTED(buffer);
+ static const bool NOT_CIRCULAR = false;
+ static const uint16_t SINGLE_TRANSFER = 1;
+ auto buffer_size_requirements = vdma::BufferSizesRequirements::get_ccb_buffer_requirements_single_transfer(
+ SINGLE_TRANSFER, buffer_size, NOT_CIRCULAR);
+ CHECK_EXPECTED(buffer_size_requirements);
+
+ auto buffer = vdma::ContinuousBuffer::create(buffer_size_requirements->buffer_size(), driver);
+ /* Don't print an error here, since this might be an expected error that libhailort can recover from
+ (out of host CMA memory). If that's not the case, the error is logged in hailort_driver.cpp */
+ if (HAILO_OUT_OF_HOST_CMA_MEMORY == buffer.status()) {
+ return make_unexpected(buffer.status());
+ } else {
+ CHECK_EXPECTED(buffer);
+ }
auto buffer_ptr = make_unique_nothrow<vdma::ContinuousBuffer>(buffer.release());
CHECK_NOT_NULL_AS_EXPECTED(buffer_ptr, HAILO_OUT_OF_HOST_MEMORY);
case HailoRTDriver::DmaType::PCIE:
return false;
case HailoRTDriver::DmaType::DRAM:
- if (std::getenv("HAILO_FORCE_CONF_CHANNEL_OVER_DESC") != nullptr) {
+ if (nullptr != std::getenv("HAILO_FORCE_CONF_CHANNEL_OVER_DESC")) {
LOGGER__WARNING("Using desc instead of CCB for config channel is not optimal for performance.\n");
return false;
}
#define CCW_DATA_OFFSET (CCW_BYTES_IN_WORD * 2)
#define CCW_HEADER_SIZE (CCW_DATA_OFFSET)
-
class ConfigBuffer final
{
public:
vdma::ChannelId channel_id, const std::vector<uint32_t> &cfg_sizes);
static Expected<std::unique_ptr<vdma::VdmaBuffer>> create_ccb_buffer(HailoRTDriver &driver,
uint32_t buffer_size);
+ static Expected<std::unique_ptr<vdma::VdmaBuffer>> create_buffer(HailoRTDriver &driver, vdma::ChannelId channel_id,
+ const std::vector<uint32_t> &cfg_sizes, const uint32_t buffer_size);
static bool should_use_ccb(HailoRTDriver &driver);
return m_controls;
}
+const CONTROL_PROTOCOL__context_switch_context_type_t &ContextSwitchBufferBuilder::get_context_type() const
+{
+ return m_context_type;
+}
+
CONTROL_PROTOCOL__context_switch_context_info_single_control_t &ContextSwitchBufferBuilder::current_control()
{
assert(!m_controls.empty());
void write_action(MemoryView action);
const std::vector<CONTROL_PROTOCOL__context_switch_context_info_single_control_t> &get_controls() const;
+ const CONTROL_PROTOCOL__context_switch_context_type_t &get_context_type() const;
private:
CONTROL_PROTOCOL__context_switch_context_info_single_control_t ¤t_control();
+++ /dev/null
-/**\r
- * Copyright (c) 2022 Hailo Technologies Ltd. All rights reserved.\r
- * Distributed under the MIT license (https://opensource.org/licenses/MIT)\r
-**/\r
-/**\r
- * @file ddr_channels_pair.cpp\r
- **/\r
-\r
-#include "common/utils.hpp"\r
-\r
-#include "core_op/resource_manager/ddr_channels_pair.hpp"\r
-#include "vdma/memory/continuous_buffer.hpp"\r
-#include "vdma/memory/sg_buffer.hpp"\r
-\r
-\r
-namespace hailort\r
-{\r
-\r
-\r
-Expected<DdrChannelsPair> DdrChannelsPair::create(HailoRTDriver &driver, const DdrChannelsInfo &ddr_channels_info)\r
-{\r
- auto buffer_exp = should_use_ccb(driver) ?\r
- create_ccb_buffer(driver, ddr_channels_info.row_size, ddr_channels_info.min_buffered_rows) :\r
- create_sg_buffer(driver, ddr_channels_info.row_size, ddr_channels_info.min_buffered_rows, ddr_channels_info.d2h_channel_id);\r
- CHECK_EXPECTED(buffer_exp);\r
- auto buffer_ptr = buffer_exp.release();\r
-\r
- CHECK_AS_EXPECTED(0 == (ddr_channels_info.row_size % buffer_ptr->desc_page_size()), HAILO_INTERNAL_FAILURE,\r
- "DDR channel buffer row size must be a multiple of descriptor page size");\r
-\r
- const auto interrupts_domain = vdma::InterruptsDomain::NONE;\r
- const auto total_size = buffer_ptr->descs_count() * buffer_ptr->desc_page_size();\r
- auto desc_count_local = buffer_ptr->program_descriptors(total_size, interrupts_domain, 0, true);\r
- CHECK_EXPECTED(desc_count_local);\r
-\r
- return DdrChannelsPair(std::move(buffer_ptr), ddr_channels_info);\r
-}\r
-\r
-uint16_t DdrChannelsPair::descs_count() const\r
-{\r
- assert(IS_FIT_IN_UINT16(m_buffer->descs_count()));\r
- return static_cast<uint16_t>(m_buffer->descs_count());\r
-}\r
-\r
-uint32_t DdrChannelsPair::descriptors_per_frame() const\r
-{\r
- return (m_info.row_size / m_buffer->desc_page_size()) * m_info.total_buffers_per_frame;\r
-}\r
-\r
-Expected<Buffer> DdrChannelsPair::read() const\r
-{\r
- const auto size = m_buffer->size();\r
- auto res = Buffer::create(size);\r
- CHECK_EXPECTED(res);\r
-\r
- auto status = m_buffer->read(res->data(), size, 0);\r
- CHECK_SUCCESS_AS_EXPECTED(status);\r
-\r
- return res.release();\r
-}\r
-\r
-const DdrChannelsInfo& DdrChannelsPair::info() const\r
-{\r
- return m_info;\r
-}\r
-\r
-\r
-bool DdrChannelsPair::need_manual_credit_management() const\r
-{\r
- // On scatter gather manual credit management is needed\r
- return m_buffer->type() == vdma::VdmaBuffer::Type::SCATTER_GATHER;\r
-}\r
-\r
-CONTROL_PROTOCOL__host_buffer_info_t DdrChannelsPair::get_host_buffer_info() const\r
-{\r
- return m_buffer->get_host_buffer_info(m_info.row_size);\r
-}\r
-\r
-Expected<std::unique_ptr<vdma::VdmaBuffer>> DdrChannelsPair::create_sg_buffer(HailoRTDriver &driver,\r
- uint32_t row_size, uint16_t buffered_rows, vdma::ChannelId d2h_channel_id)\r
-{\r
- auto desc_sizes_pair = vdma::DescriptorList::get_desc_buffer_sizes_for_single_transfer(driver,\r
- buffered_rows, buffered_rows, row_size);\r
- CHECK_EXPECTED(desc_sizes_pair);\r
- const auto desc_page_size = desc_sizes_pair->first;\r
- const auto descs_count = desc_sizes_pair->second;\r
- // DdrChannels are circular so we need to allocate the full descriptors list.\r
- const auto buffer_size = desc_page_size * descs_count;\r
-\r
- auto buffer = vdma::SgBuffer::create(driver, buffer_size, descs_count, desc_page_size,\r
- HailoRTDriver::DmaDirection::BOTH, d2h_channel_id);\r
- CHECK_EXPECTED(buffer);\r
-\r
- auto buffer_ptr = make_unique_nothrow<vdma::SgBuffer>(buffer.release());\r
- CHECK_NOT_NULL_AS_EXPECTED(buffer_ptr, HAILO_OUT_OF_HOST_MEMORY);\r
-\r
- return std::unique_ptr<vdma::VdmaBuffer>(std::move(buffer_ptr));\r
-}\r
-\r
-DdrChannelsPair::DdrChannelsPair(std::unique_ptr<vdma::VdmaBuffer> &&buffer, const DdrChannelsInfo &ddr_channels_info) :\r
- m_buffer(std::move(buffer)),\r
- m_info(ddr_channels_info)\r
-{}\r
-\r
-Expected<std::unique_ptr<vdma::VdmaBuffer>> DdrChannelsPair::create_ccb_buffer(HailoRTDriver &driver,\r
- uint32_t row_size, uint16_t buffered_rows)\r
-{\r
- // The first 12 channels in D2H CCB ("regular channels") requires that the amount of descriptors will be a power\r
- // of 2. Altough the 4 last channels ("enhanced channels") don't have this requirements, we keep the code the same.\r
- auto buffer_size = vdma::ContinuousBuffer::get_buffer_size_desc_power2(row_size * buffered_rows);\r
- auto buffer = vdma::ContinuousBuffer::create(buffer_size, driver);\r
- CHECK_EXPECTED(buffer);\r
-\r
- auto buffer_ptr = make_unique_nothrow<vdma::ContinuousBuffer>(buffer.release());\r
- CHECK_NOT_NULL_AS_EXPECTED(buffer_ptr, HAILO_OUT_OF_HOST_MEMORY);\r
-\r
- return std::unique_ptr<vdma::VdmaBuffer>(std::move(buffer_ptr));\r
-}\r
-\r
-bool DdrChannelsPair::should_use_ccb(HailoRTDriver &driver)\r
-{\r
- switch (driver.dma_type()) {\r
- case HailoRTDriver::DmaType::PCIE:\r
- return false;\r
- case HailoRTDriver::DmaType::DRAM:\r
- if (std::getenv("HAILO_FORCE_DDR_CHANNEL_OVER_DESC") != nullptr) {\r
- LOGGER__WARNING("Using desc instead of CCB for ddr channel is not optimal for performance.\n");\r
- return false;\r
- }\r
- else {\r
- return true;\r
- }\r
- }\r
-\r
- // Shouldn't reach here\r
- assert(false);\r
- return false;\r
-}\r
-\r
-} /* namespace hailort */\r
+++ /dev/null
-/**\r
- * Copyright (c) 2022 Hailo Technologies Ltd. All rights reserved.\r
- * Distributed under the MIT license (https://opensource.org/licenses/MIT)\r
-**/\r
-/**\r
- * @file ddr_channels_pair.hpp\r
- * @brief DDR channel pairs are pair of vdma channels used in the same context for skip-connection.\r
- **/\r
-\r
-#ifndef _HAILO_DDR_CHANNELS_PAIR_HPP_\r
-#define _HAILO_DDR_CHANNELS_PAIR_HPP_\r
-\r
-#include "hailo/hailort.h"\r
-#include "hailo/buffer.hpp"\r
-\r
-#include "vdma/memory/vdma_buffer.hpp"\r
-\r
-\r
-namespace hailort\r
-{\r
-\r
-struct DdrChannelsInfo\r
-{\r
- vdma::ChannelId d2h_channel_id;\r
- uint8_t d2h_stream_index;\r
- vdma::ChannelId h2d_channel_id;\r
- uint8_t h2d_stream_index;\r
- uint8_t network_index;\r
- uint16_t row_size;\r
- uint16_t min_buffered_rows;\r
- // total_buffers_per_frame not same as core_buffer_per frame. \r
- //(In DDR core buffer per frame is 1). Used to calc total host descriptors_per_frame. \r
- uint16_t total_buffers_per_frame;\r
-};\r
-\r
-class DdrChannelsPair final\r
-{\r
-public:\r
- static Expected<DdrChannelsPair> create(HailoRTDriver &driver, const DdrChannelsInfo &ddr_channels_info);\r
-\r
- uint16_t descs_count() const;\r
- uint32_t descriptors_per_frame() const;\r
- Expected<Buffer> read() const;\r
- const DdrChannelsInfo & info() const;\r
-\r
- // Checks if the credits are automaticaly going from d2h channel to its h2d channel, or it needs to be done manually\r
- // (Using a fw task).\r
- bool need_manual_credit_management() const;\r
-\r
- CONTROL_PROTOCOL__host_buffer_info_t get_host_buffer_info() const;\r
-\r
-private:\r
- DdrChannelsPair(std::unique_ptr<vdma::VdmaBuffer> &&buffer, const DdrChannelsInfo &ddr_channels_info);\r
-\r
- static Expected<std::unique_ptr<vdma::VdmaBuffer>> create_sg_buffer(HailoRTDriver &driver,\r
- uint32_t row_size, uint16_t buffered_rows, vdma::ChannelId d2h_channel_id);\r
- static Expected<std::unique_ptr<vdma::VdmaBuffer>> create_ccb_buffer(HailoRTDriver &driver,\r
- uint32_t row_size, uint16_t buffered_rows);\r
-\r
- static bool should_use_ccb(HailoRTDriver &driver);\r
-\r
- std::unique_ptr<vdma::VdmaBuffer> m_buffer;\r
- DdrChannelsInfo m_info;\r
-};\r
-\r
-} /* namespace hailort */\r
-\r
-#endif /* _HAILO_DDR_CHANNELS_PAIR_HPP_ */\r
+++ /dev/null
-/**
- * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
- * Distributed under the MIT license (https://opensource.org/licenses/MIT)
- **/
-/**
- * @file inter_context_buffer.cpp
- * @brief Manages inter-context buffer.
- */
-
-#include "core_op/resource_manager/resource_manager.hpp"
-#include "core_op/resource_manager/inter_context_buffer.hpp"
-#include "vdma/memory/sg_buffer.hpp"
-#include "vdma/memory/continuous_buffer.hpp"
-
-
-namespace hailort
-{
-
-Expected<InterContextBuffer> InterContextBuffer::create(HailoRTDriver &driver, uint32_t transfer_size,
- uint16_t max_batch_size, vdma::ChannelId d2h_channel_id)
-{
- auto buffer_exp = should_use_ccb(driver) ?
- create_ccb_buffer(driver, transfer_size, max_batch_size) :
- create_sg_buffer(driver, transfer_size, max_batch_size, d2h_channel_id);
- CHECK_EXPECTED(buffer_exp);
- auto buffer_ptr = buffer_exp.release();
-
- size_t acc_offset = 0;
- for (uint16_t i = 0; i < max_batch_size; i++) {
- const auto last_desc_interrupts_domain = ((max_batch_size - 1) == i) ?
- vdma::InterruptsDomain::DEVICE : vdma::InterruptsDomain::NONE;
- static const auto BUFFER_NOT_CIRCULAR = false;
- auto desc_count_local = buffer_ptr->program_descriptors(transfer_size, last_desc_interrupts_domain, acc_offset,
- BUFFER_NOT_CIRCULAR);
- CHECK_EXPECTED(desc_count_local, "Failed to program descs for inter context channels. Given max_batch_size is too big.");
- acc_offset += desc_count_local.value();
- }
-
- return InterContextBuffer(std::move(buffer_ptr), transfer_size, max_batch_size);
-}
-
-hailo_status InterContextBuffer::reprogram(uint16_t batch_size)
-{
- const auto prev_batch_size = m_dynamic_batch_size;
- auto status = set_dynamic_batch_size(batch_size);
- CHECK_SUCCESS(status);
-
- if (prev_batch_size == m_dynamic_batch_size) {
- LOGGER__TRACE("Batch size hasn't changed ({}); nothing to be done.", batch_size);
- return HAILO_SUCCESS;
- }
-
- status = m_buffer->reprogram_device_interrupts_for_end_of_batch(m_transfer_size, prev_batch_size,
- vdma::InterruptsDomain::NONE);
- CHECK_SUCCESS(status, "Failed reprogramming device interrupts for the end of the previous batch (size {})",
- prev_batch_size);
- status = m_buffer->reprogram_device_interrupts_for_end_of_batch(m_transfer_size, m_dynamic_batch_size,
- vdma::InterruptsDomain::DEVICE);
- CHECK_SUCCESS(status, "Failed reprogramming device interrupts for the end of the current batch (size {})",
- m_dynamic_batch_size);
-
- return HAILO_SUCCESS;
-}
-
-Expected<Buffer> InterContextBuffer::read()
-{
- const auto size = m_transfer_size * m_dynamic_batch_size;
- assert(size <= m_buffer->size());
-
- auto res = Buffer::create(size);
- CHECK_EXPECTED(res);
-
- auto status = m_buffer->read(res->data(), size, 0);
- CHECK_SUCCESS_AS_EXPECTED(status);
-
- return res.release();
-}
-
-CONTROL_PROTOCOL__host_buffer_info_t InterContextBuffer::get_host_buffer_info() const
-{
- return m_buffer->get_host_buffer_info(m_transfer_size);
-}
-
-InterContextBuffer::InterContextBuffer(std::unique_ptr<vdma::VdmaBuffer> &&buffer, uint32_t transfer_size,
- uint16_t batch_size) :
- m_buffer(std::move(buffer)),
- m_transfer_size(transfer_size),
- m_max_batch_size(batch_size),
- m_dynamic_batch_size(batch_size)
-{}
-
-hailo_status InterContextBuffer::set_dynamic_batch_size(uint16_t batch_size)
-{
- if (CONTROL_PROTOCOL__IGNORE_DYNAMIC_BATCH_SIZE == batch_size) {
- LOGGER__TRACE("Received CONTROL_PROTOCOL__IGNORE_DYNAMIC_BATCH_SIZE == batch_size; "
- "Leaving previously set value of {}", m_dynamic_batch_size);
- } else {
- CHECK(batch_size <= m_max_batch_size, HAILO_INVALID_ARGUMENT,
- "batch_size ({}) must be <= than m_max_batch_size ({})",
- batch_size, m_max_batch_size);
-
- LOGGER__TRACE("Setting intermediate buffer's batch_size to {}", batch_size);
- m_dynamic_batch_size = batch_size;
- }
-
- return HAILO_SUCCESS;
-}
-
-Expected<std::unique_ptr<vdma::VdmaBuffer>> InterContextBuffer::create_sg_buffer(HailoRTDriver &driver,
- uint32_t transfer_size, uint16_t batch_size, vdma::ChannelId d2h_channel_id)
-{
- auto desc_sizes_pair = vdma::DescriptorList::get_desc_buffer_sizes_for_single_transfer(driver,
- batch_size, batch_size, transfer_size);
- CHECK_EXPECTED(desc_sizes_pair);
- const auto desc_page_size = desc_sizes_pair->first;
- const auto descs_count = desc_sizes_pair->second;
-
- // TODO: HRT-9914 - Instead of using aligned descriptor for each transfer, we should do it for the all frame.
- const size_t desc_per_transfer = DIV_ROUND_UP(transfer_size, desc_page_size);
- const size_t buffer_size = desc_per_transfer * desc_page_size * batch_size;
- auto buffer = vdma::SgBuffer::create(driver, buffer_size, descs_count, desc_page_size,
- HailoRTDriver::DmaDirection::BOTH, d2h_channel_id);
- CHECK_EXPECTED(buffer);
-
- auto buffer_ptr = make_unique_nothrow<vdma::SgBuffer>(buffer.release());
- CHECK_NOT_NULL_AS_EXPECTED(buffer_ptr, HAILO_OUT_OF_HOST_MEMORY);
-
- return std::unique_ptr<vdma::VdmaBuffer>(std::move(buffer_ptr));
-}
-
-Expected<std::unique_ptr<vdma::VdmaBuffer>> InterContextBuffer::create_ccb_buffer(HailoRTDriver &driver,
- uint32_t transfer_size, uint16_t batch_size)
-{
- // The first 12 channels in D2H CCB ("regular channels") requires that the amount of descriptors will be a power
- // of 2. Altough the 4 last channels ("enhanced channels") don't have this requirements, we keep the code the same.
- auto buffer_size = vdma::ContinuousBuffer::get_buffer_size_desc_power2(transfer_size * batch_size);
- auto buffer = vdma::ContinuousBuffer::create(buffer_size, driver);
- CHECK_EXPECTED(buffer);
-
- auto buffer_ptr = make_unique_nothrow<vdma::ContinuousBuffer>(buffer.release());
- CHECK_NOT_NULL_AS_EXPECTED(buffer_ptr, HAILO_OUT_OF_HOST_MEMORY);
-
- return std::unique_ptr<vdma::VdmaBuffer>(std::move(buffer_ptr));
-}
-
-bool InterContextBuffer::should_use_ccb(HailoRTDriver &driver)
-{
- switch (driver.dma_type()) {
- case HailoRTDriver::DmaType::PCIE:
- return false;
- case HailoRTDriver::DmaType::DRAM:
- if (nullptr == std::getenv("HAILO_FORCE_INFER_CONTEXT_CHANNEL_OVER_DESC")) {
- return false;
- }
- else {
- LOGGER__INFO("Using (non default mode) CCB for inter context channels.\n");
- return true;
- }
- }
-
- // Shouldn't reach here
- assert(false);
- return false;
-}
-
-} /* namespace hailort */
+++ /dev/null
-/**
- * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
- * Distributed under the MIT license (https://opensource.org/licenses/MIT)
- **/
-/**
- * @file inter_context_buffer.hpp
- * @brief Manages inter-context buffer.
- */
-
-#ifndef _HAILO_INTER_CONTEXT_BUFFER_HPP_
-#define _HAILO_INTER_CONTEXT_BUFFER_HPP_
-
-#include "hailo/expected.hpp"
-#include "hailo/buffer.hpp"
-
-#include "os/hailort_driver.hpp"
-#include "vdma/memory/vdma_buffer.hpp"
-
-#include "control_protocol.h"
-
-
-namespace hailort
-{
-
-class InterContextBuffer final {
-public:
- static Expected<InterContextBuffer> create(HailoRTDriver &driver, uint32_t transfer_size,
- uint16_t max_batch_size, vdma::ChannelId d2h_channel_id);
-
- hailo_status reprogram(uint16_t batch_size);
- Expected<Buffer> read();
-
- CONTROL_PROTOCOL__host_buffer_info_t get_host_buffer_info() const;
-
-private:
- InterContextBuffer(std::unique_ptr<vdma::VdmaBuffer> &&buffer, uint32_t transfer_size, uint16_t batch_size);
- hailo_status set_dynamic_batch_size(uint16_t batch_size);
-
- static Expected<std::unique_ptr<vdma::VdmaBuffer>> create_sg_buffer(HailoRTDriver &driver,
- uint32_t transfer_size, uint16_t batch_size, vdma::ChannelId d2h_channel_id);
- static Expected<std::unique_ptr<vdma::VdmaBuffer>> create_ccb_buffer(HailoRTDriver &driver,
- uint32_t transfer_size, uint16_t batch_size);
-
- static bool should_use_ccb(HailoRTDriver &driver);
-
- std::unique_ptr<vdma::VdmaBuffer> m_buffer;
- const uint32_t m_transfer_size;
- const uint16_t m_max_batch_size;
- uint16_t m_dynamic_batch_size;
-};
-
-} /* namespace hailort */
-
-#endif /* _HAILO_INTER_CONTEXT_BUFFER_HPP_ */
\ No newline at end of file
--- /dev/null
+/**
+ * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+ **/
+/**
+ * @file intermediate_buffer.cpp
+ * @brief Manages intermediate buffers, including inter-context and ddr buffers.
+ */
+
+#include "intermediate_buffer.hpp"
+
+#include "core_op/resource_manager/resource_manager.hpp"
+#include "vdma/memory/sg_buffer.hpp"
+#include "vdma/memory/continuous_buffer.hpp"
+#include "vdma/memory/buffer_requirements.hpp"
+
+
+namespace hailort
+{
+Expected<std::unique_ptr<vdma::VdmaBuffer>> IntermediateBuffer::create_buffer(HailoRTDriver &driver, uint32_t transfer_size,
+ uint16_t max_batch_size, vdma::ChannelId d2h_channel_id, StreamingType streaming_type)
+{
+ const bool is_circular = (streaming_type == StreamingType::CIRCULAR_CONTINUOS);
+ auto buffer_exp = should_use_ccb(driver, streaming_type) ?
+ create_ccb_buffer(driver, transfer_size, max_batch_size, is_circular) :
+ create_sg_buffer(driver, transfer_size, max_batch_size, d2h_channel_id, is_circular);
+
+ if (should_use_ccb(driver, streaming_type) && (HAILO_OUT_OF_HOST_CMA_MEMORY == buffer_exp.status())) {
+ /* Try to use sg buffer instead */
+ return create_sg_buffer(driver, transfer_size, max_batch_size, d2h_channel_id, is_circular);
+ } else {
+ return buffer_exp;
+ }
+}
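
The fallback above is an instance of a general Expected-style pattern: try the preferred allocation, and retry the alternative only on the one error known to be recoverable. A minimal standalone sketch of the idiom, using illustrative names (Status, Result, allocate_with_fallback) rather than the HailoRT API:

#include <functional>
#include <optional>
#include <string>

enum class Status { Success, OutOfCmaMemory, OtherError };

template <typename T>
struct Result { std::optional<T> value; Status status; };

// Try the preferred allocator first; fall back only on the single error we
// know how to recover from (CMA exhaustion). Other errors propagate as-is.
Result<std::string> allocate_with_fallback(
    const std::function<Result<std::string>()> &preferred,
    const std::function<Result<std::string>()> &fallback)
{
    auto res = preferred();
    if (Status::OutOfCmaMemory == res.status) {
        return fallback();
    }
    return res;
}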
+
+Expected<IntermediateBuffer> IntermediateBuffer::create(HailoRTDriver &driver, uint32_t transfer_size,
+ uint16_t max_batch_size, vdma::ChannelId d2h_channel_id, StreamingType streaming_type)
+{
+ auto buffer_exp = create_buffer(driver, transfer_size, max_batch_size, d2h_channel_id, streaming_type);
+ CHECK_EXPECTED(buffer_exp);
+ auto buffer_ptr = buffer_exp.release();
+
+ if (streaming_type == StreamingType::BURST) {
+ // We have max_batch_size transfers, so we program them one by one. The last transfer should report interrupt
+ // to the device.
+ size_t acc_offset = 0;
+ for (uint16_t i = 0; i < max_batch_size; i++) {
+ const auto last_desc_interrupts_domain = ((max_batch_size - 1) == i) ?
+ vdma::InterruptsDomain::DEVICE : vdma::InterruptsDomain::NONE;
+ auto desc_count_local = buffer_ptr->program_descriptors(transfer_size, last_desc_interrupts_domain, acc_offset);
+ CHECK_EXPECTED(desc_count_local, "Failed to program descs for inter context channels. Given max_batch_size is too big.");
+ acc_offset += desc_count_local.value();
+ }
+ } else {
+ // Program all descriptors, no need for interrupt.
+ const auto interrupts_domain = vdma::InterruptsDomain::NONE;
+ const auto total_size = buffer_ptr->descs_count() * buffer_ptr->desc_page_size();
+ auto desc_count_local = buffer_ptr->program_descriptors(total_size, interrupts_domain, 0);
+ CHECK_EXPECTED(desc_count_local);
+ }
+
+ return IntermediateBuffer(std::move(buffer_ptr), transfer_size, max_batch_size, streaming_type);
+}
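
The two branches above differ only in how interrupts are laid out over the descriptor list: burst mode programs one transfer at a time with a device interrupt on the last one, while circular mode programs the whole buffer once with no interrupt. A rough standalone model of the burst bookkeeping (simplified types, not the vdma API):

#include <cstdint>
#include <vector>

enum class IrqDomain { NONE, DEVICE };

struct ProgrammedTransfer { uint32_t offset_descs; IrqDomain irq; };

// Burst mode: one descriptor program per transfer; only the last transfer of
// the batch raises a device interrupt. Circular mode would instead program
// the full buffer once with IrqDomain::NONE.
std::vector<ProgrammedTransfer> program_burst(uint16_t batch_size,
    uint32_t descs_per_transfer)
{
    std::vector<ProgrammedTransfer> programs;
    uint32_t acc_offset = 0;
    for (uint16_t i = 0; i < batch_size; i++) {
        const auto irq = (i + 1 == batch_size) ? IrqDomain::DEVICE : IrqDomain::NONE;
        programs.push_back({acc_offset, irq});
        acc_offset += descs_per_transfer;
    }
    return programs;
}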
+
+hailo_status IntermediateBuffer::set_dynamic_batch_size(uint16_t batch_size)
+{
+ if (m_streaming_type == StreamingType::CIRCULAR_CONTINUOS) {
+ // The buffer pattern does not depend on the batch for circular continuous buffers.
+ return HAILO_SUCCESS;
+ }
+
+ CHECK(batch_size <= m_max_batch_size, HAILO_INVALID_ARGUMENT,
+ "batch_size ({}) must be <= than m_max_batch_size ({})",
+ batch_size, m_max_batch_size);
+
+ LOGGER__TRACE("Setting intermediate buffer's batch_size to {}", batch_size);
+ const auto prev_batch_size = m_dynamic_batch_size;
+ m_dynamic_batch_size = batch_size;
+
+ auto status = m_buffer->reprogram_device_interrupts_for_end_of_batch(m_transfer_size, prev_batch_size,
+ vdma::InterruptsDomain::NONE);
+ CHECK_SUCCESS(status, "Failed reprogramming device interrupts for the end of the previous batch (size {})",
+ prev_batch_size);
+ status = m_buffer->reprogram_device_interrupts_for_end_of_batch(m_transfer_size, m_dynamic_batch_size,
+ vdma::InterruptsDomain::DEVICE);
+ CHECK_SUCCESS(status, "Failed reprogramming device interrupts for the end of the current batch (size {})",
+ m_dynamic_batch_size);
+
+ return HAILO_SUCCESS;
+}
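
Note the two-step reprogramming above: the end-of-batch interrupt of the previous batch is first cleared (NONE), then an interrupt is armed on the last transfer of the new batch (DEVICE). Modeled abstractly as a per-transfer interrupt flag (a sketch, not the descriptor-list API):

#include <cstdint>
#include <vector>

// Toy model: one flag per transfer marking whether its last descriptor raises
// a device interrupt. Changing the batch size clears the old end-of-batch
// flag and sets the new one.
void reprogram_end_of_batch(std::vector<bool> &irq_on_transfer,
    uint16_t prev_batch, uint16_t new_batch)
{
    if (prev_batch > 0) {
        irq_on_transfer.at(prev_batch - 1) = false;
    }
    if (new_batch > 0) {
        irq_on_transfer.at(new_batch - 1) = true;
    }
}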
+
+Expected<Buffer> IntermediateBuffer::read()
+{
+ const auto size = m_transfer_size * m_dynamic_batch_size;
+ assert(size <= m_buffer->size());
+
+ auto res = Buffer::create(size);
+ CHECK_EXPECTED(res);
+
+ auto status = m_buffer->read(res->data(), size, 0);
+ CHECK_SUCCESS_AS_EXPECTED(status);
+
+ return res.release();
+}
+
+CONTROL_PROTOCOL__host_buffer_info_t IntermediateBuffer::get_host_buffer_info() const
+{
+ return m_buffer->get_host_buffer_info(m_transfer_size);
+}
+
+IntermediateBuffer::IntermediateBuffer(std::unique_ptr<vdma::VdmaBuffer> &&buffer, uint32_t transfer_size,
+ uint16_t batch_size, StreamingType streaming_type) :
+ m_buffer(std::move(buffer)),
+ m_transfer_size(transfer_size),
+ m_max_batch_size(batch_size),
+ m_streaming_type(streaming_type),
+ m_dynamic_batch_size(batch_size)
+{}
+
+Expected<std::unique_ptr<vdma::VdmaBuffer>> IntermediateBuffer::create_sg_buffer(HailoRTDriver &driver,
+ uint32_t transfer_size, uint16_t batch_size, vdma::ChannelId d2h_channel_id, bool is_circular)
+{
+ auto const DONT_FORCE_DEFAULT_PAGE_SIZE = false;
+ auto buffer_requirements = vdma::BufferSizesRequirements::get_sg_buffer_requirements_single_transfer(
+ driver.desc_max_page_size(), batch_size, batch_size, transfer_size, is_circular, DONT_FORCE_DEFAULT_PAGE_SIZE);
+ CHECK_EXPECTED(buffer_requirements);
+ const auto desc_page_size = buffer_requirements->desc_page_size();
+ const auto descs_count = buffer_requirements->descs_count();
+ const auto buffer_size = buffer_requirements->buffer_size();
+
+ auto buffer = vdma::SgBuffer::create(driver, buffer_size, descs_count, desc_page_size, is_circular,
+ HailoRTDriver::DmaDirection::BOTH, d2h_channel_id);
+ CHECK_EXPECTED(buffer);
+
+ auto buffer_ptr = make_unique_nothrow<vdma::SgBuffer>(buffer.release());
+ CHECK_NOT_NULL_AS_EXPECTED(buffer_ptr, HAILO_OUT_OF_HOST_MEMORY);
+
+ return std::unique_ptr<vdma::VdmaBuffer>(std::move(buffer_ptr));
+}
+
+Expected<std::unique_ptr<vdma::VdmaBuffer>> IntermediateBuffer::create_ccb_buffer(HailoRTDriver &driver,
+ uint32_t transfer_size, uint16_t batch_size, bool is_circular)
+{
+ auto buffer_size_requirements = vdma::BufferSizesRequirements::get_ccb_buffer_requirements_single_transfer(
+ batch_size, transfer_size, is_circular);
+ CHECK_EXPECTED(buffer_size_requirements);
+
+ auto buffer = vdma::ContinuousBuffer::create(buffer_size_requirements->buffer_size(), driver);
+ /* Don't log an error here, since this may be an expected error that libhailort can recover from
+ (out of host CMA memory). If it isn't, an error is already logged in hailort_driver.cpp */
+ if (HAILO_OUT_OF_HOST_CMA_MEMORY == buffer.status()) {
+ return make_unexpected(buffer.status());
+ } else {
+ CHECK_EXPECTED(buffer);
+ }
+
+ auto buffer_ptr = make_unique_nothrow<vdma::ContinuousBuffer>(buffer.release());
+ CHECK_NOT_NULL_AS_EXPECTED(buffer_ptr, HAILO_OUT_OF_HOST_MEMORY);
+
+ return std::unique_ptr<vdma::VdmaBuffer>(std::move(buffer_ptr));
+}
+
+bool IntermediateBuffer::should_use_ccb(HailoRTDriver &driver, StreamingType streaming_type)
+{
+ if (driver.dma_type() == HailoRTDriver::DmaType::PCIE) {
+ // CCB not supported on PCIe
+ return false;
+ }
+
+ switch (streaming_type) {
+ case StreamingType::BURST:
+ // On burst (aka inter-context) the buffers are big (and depend on max_batch_size). CCB is currently
+ // the default; HAILO_FORCE_INFER_CONTEXT_CHANNEL_OVER_DESC forces descriptor (SG) buffers instead.
+ if (nullptr != std::getenv("HAILO_FORCE_INFER_CONTEXT_CHANNEL_OVER_DESC")) {
+ LOGGER__WARNING("Using desc instead of CCB for inter context channels is not optimal for performance.\n");
+ return false;
+ } else {
+ return true;
+ }
+ case StreamingType::CIRCULAR_CONTINUOS:
+ // On circular_continuous (aka ddr), the buffers are relatively small and we want to verify the C2C
+ // mechanism, so CCB is the default behavior.
+ if (nullptr != std::getenv("HAILO_FORCE_DDR_CHANNEL_OVER_DESC")) {
+ LOGGER__WARNING("Using desc instead of CCB for ddr channel is not optimal for performance.\n");
+ return false;
+ } else {
+ return true;
+ }
+ }
+
+ // Shouldn't reach here
+ assert(false);
+ return false;
+}
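
Both overrides are plain environment variables, so the buffer type can be forced per process before any buffers are created. A hedged usage sketch, assuming a POSIX environment (on Windows, _putenv_s would be the analogue):

#include <cstdlib>

int main()
{
    // Force DDR (circular-continuous) channels onto descriptor (SG) buffers
    // instead of CCB; the runtime will warn that this is suboptimal.
    setenv("HAILO_FORCE_DDR_CHANNEL_OVER_DESC", "1", 1 /* overwrite */);
    // ... open the device and configure the network afterwards ...
    return 0;
}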
+
+} /* namespace hailort */
--- /dev/null
+/**
+ * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+ **/
+/**
+ * @file intermediate_buffer.hpp
+ * @brief Manages intermediate buffers, including inter-context and ddr buffers.
+ */
+
+#ifndef _HAILO_INTERMEDIATE_BUFFER_HPP_
+#define _HAILO_INTERMEDIATE_BUFFER_HPP_
+
+#include "hailo/expected.hpp"
+#include "hailo/buffer.hpp"
+
+#include "os/hailort_driver.hpp"
+#include "vdma/memory/vdma_buffer.hpp"
+
+#include "control_protocol.h"
+
+
+namespace hailort
+{
+
+class IntermediateBuffer final {
+public:
+
+ enum class StreamingType {
+ // Used for inter-context buffer. The buffer is not circular and the data is fetched in bursts.
+ BURST,
+
+ // Used for ddr-channel buffers. The buffer is circular and fetched continuously.
+ CIRCULAR_CONTINUOS,
+ };
+
+ static Expected<IntermediateBuffer> create(HailoRTDriver &driver, uint32_t transfer_size,
+ uint16_t max_batch_size, vdma::ChannelId d2h_channel_id, StreamingType streaming_type);
+
+ hailo_status set_dynamic_batch_size(uint16_t batch_size);
+ Expected<Buffer> read();
+ CONTROL_PROTOCOL__host_buffer_info_t get_host_buffer_info() const;
+
+private:
+ IntermediateBuffer(std::unique_ptr<vdma::VdmaBuffer> &&buffer, uint32_t transfer_size, uint16_t batch_size,
+ StreamingType streaming_type);
+
+ static Expected<std::unique_ptr<vdma::VdmaBuffer>> create_sg_buffer(HailoRTDriver &driver,
+ uint32_t transfer_size, uint16_t batch_size, vdma::ChannelId d2h_channel_id, bool is_circular);
+ static Expected<std::unique_ptr<vdma::VdmaBuffer>> create_ccb_buffer(HailoRTDriver &driver,
+ uint32_t transfer_size, uint16_t batch_size, bool is_circular);
+ static Expected<std::unique_ptr<vdma::VdmaBuffer>> create_buffer(HailoRTDriver &driver, uint32_t transfer_size,
+ uint16_t max_batch_size, vdma::ChannelId d2h_channel_id, StreamingType streaming_type);
+
+ static bool should_use_ccb(HailoRTDriver &driver, StreamingType streaming_type);
+
+ std::unique_ptr<vdma::VdmaBuffer> m_buffer;
+ const uint32_t m_transfer_size;
+ const uint16_t m_max_batch_size;
+ const StreamingType m_streaming_type;
+ uint16_t m_dynamic_batch_size;
+};
+
+} /* namespace hailort */
+
+#endif /* _HAILO_INTERMEDIATE_BUFFER_HPP_ */
\ No newline at end of file
#include "core_op/resource_manager/resource_manager.hpp"
#include "vdma/channel/boundary_channel.hpp"
+#include "vdma/memory/buffer_requirements.hpp"
#include "device_common/control.hpp"
#include <numeric>
return m_builder;
}
-void ContextResources::add_edge_layer(const LayerInfo &layer_info, vdma::ChannelId channel_id,
- const CONTROL_PROTOCOL__host_buffer_info_t &buffer_info)
+hailo_status ContextResources::add_edge_layer(const LayerInfo &layer_info, vdma::ChannelId channel_id,
+ const CONTROL_PROTOCOL__host_buffer_info_t &buffer_info, const SupportedFeatures &supported_features)
{
+ auto status = validate_edge_layer(layer_info, channel_id, supported_features);
+ CHECK_SUCCESS(status);
+
m_edge_layers.emplace_back(EdgeLayer{
layer_info,
channel_id,
buffer_info
});
+
+ return HAILO_SUCCESS;
+}
+
+void ContextResources::add_ddr_channels_info(const DdrChannelsInfo &ddr_info)
+{
+ m_ddr_channels_infos.emplace_back(ddr_info);
}
std::vector<EdgeLayer> ContextResources::get_edge_layers() const
return edge_layers;
}
-Expected<EdgeLayer> ContextResources::get_edge_layer_by_stream_index(uint8_t stream_index) const
+Expected<EdgeLayer> ContextResources::get_edge_layer_by_stream_index(const uint8_t stream_index,
+ const hailo_stream_direction_t direction) const
{
for (const auto &edge_layer : m_edge_layers) {
- if (edge_layer.layer_info.stream_index == stream_index) {
+ if ((stream_index == edge_layer.layer_info.stream_index) && (direction == edge_layer.layer_info.direction)) {
return EdgeLayer(edge_layer);
}
}
return make_unexpected(HAILO_INTERNAL_FAILURE);
}
-
-ExpectedRef<DdrChannelsPair> ContextResources::create_ddr_channels_pair(const DdrChannelsInfo &ddr_info)
-{
- auto buffer = DdrChannelsPair::create(m_driver, ddr_info);
- CHECK_EXPECTED(buffer);
-
- m_ddr_channels_pairs.emplace_back(buffer.release());
- return std::ref(m_ddr_channels_pairs.back());
-}
-
-ExpectedRef<const DdrChannelsPair> ContextResources::get_ddr_channels_pair(uint8_t d2h_stream_index) const
+Expected<DdrChannelsInfo> ContextResources::get_ddr_channels_info(uint8_t d2h_stream_index) const
{
- for (auto &ddr_channels_pair : m_ddr_channels_pairs) {
- if (ddr_channels_pair.info().d2h_stream_index == d2h_stream_index) {
- return std::ref(ddr_channels_pair);
+ for (const auto &ddr_channels_info : m_ddr_channels_infos) {
+ if (ddr_channels_info.d2h_stream_index == d2h_stream_index) {
+ return DdrChannelsInfo{ddr_channels_info};
}
}
return make_unexpected(HAILO_INTERNAL_FAILURE);
}
-const std::vector<DdrChannelsPair> &ContextResources::get_ddr_channels_pairs() const
+const std::vector<DdrChannelsInfo> &ContextResources::get_ddr_channels_infos() const
{
- return m_ddr_channels_pairs;
+ return m_ddr_channels_infos;
}
-hailo_status ContextResources::validate_edge_layers()
+hailo_status ContextResources::validate_edge_layer(const LayerInfo &layer_info, vdma::ChannelId channel_id,
+ const SupportedFeatures &supported_features)
{
- std::set<vdma::ChannelId> used_channel_ids;
+ bool stream_index_already_used = false;
+
for (const auto &edge_layer : m_edge_layers) {
- CHECK(used_channel_ids.find(edge_layer.channel_id) == used_channel_ids.end(), HAILO_INTERNAL_FAILURE,
- "Same stream use the same channel id {}", edge_layer.channel_id);
- used_channel_ids.insert(edge_layer.channel_id);
+ CHECK(!(edge_layer.channel_id == channel_id), HAILO_INTERNAL_FAILURE,
+ "Same stream use the same channel id {}", channel_id);
+
+ // In an activation context it is OK to have multiple edge layers with the same stream index,
+ // since they may belong to different contexts.
+ if (CONTROL_PROTOCOL__CONTEXT_SWITCH_CONTEXT_TYPE_ACTIVATION != m_builder.get_context_type()) {
+ if (edge_layer.layer_info.stream_index == layer_info.stream_index) {
+ // When the dual-direction-stream-index feature is supported, at most 2 edge layers may share a
+ // stream index per context (and they must have opposite directions); otherwise only 1 may.
+ if (supported_features.dual_direction_stream_index) {
+ CHECK(!stream_index_already_used, HAILO_INTERNAL_FAILURE,
+ "Stream Index {} used for too many edge layers in one context", edge_layer.layer_info.stream_index);
+ CHECK(layer_info.direction != edge_layer.layer_info.direction, HAILO_INTERNAL_FAILURE,
+ "Stream Index {} used for other edge layer in same direction", edge_layer.layer_info.stream_index);
+ stream_index_already_used = true;
+ } else {
+ LOGGER__ERROR("Stream Index {} used for too many edge layers in one context",
+ edge_layer.layer_info.stream_index);
+ return HAILO_INTERNAL_FAILURE;
+ }
+ }
+ }
}
return HAILO_SUCCESS;
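
The invariant enforced above can be restated compactly: outside activation contexts, a stream index may appear at most twice per context, and only with opposite directions (and only when dual_direction_stream_index is supported). A simplified standalone model of that check (illustrative types, not the real LayerInfo):

#include <cstdint>
#include <vector>

enum class Dir { H2D, D2H };
struct Layer { uint8_t stream_index; Dir direction; };

// True iff adding `candidate` keeps the per-context invariant: at most two
// layers per stream index, and only with opposite directions (assumes the
// dual-direction feature is supported).
bool can_add(const std::vector<Layer> &layers, const Layer &candidate)
{
    bool seen_once = false;
    for (const auto &layer : layers) {
        if (layer.stream_index != candidate.stream_index) {
            continue;
        }
        if (seen_once || (layer.direction == candidate.direction)) {
            return false; // third use, or same direction twice
        }
        seen_once = true;
    }
    return true;
}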
LatencyMetersMap latency_meters_map;
if ((config_params.latency & HAILO_LATENCY_MEASURE) == HAILO_LATENCY_MEASURE) {
- // Best affort for starting latency meter.
+ // Best effort for starting latency meter.
auto networks_names = core_op_metadata->get_network_names();
for (auto &network_name : networks_names) {
auto layer_infos = core_op_metadata->get_all_layer_infos(network_name);
const auto &config_channels_info = core_op_metadata->config_channels_info();
config_channels_ids.reserve(config_channels_info.size());
for (uint8_t cfg_index = 0; cfg_index < config_channels_info.size(); cfg_index++) {
- const auto layer_identifier = std::make_tuple(LayerType::CFG, "", cfg_index);
+ const auto layer_identifier = std::make_tuple(LayerType::CFG, HAILO_H2D_STREAM, "", cfg_index);
const auto engine_index = config_channels_info[cfg_index].engine_index;
auto channel_id = allocator.get_available_channel_id(layer_identifier, HailoRTDriver::DmaDirection::H2D, engine_index);
CHECK_EXPECTED(channel_id);
m_vdma_device(vdma_device),
m_driver(driver),
m_config_params(config_params),
- m_inter_context_buffers(),
+ m_intermediate_buffers(),
m_core_op_metadata(std::move(core_op_metadata)),
m_core_op_index(core_op_index),
m_dynamic_context_count(0),
m_vdma_device(other.m_vdma_device),
m_driver(other.m_driver),
m_config_params(other.m_config_params),
- m_inter_context_buffers(std::move(other.m_inter_context_buffers)),
+ m_intermediate_buffers(std::move(other.m_intermediate_buffers)),
m_core_op_metadata(std::move(other.m_core_op_metadata)),
m_core_op_index(other.m_core_op_index),
m_dynamic_context_count(std::exchange(other.m_dynamic_context_count, static_cast<uint8_t>(0))),
}
}
+// TODO: after adding NMS single int, we can create an async channel for async nms output stream (HRT-10553)
+Expected<vdma::BoundaryChannel::Type> ResourcesManager::get_boundary_vdma_channel_type(const LayerInfo &layer_info)
+{
+ CHECK_AS_EXPECTED(contains(m_config_params.stream_params_by_name, layer_info.name), HAILO_INVALID_ARGUMENT,
+ "Can't find stream params for layer {}", layer_info.name);
+ const auto async_stream = (0 != (m_config_params.stream_params_by_name.at(layer_info.name).flags & HAILO_STREAM_FLAGS_ASYNC));
+ if (async_stream) {
+ // NMS async streams use buffered channels
+ if (layer_info.format.order == HAILO_FORMAT_ORDER_HAILO_NMS) {
+ return vdma::BoundaryChannel::Type::BUFFERED;
+ }
+ // Non-nms async streams use async channels
+ return vdma::BoundaryChannel::Type::ASYNC;
+ }
+ // Buffered streams => buffered channels
+ return vdma::BoundaryChannel::Type::BUFFERED;
+}
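
The selection above reduces to a two-input truth table: only async, non-NMS streams get ASYNC channels; everything else is BUFFERED. Restated as a standalone constexpr helper (names illustrative, not HailoRT API):

enum class ChannelType { BUFFERED, ASYNC };

// The async flag and the NMS format order are the only inputs to the decision.
constexpr ChannelType boundary_channel_type(bool is_async_stream, bool is_nms)
{
    return (is_async_stream && !is_nms) ? ChannelType::ASYNC
                                        : ChannelType::BUFFERED;
}

static_assert(boundary_channel_type(true,  false) == ChannelType::ASYNC, "");
static_assert(boundary_channel_type(true,  true)  == ChannelType::BUFFERED, "");
static_assert(boundary_channel_type(false, false) == ChannelType::BUFFERED, "");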
+
hailo_status ResourcesManager::create_boundary_vdma_channel(const LayerInfo &layer_info)
{
// TODO: put in layer info
channel_direction, layer_info.dma_engine_index);
CHECK_EXPECTED_AS_STATUS(channel_id);
- auto network_batch_size = get_network_batch_size(layer_info.network_name);
+ const auto network_batch_size = get_network_batch_size(layer_info.network_name);
CHECK_EXPECTED_AS_STATUS(network_batch_size);
- uint32_t min_active_trans = MIN_ACTIVE_TRANSFERS_SCALE * network_batch_size.value();
- uint32_t max_active_trans = MAX_ACTIVE_TRANSFERS_SCALE * network_batch_size.value();
+ const auto nms_max_detections_per_frame =
+ layer_info.nms_info.number_of_classes * layer_info.nms_info.max_bboxes_per_class * layer_info.nms_info.chunks_per_frame;
+
+ const auto max_active_transfers_scale = (layer_info.format.order == HAILO_FORMAT_ORDER_HAILO_NMS) ?
+ (nms_max_detections_per_frame * MAX_ACTIVE_TRANSFERS_SCALE) : MAX_ACTIVE_TRANSFERS_SCALE;
+
+ const auto min_active_trans = MIN_ACTIVE_TRANSFERS_SCALE * network_batch_size.value();
+ const auto max_active_trans = (layer_info.format.order == HAILO_FORMAT_ORDER_HAILO_NMS) ?
+ /* NMS case - the value may be higher than UINT16_MAX; in this case we simply clamp to UINT16_MAX with no error */
+ std::min(static_cast<uint32_t>(UINT16_MAX), max_active_transfers_scale * network_batch_size.value()) :
+ max_active_transfers_scale * network_batch_size.value();
- CHECK(IS_FIT_IN_UINT16(min_active_trans), HAILO_INVALID_ARGUMENT,
+ CHECK(IS_FIT_IN_UINT16(min_active_trans), HAILO_INVALID_ARGUMENT,
"calculated min_active_trans for vdma descriptor list is out of UINT16 range");
- CHECK(IS_FIT_IN_UINT16(max_active_trans), HAILO_INVALID_ARGUMENT,
+ CHECK(IS_FIT_IN_UINT16(max_active_trans), HAILO_INVALID_ARGUMENT,
"calculated min_active_trans for vdma descriptor list is out of UINT16 range");
auto latency_meter = (contains(m_latency_meters, layer_info.network_name)) ? m_latency_meters.at(layer_info.network_name) : nullptr;
/* TODO - HRT-6829- page_size should be calculated inside the vDMA channel class create function */
- const auto transfer_size = (layer_info.nn_stream_config.periph_bytes_per_buffer *
- layer_info.nn_stream_config.core_buffers_per_frame);
- auto desc_sizes_pair = vdma::DescriptorList::get_desc_buffer_sizes_for_single_transfer(m_driver,
- static_cast<uint16_t>(min_active_trans), static_cast<uint16_t>(max_active_trans), transfer_size);
- CHECK_EXPECTED_AS_STATUS(desc_sizes_pair);
-
- const auto page_size = desc_sizes_pair->first;
+ static const bool IS_CIRCULAR = true;
+ const auto transfer_size = LayerInfoUtils::get_layer_transfer_size(layer_info);
+
+ auto const DONT_FORCE_DEFAULT_PAGE_SIZE = false;
+ auto buffer_sizes_requirements = vdma::BufferSizesRequirements::get_sg_buffer_requirements_single_transfer(
+ m_driver.desc_max_page_size(), static_cast<uint16_t>(min_active_trans), static_cast<uint16_t>(max_active_trans),
+ transfer_size, IS_CIRCULAR, DONT_FORCE_DEFAULT_PAGE_SIZE);
+ CHECK_EXPECTED_AS_STATUS(buffer_sizes_requirements);
+
+ const auto page_size = buffer_sizes_requirements->desc_page_size();
const auto descs_count = (nullptr != std::getenv("HAILO_CONFIGURE_FOR_HW_INFER")) ?
- MAX_DESCS_COUNT : desc_sizes_pair->second;
+ MAX_DESCS_COUNT : buffer_sizes_requirements->descs_count();
- const auto channel_type = (0 == (m_config_params.stream_params_by_name.at(layer_info.name).flags & HAILO_STREAM_FLAGS_ASYNC)) ?
- vdma::BoundaryChannel::Type::BUFFERED : vdma::BoundaryChannel::Type::ASYNC;
+ auto channel_type = get_boundary_vdma_channel_type(layer_info);
+ CHECK_EXPECTED_AS_STATUS(channel_type);
auto channel = vdma::BoundaryChannel::create(channel_id.value(), channel_direction, m_driver, descs_count, page_size,
- layer_info.name, latency_meter, network_batch_size.value(), channel_type);
+ layer_info.name, latency_meter, network_batch_size.value(), channel_type.release());
CHECK_EXPECTED_AS_STATUS(channel);
m_boundary_channels.emplace(channel_id.value(), channel.release());
return m_config_params.power_mode;
}
-ExpectedRef<InterContextBuffer> ResourcesManager::create_inter_context_buffer(uint32_t transfer_size,
- uint8_t src_stream_index, uint8_t src_context_index, const std::string &network_name, vdma::ChannelId d2h_channel_id)
+ExpectedRef<IntermediateBuffer> ResourcesManager::create_intermediate_buffer(uint32_t transfer_size,
+ uint16_t batch_size, uint8_t src_stream_index, uint8_t src_context_index,
+ vdma::ChannelId d2h_channel_id, IntermediateBuffer::StreamingType streaming_type)
{
- auto network_batch_size_exp = get_network_batch_size(network_name);
- CHECK_EXPECTED(network_batch_size_exp);
- auto network_batch_size = network_batch_size_exp.value();
-
- auto buffer = InterContextBuffer::create(m_driver, transfer_size, network_batch_size, d2h_channel_id);
+ auto buffer = IntermediateBuffer::create(m_driver, transfer_size, batch_size, d2h_channel_id,
+ streaming_type);
CHECK_EXPECTED(buffer);
const auto key = std::make_pair(src_context_index, src_stream_index);
- auto emplace_res = m_inter_context_buffers.emplace(key, buffer.release());
+ auto emplace_res = m_intermediate_buffers.emplace(key, buffer.release());
return std::ref(emplace_res.first->second);
}
-ExpectedRef<InterContextBuffer> ResourcesManager::get_inter_context_buffer(const IntermediateBufferKey &key)
+ExpectedRef<IntermediateBuffer> ResourcesManager::get_intermediate_buffer(const IntermediateBufferKey &key)
{
- auto buffer_it = m_inter_context_buffers.find(key);
- if (std::end(m_inter_context_buffers) == buffer_it) {
+ auto buffer_it = m_intermediate_buffers.find(key);
+ if (std::end(m_intermediate_buffers) == buffer_it) {
return make_unexpected(HAILO_NOT_FOUND);
}
return m_vdma_device.get_default_streams_interface();
}
-hailo_status ResourcesManager::set_inter_context_channels_dynamic_batch_size(uint16_t dynamic_batch_size)
+hailo_status ResourcesManager::set_dynamic_batch_size(uint16_t dynamic_batch_size)
{
- for (auto &key_buff_pair : m_inter_context_buffers) {
- const auto status = key_buff_pair.second.reprogram(dynamic_batch_size);
+ if (CONTROL_PROTOCOL__IGNORE_DYNAMIC_BATCH_SIZE == dynamic_batch_size) {
+ LOGGER__TRACE("Received CONTROL_PROTOCOL__IGNORE_DYNAMIC_BATCH_SIZE == batch_size");
+ return HAILO_SUCCESS;
+ }
+
+ for (auto &key_buff_pair : m_intermediate_buffers) {
+ const auto status = key_buff_pair.second.set_dynamic_batch_size(dynamic_batch_size);
CHECK_SUCCESS(status);
}
Expected<Buffer> ResourcesManager::read_intermediate_buffer(const IntermediateBufferKey &key)
{
- auto inter_context_buffer_it = m_inter_context_buffers.find(key);
- if (std::end(m_inter_context_buffers) != inter_context_buffer_it) {
- return inter_context_buffer_it->second.read();
- }
-
- const auto dynamic_context_index = key.first;
- const size_t context_index = dynamic_context_index + CONTROL_PROTOCOL__CONTEXT_SWITCH_NUMBER_OF_NON_DYNAMIC_CONTEXTS;
- CHECK_AS_EXPECTED(context_index < m_contexts_resources.size(), HAILO_NOT_FOUND, "Context index {} out of range",
- dynamic_context_index);
- const auto d2h_stream_index = key.second;
- if (auto ddr_channels_pair = m_contexts_resources[context_index].get_ddr_channels_pair(d2h_stream_index)) {
- return ddr_channels_pair->get().read();
- }
-
- LOGGER__ERROR("Failed to find intermediate buffer for src_context {}, src_stream_index {}", key.first,
+ auto intermediate_buffer_it = m_intermediate_buffers.find(key);
+ CHECK_AS_EXPECTED(std::end(m_intermediate_buffers) != intermediate_buffer_it,
+ HAILO_NOT_FOUND, "Failed to find intermediate buffer for src_context {}, src_stream_index {}", key.first,
key.second);
- return make_unexpected(HAILO_NOT_FOUND);
-
+ return intermediate_buffer_it->second.read();
}
hailo_status ResourcesManager::configure()
return HAILO_SUCCESS;
}
-hailo_status ResourcesManager::enable_state_machine(uint16_t dynamic_batch_size)
+hailo_status ResourcesManager::enable_state_machine(uint16_t dynamic_batch_size, uint16_t batch_count)
{
- return Control::enable_core_op(m_vdma_device, m_core_op_index, dynamic_batch_size);
+ return Control::enable_core_op(m_vdma_device, m_core_op_index, dynamic_batch_size, batch_count);
}
hailo_status ResourcesManager::reset_state_machine(bool keep_nn_config_during_reset)
for (uint16_t transfer_index = 0; transfer_index < dynamic_batch_size; transfer_index++) {
const auto last_desc_interrupts_domain = ((dynamic_batch_size - 1) == transfer_index) ?
vdma::InterruptsDomain::DEVICE : vdma::InterruptsDomain::NONE;
- static const auto BUFFER_NOT_CIRCULAR = false;
auto desc_count_local = desc_list->program_last_descriptor(single_transfer_size,
- last_desc_interrupts_domain, acc_desc_offset, BUFFER_NOT_CIRCULAR);
+ last_desc_interrupts_domain, acc_desc_offset);
CHECK_EXPECTED(desc_count_local, "Failed to program descs for inter context channels. Given max_batch_size is too big.");
acc_desc_offset += desc_count_local.value();
}
}
Expected<std::pair<vdma::ChannelId, uint16_t>> ResourcesManager::create_mapped_buffer_for_hw_only_infer(
- vdma::BoundaryChannelPtr boundary_channel_ptr, const hailo_vdma_buffer_direction_flags_t direction,
+ vdma::BoundaryChannelPtr boundary_channel_ptr, const HailoRTDriver::DmaDirection direction,
const uint32_t single_transfer_size, const uint16_t dynamic_batch_size, const uint16_t batch_count)
{
auto total_frames_per_run = dynamic_batch_size * batch_count;
CHECK_AS_EXPECTED(IS_FIT_IN_UINT16(total_desc_count), HAILO_INVALID_ARGUMENT,
"calculated total_desc_count for vdma descriptor list is out of UINT16 range");
- auto mapped_buffer_exp = DmaMappedBuffer::create(total_desc_count * desc_list->desc_page_size(), direction, m_vdma_device);
- CHECK_EXPECTED(mapped_buffer_exp);
-
- auto mapped_buffer = make_shared_nothrow<DmaMappedBuffer>(mapped_buffer_exp.release());
- CHECK_NOT_NULL_AS_EXPECTED(mapped_buffer, HAILO_OUT_OF_HOST_MEMORY);
- m_hw_only_boundary_buffers.push_back(mapped_buffer);
+ auto mapped_buffer = vdma::MappedBuffer::create_shared(m_driver, direction, total_desc_count * desc_list->desc_page_size());
+ CHECK_EXPECTED(mapped_buffer);
+ m_hw_only_boundary_buffers.emplace_back(mapped_buffer.release());
uint32_t STARTING_DESC = 0;
- auto status = desc_list->configure_to_use_buffer(*mapped_buffer, boundary_channel_ptr->get_channel_id(), STARTING_DESC);
+ auto status = desc_list->configure_to_use_buffer(*m_hw_only_boundary_buffers.back(), boundary_channel_ptr->get_channel_id(), STARTING_DESC);
CHECK_SUCCESS_AS_EXPECTED(status);
auto desc_programed = program_desc_for_hw_only_flow(desc_list, single_transfer_size, dynamic_batch_size, batch_count);
channels_info.channel_count++;
}
+hailo_status ResourcesManager::set_hw_infer_done_notification(std::condition_variable &infer_done_cond)
+{
+ auto callback = [](Device &device, const hailo_notification_t ¬ification, void *opaque) {
+ (void)device;
+
+ if (HAILO_NOTIFICATION_ID_HW_INFER_MANAGER_INFER_DONE != notification.id) {
+ LOGGER__ERROR("Notification id passed to hw infer callback is invalid");
+ }
+
+ static_cast<std::condition_variable *>(opaque)->notify_one();
+ return;
+ };
+
+ auto status = get_device().set_notification_callback(callback,
+ HAILO_NOTIFICATION_ID_HW_INFER_MANAGER_INFER_DONE, static_cast<void *>(&infer_done_cond));
+ CHECK_SUCCESS(status);
+
+ return HAILO_SUCCESS;
+}
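
One caveat with the callback above: it calls notify_one() without a guarded flag, so a notification that fires before the caller reaches wait_for(), or a spurious wakeup, can distort the wait. A common hardening, sketched with standard primitives only (not HailoRT API):

#include <chrono>
#include <condition_variable>
#include <mutex>

// Hardened wait: a bool set under the mutex guards against spurious wakeups
// and against the notification arriving before wait() is entered.
struct InferDoneWaiter {
    std::mutex mutex;
    std::condition_variable cond;
    bool done = false;

    void notify() {
        { std::lock_guard<std::mutex> lock(mutex); done = true; }
        cond.notify_one();
    }

    bool wait(std::chrono::milliseconds timeout) {
        std::unique_lock<std::mutex> lock(mutex);
        return cond.wait_for(lock, timeout, [this]() { return done; });
    }
};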
+
Expected<uint16_t> ResourcesManager::calc_hw_infer_batch_count(uint16_t dynamic_batch_size)
{
uint16_t batch_count = UINT16_MAX;
for (const auto &layer_info : m_core_op_metadata->get_all_layer_infos()) {
const auto stream_info = LayerInfoUtils::get_stream_info_from_layer_info(layer_info);
- const auto single_transfer_size = (HAILO_FORMAT_ORDER_HAILO_NMS == stream_info.format.order) ?
- stream_info.nms_info.bbox_size : stream_info.hw_frame_size;
+ uint32_t single_transfer_size = LayerInfoUtils::get_stream_transfer_size(stream_info, layer_info);
auto boundary_channel_ptr_exp = get_boundary_vdma_channel_by_stream_name(layer_info.name);
CHECK_EXPECTED(boundary_channel_ptr_exp);
auto boundary_channel_ptr = boundary_channel_ptr_exp.release();
return batch_count;
}
-void ResourcesManager::hw_infer_calc_stats(uint16_t batch_count, uint16_t dynamic_batch_size,
+HwInferResults ResourcesManager::hw_infer_calc_stats(uint16_t batch_count, uint16_t dynamic_batch_size,
size_t single_frame_transfer_size, uint32_t infer_cycles)
{
- const auto total_transfer_size = single_frame_transfer_size * dynamic_batch_size * batch_count;
- const auto total_frames = dynamic_batch_size * batch_count;
+ HwInferResults hw_infer_results{};
+ const size_t total_transfer_size = single_frame_transfer_size * dynamic_batch_size * batch_count;
+ const size_t total_frames_passed = dynamic_batch_size * batch_count;
// TODO - get clock rate from Chip (still not supported in VPU mode)
const float32_t CPU_CLOCK_RATE = static_cast<float32_t>(5.0 / (1000 * 1000 * 1000));
const float32_t time_sec = static_cast<float32_t>(infer_cycles) * CPU_CLOCK_RATE;
- const float32_t fps = static_cast<float32_t>(total_frames) / time_sec;
+ const float32_t fps = static_cast<float32_t>(total_frames_passed) / time_sec;
const float32_t BYTE_TO_BIT = 8.0;
const float32_t BITS_TO_GBIT = static_cast<float32_t>(1.0 * 1000 * 1000 * 1000);
const float32_t BW_Gbps = static_cast<float32_t>(total_transfer_size) * BYTE_TO_BIT / time_sec / BITS_TO_GBIT;
- LOGGER__ERROR("\nBatch count - {}\nTotal transfer size: {}\ntotal_frames - {}\ntime_sec - {}\nfps - {}\nBW_Gbps - {}",
- batch_count, total_transfer_size, total_frames, time_sec, fps, BW_Gbps);
+
+ /* Prepare results */
+ hw_infer_results.batch_count = batch_count;
+ hw_infer_results.total_transfer_size = total_transfer_size;
+ hw_infer_results.total_frames_passed = total_frames_passed;
+ hw_infer_results.time_sec = time_sec;
+ hw_infer_results.fps = fps;
+ hw_infer_results.BW_Gbps = BW_Gbps;
+
+ return hw_infer_results;
}
-Expected<CONTROL_PROTOCOL__hw_only_infer_results_t> ResourcesManager::run_hw_only_infer(uint16_t dynamic_batch_size)
+Expected<HwInferResults> ResourcesManager::run_hw_only_infer()
{
- CONTROL_PROTOCOL__hw_only_infer_results_t infer_results = {};
- CONTROL_PROTOCOL__hw_infer_channels_info_t channels_info = {};
+ CONTROL_PROTOCOL__hw_only_infer_results_t fw_infer_results{};
+ CONTROL_PROTOCOL__hw_infer_channels_info_t channels_info{};
channels_info.channel_count = 0;
+ static constexpr auto INFER_TIMEOUT = std::chrono::milliseconds(120000);
- CHECK_AS_EXPECTED(dynamic_batch_size <= m_config_params.batch_size, HAILO_INVALID_ARGUMENT,
- "Dynamic batch size must be up to configured batch size");
-
- auto batch_count = calc_hw_infer_batch_count(dynamic_batch_size);
+ auto batch_count = calc_hw_infer_batch_count(m_config_params.batch_size);
CHECK_EXPECTED(batch_count);
for (const auto &layer_info : m_core_op_metadata->get_all_layer_infos()) {
auto single_transfer_size = (HAILO_FORMAT_ORDER_HAILO_NMS == stream_info.format.order) ?
stream_info.nms_info.bbox_size : stream_info.hw_frame_size;
const auto direction = (layer_info.direction == HAILO_H2D_STREAM) ?
- HAILO_VDMA_BUFFER_DIRECTION_FLAGS_H2D : HAILO_VDMA_BUFFER_DIRECTION_FLAGS_D2H;
+ HailoRTDriver::DmaDirection::H2D : HailoRTDriver::DmaDirection::D2H;
auto channel_info_pair = create_mapped_buffer_for_hw_only_infer(boundary_channel_ptr.release(), direction,
- single_transfer_size, dynamic_batch_size, batch_count.value());
+ single_transfer_size, m_config_params.batch_size, batch_count.value());
CHECK_EXPECTED(channel_info_pair);
add_channel_to_hw_infer_channel_info(channel_info_pair.release(), channels_info);
}
- auto status = Control::start_hw_only_infer(m_vdma_device, m_core_op_index, dynamic_batch_size, &channels_info);
+ std::condition_variable infer_done_cond;
+ auto status = set_hw_infer_done_notification(infer_done_cond);
+ CHECK_SUCCESS_AS_EXPECTED(status);
+
+ std::mutex mutex;
+ std::unique_lock<std::mutex> lock(mutex);
+
+ status = Control::start_hw_only_infer(m_vdma_device, m_core_op_index, m_config_params.batch_size,
+ batch_count.value(), &channels_info);
CHECK_SUCCESS_AS_EXPECTED(status);
- // Delay until infer ends
- // TODO HRT-9829 - chagne to notification from FW
- std::this_thread::sleep_for(std::chrono::milliseconds(20000));
+ infer_done_cond.wait_for(lock, INFER_TIMEOUT);
- status = Control::stop_hw_only_infer(m_vdma_device, &infer_results);
+ status = Control::stop_hw_only_infer(m_vdma_device, &fw_infer_results);
CHECK_SUCCESS_AS_EXPECTED(status);
auto single_frame_transfer_size = m_core_op_metadata->get_total_transfer_size();
CHECK_EXPECTED(single_frame_transfer_size);
- hw_infer_calc_stats(batch_count.value(), dynamic_batch_size, single_frame_transfer_size.release(), infer_results.infer_cycles);
-
- return infer_results;
+ return hw_infer_calc_stats(batch_count.value(), m_config_params.batch_size, single_frame_transfer_size.release(),
+ fw_infer_results.infer_cycles);
}
} /* namespace hailort */
#include "hailo/hailort.h"
-#include "core_op/resource_manager/inter_context_buffer.hpp"
-#include "core_op/resource_manager/ddr_channels_pair.hpp"
+#include "core_op/resource_manager/intermediate_buffer.hpp"
#include "core_op/resource_manager/config_buffer.hpp"
#include "core_op/resource_manager/channel_allocator.hpp"
#include "core_op/resource_manager/context_switch_buffer_builder.hpp"
{
#define DEFAULT_ACTUAL_BATCH_SIZE (1)
+#define MAX_NUMBER_DATA_STREAM_INDEX (20)
struct EdgeLayer {
CONTROL_PROTOCOL__host_buffer_info_t buffer_info;
};
+struct DdrChannelsInfo
+{
+ vdma::ChannelId d2h_channel_id;
+ uint8_t d2h_stream_index;
+ vdma::ChannelId h2d_channel_id;
+ uint8_t h2d_stream_index;
+ CONTROL_PROTOCOL__host_buffer_info_t host_buffer_info;
+ uint8_t network_index;
+ uint16_t row_size;
+ uint16_t min_buffered_rows;
+ // total_buffers_per_frame is not the same as core_buffers_per_frame
+ // (in DDR, core buffers per frame is 1). Used to calculate the total host descriptors_per_frame.
+ uint16_t total_buffers_per_frame;
+
+ // Checks whether credits flow automatically from the d2h channel to its h2d channel, or whether
+ // this needs to be done manually (using a fw task).
+ bool need_manual_credit_management() const
+ {
+ return host_buffer_info.buffer_type == CONTROL_PROTOCOL__HOST_BUFFER_TYPE_EXTERNAL_DESC;
+ }
+
+ uint16_t descs_count() const
+ {
+ assert(IS_FIT_IN_UINT16(host_buffer_info.total_desc_count));
+ return static_cast<uint16_t>(host_buffer_info.total_desc_count);
+ }
+
+ uint32_t descriptors_per_frame() const
+ {
+ return (row_size / host_buffer_info.desc_page_size) * total_buffers_per_frame;
+ }
+};
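
For concreteness, the derived values in DdrChannelsInfo follow directly from host_buffer_info; e.g. with a 512-byte row, 512-byte descriptor pages and 4 buffers per frame, descriptors_per_frame() is (512 / 512) * 4 = 4. A standalone restatement of that arithmetic (all values illustrative):

#include <cassert>
#include <cstdint>

int main()
{
    const uint16_t row_size = 512;
    const uint32_t desc_page_size = 512;
    const uint16_t total_buffers_per_frame = 4;

    const uint32_t descriptors_per_frame =
        (row_size / desc_page_size) * total_buffers_per_frame;
    assert(4 == descriptors_per_frame);
    return 0;
}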
+
class ContextResources final {
public:
static Expected<ContextResources> create(HailoRTDriver &driver, CONTROL_PROTOCOL__context_switch_context_type_t context_type,
const std::vector<CONTROL_PROTOCOL__context_switch_context_info_single_control_t> &get_controls() const;
ContextSwitchBufferBuilder &builder();
- void add_edge_layer(const LayerInfo &layer_info, vdma::ChannelId channel_id,
- const CONTROL_PROTOCOL__host_buffer_info_t &buffer_info);
+ hailo_status add_edge_layer(const LayerInfo &layer_info, vdma::ChannelId channel_id,
+ const CONTROL_PROTOCOL__host_buffer_info_t &buffer_info, const SupportedFeatures &supported_features);
+ void add_ddr_channels_info(const DdrChannelsInfo &ddr_info);
std::vector<EdgeLayer> get_edge_layers() const;
std::vector<EdgeLayer> get_edge_layers(LayerType layer_type) const;
std::vector<EdgeLayer> get_edge_layers(hailo_stream_direction_t direction) const;
std::vector<EdgeLayer> get_edge_layers(LayerType layer_type, hailo_stream_direction_t direction) const;
- Expected<EdgeLayer> get_edge_layer_by_stream_index(uint8_t stream_index) const;
+ Expected<EdgeLayer> get_edge_layer_by_stream_index(const uint8_t stream_index,
+ const hailo_stream_direction_t direction) const;
- ExpectedRef<DdrChannelsPair> create_ddr_channels_pair(const DdrChannelsInfo &ddr_info);
- ExpectedRef<const DdrChannelsPair> get_ddr_channels_pair(uint8_t d2h_stream_index) const;
- const std::vector<DdrChannelsPair> &get_ddr_channels_pairs() const;
+ Expected<DdrChannelsInfo> get_ddr_channels_info(uint8_t d2h_stream_index) const;
+ const std::vector<DdrChannelsInfo> &get_ddr_channels_infos() const;
- hailo_status validate_edge_layers();
+ hailo_status validate_edge_layer(const LayerInfo &layer_info, vdma::ChannelId channel_id,
+ const SupportedFeatures &supported_features);
std::vector<ConfigBuffer> &get_config_buffers();
std::reference_wrapper<HailoRTDriver> m_driver;
ContextSwitchBufferBuilder m_builder;
std::vector<ConfigBuffer> m_config_buffers;
- std::vector<DdrChannelsPair> m_ddr_channels_pairs;
std::vector<EdgeLayer> m_edge_layers;
+ std::vector<DdrChannelsInfo> m_ddr_channels_infos;
};
class ResourcesManager final
ResourcesManager &operator=(ResourcesManager &&other) = delete;
ResourcesManager(ResourcesManager &&other) noexcept;
- ExpectedRef<InterContextBuffer> create_inter_context_buffer(uint32_t transfer_size, uint8_t src_stream_index,
- uint8_t src_context_index, const std::string &network_name, vdma::ChannelId d2h_channel_id);
- ExpectedRef<InterContextBuffer> get_inter_context_buffer(const IntermediateBufferKey &key);
+ ExpectedRef<IntermediateBuffer> create_intermediate_buffer(uint32_t transfer_size, uint16_t batch_size,
+ uint8_t src_stream_index, uint8_t src_context_index, vdma::ChannelId d2h_channel_id,
+ IntermediateBuffer::StreamingType streaming_type);
+ ExpectedRef<IntermediateBuffer> get_intermediate_buffer(const IntermediateBufferKey &key);
+ Expected<vdma::BoundaryChannel::Type> get_boundary_vdma_channel_type(const LayerInfo &layer_info);
hailo_status create_boundary_vdma_channel(const LayerInfo &layer_info);
Expected<CONTROL_PROTOCOL__application_header_t> get_control_core_op_header();
Expected<Buffer> read_intermediate_buffer(const IntermediateBufferKey &key);
- hailo_status set_inter_context_channels_dynamic_batch_size(uint16_t dynamic_batch_size);
+ hailo_status set_dynamic_batch_size(uint16_t dynamic_batch_size);
hailo_status configure();
- hailo_status enable_state_machine(uint16_t dynamic_batch_size);
+ hailo_status enable_state_machine(uint16_t dynamic_batch_size,
+ uint16_t batch_count = CONTROL_PROTOCOL__INIFINITE_BATCH_COUNT);
hailo_status reset_state_machine(bool keep_nn_config_during_reset = false);
hailo_status cancel_pending_async_transfers();
hailo_status start_vdma_interrupts_dispatcher();
Expected<uint16_t> program_desc_for_hw_only_flow(std::shared_ptr<vdma::DescriptorList> desc_list,
const uint32_t single_transfer_size, const uint16_t dynamic_batch_size, const uint16_t batch_count);
Expected<std::pair<vdma::ChannelId, uint16_t>> create_mapped_buffer_for_hw_only_infer(
- vdma::BoundaryChannelPtr boundary_channel_ptr, const hailo_vdma_buffer_direction_flags_t direction,
+ vdma::BoundaryChannelPtr boundary_channel_ptr, const HailoRTDriver::DmaDirection direction,
const uint32_t single_transfer_size, const uint16_t dynamic_batch_size, const uint16_t batch_count);
void add_channel_to_hw_infer_channel_info(std::pair<vdma::ChannelId, uint16_t> channel_info,
CONTROL_PROTOCOL__hw_infer_channels_info_t &channels_info);
Expected<uint16_t> calc_hw_infer_batch_count(uint16_t dynamic_batch_size);
- void hw_infer_calc_stats(uint16_t batch_count, uint16_t dynamic_batch_size,
+ HwInferResults hw_infer_calc_stats(uint16_t batch_count, uint16_t dynamic_batch_size,
size_t single_frame_transfer_size, uint32_t infer_cycles);
- Expected<CONTROL_PROTOCOL__hw_only_infer_results_t> run_hw_only_infer(uint16_t dynamic_batch_size);
+ hailo_status set_hw_infer_done_notification(std::condition_variable &infer_done_cond);
+ Expected<HwInferResults> run_hw_only_infer();
private:
hailo_status fill_infer_features(CONTROL_PROTOCOL__application_header_t &app_header);
VdmaDevice &m_vdma_device;
HailoRTDriver &m_driver;
const ConfigureNetworkParams m_config_params;
- std::map<IntermediateBufferKey, InterContextBuffer> m_inter_context_buffers;
+ std::map<IntermediateBufferKey, IntermediateBuffer> m_intermediate_buffers;
std::shared_ptr<CoreOpMetadata> m_core_op_metadata;
uint8_t m_core_op_index;
uint8_t m_dynamic_context_count;
// config_stream_index.
std::vector<vdma::ChannelId> m_config_channels_ids;
// Mapped buffers would be used only in hw only flow
- std::vector<std::shared_ptr<DmaMappedBuffer>> m_hw_only_boundary_buffers;
+ std::vector<std::shared_ptr<vdma::MappedBuffer>> m_hw_only_boundary_buffers;
ResourcesManager(VdmaDevice &vdma_device, HailoRTDriver &driver,
ChannelAllocator &&channel_allocator, const ConfigureNetworkParams config_params,
{
-static uint16_t calculate_periph_buffers_per_frame(const CONTROL_PROTOCOL__hw_consts_t &hw_consts,
+static uint16_t calculate_power_optimized_periph_buffers_per_frame(const CONTROL_PROTOCOL__hw_consts_t &hw_consts,
uint16_t min_periph_buffers_per_frame, uint32_t frame_size, uint16_t periph_buffers_per_frame)
{
const auto max_periph_buffers_per_frame = MIN(frame_size, static_cast<uint32_t>(hw_consts.max_periph_buffers_per_frame));
}
}
-static hailo_status calculate_credit_params(const CONTROL_PROTOCOL__hw_consts_t &hw_consts, uint16_t desc_page_size,
- hailo_stream_direction_t direction, bool should_optimize_credits, uint16_t *periph_bytes_per_buffer,
- uint16_t *periph_buffers_per_frame)
+static Expected<LayerInfo> calculate_credit_params(const CONTROL_PROTOCOL__hw_consts_t &hw_consts, uint16_t desc_page_size,
+ bool should_optimize_credits, const LayerInfo &layer_info)
{
// Next parameters differ between RX and TX
- auto local_periph_bytes_per_buffer = (*periph_bytes_per_buffer);
- auto local_periph_buffers_per_frame = (*periph_buffers_per_frame);
- uint32_t periph_frame_size = (*periph_bytes_per_buffer) * (*periph_buffers_per_frame);
- const auto max_bytes_per_buffer = MAX(hw_consts.max_acceptable_bytes_per_buffer, (*periph_bytes_per_buffer));
+ auto local_periph_bytes_per_buffer = layer_info.nn_stream_config.periph_bytes_per_buffer;
+ auto local_periph_buffers_per_frame = layer_info.nn_stream_config.periph_buffers_per_frame;
+ uint32_t periph_frame_size = local_periph_bytes_per_buffer * local_periph_buffers_per_frame;
+ const auto max_bytes_per_buffer = MAX(hw_consts.max_acceptable_bytes_per_buffer, local_periph_bytes_per_buffer);
- if (0 != (local_periph_bytes_per_buffer % hw_consts.fifo_word_granularity_bytes)) {
- return HAILO_INTERNAL_FAILURE;
- }
+ CHECK_AS_EXPECTED(0 == (local_periph_bytes_per_buffer % hw_consts.fifo_word_granularity_bytes), HAILO_INTERNAL_FAILURE,
+ "Error, Invalid periph bytes ber puffer value {} must divide by {} with no remainder",
+ local_periph_bytes_per_buffer, hw_consts.fifo_word_granularity_bytes);
if (should_optimize_credits) {
// If credits optimizations flag is on, assuming periph_buffers_per_frame * periph_bytes_per_buffer == periph_frame_size
// Find the lowest periph_buffers_per_frame that divides periph_frame_size and is bigger than periph_frame_size / max_bytes_per_buffer
// Also, periph_bytes_per_buffer must be a multiple of 8
const auto min_periph_buffers_per_frame = DIV_ROUND_UP(periph_frame_size, max_bytes_per_buffer);
- local_periph_buffers_per_frame = calculate_periph_buffers_per_frame(hw_consts, static_cast<uint16_t>(min_periph_buffers_per_frame),
- periph_frame_size, local_periph_buffers_per_frame);
+ local_periph_buffers_per_frame = calculate_power_optimized_periph_buffers_per_frame(hw_consts,
+ static_cast<uint16_t>(min_periph_buffers_per_frame), periph_frame_size, local_periph_buffers_per_frame);
assert(IS_FIT_IN_UINT16(periph_frame_size / local_periph_buffers_per_frame));
local_periph_bytes_per_buffer = static_cast<uint16_t>(periph_frame_size / local_periph_buffers_per_frame); // Must be integer according to last function
}
// Periph credits size must be lower than the following value to make sure that the credit size allows
// for at least desc_page_size bytes left in the FIFO for the last descriptor in the pattern
- if ((direction == HAILO_D2H_STREAM) &&
- (static_cast<uint32_t>(local_periph_bytes_per_buffer) > (hw_consts.outbound_data_stream_size - 8 - desc_page_size))) {
- LOGGER__ERROR("Current periph_bytes_per_buffer is {} which is too high. Exiting.", local_periph_bytes_per_buffer);
- return HAILO_INTERNAL_FAILURE;
+ const bool space_left_in_fifo = ((layer_info.direction != HAILO_D2H_STREAM) ||
+ (static_cast<uint32_t>(local_periph_bytes_per_buffer) <= (hw_consts.outbound_data_stream_size - 8 - desc_page_size)));
+ CHECK_AS_EXPECTED(space_left_in_fifo, HAILO_INTERNAL_FAILURE,
+ "Current periph_bytes_per_buffer is {} which is too high. Exiting.", local_periph_bytes_per_buffer);
+
+ auto updated_layer_info = layer_info;
+ updated_layer_info.nn_stream_config.periph_bytes_per_buffer = local_periph_bytes_per_buffer;
+ updated_layer_info.nn_stream_config.periph_buffers_per_frame = local_periph_buffers_per_frame;
+
+ return updated_layer_info;
+}
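// Illustration: the body of calculate_power_optimized_periph_buffers_per_frame is elided
// above; this is a minimal sketch of the search its comments describe - the lowest
// buffers-per-frame count, starting from min_periph_buffers_per_frame, that divides the
// frame size and keeps bytes-per-buffer a multiple of the FIFO word granularity (assumed
// to be 8 here). The function name and the fallback behaviour are assumptions for illustration.
static uint16_t sketch_power_optimized_buffers_per_frame(uint32_t periph_frame_size,
    uint16_t min_periph_buffers_per_frame, uint16_t max_periph_buffers_per_frame,
    uint16_t original_periph_buffers_per_frame)
{
    for (uint32_t candidate = min_periph_buffers_per_frame;
         candidate <= max_periph_buffers_per_frame; candidate++) {
        if ((0 == (periph_frame_size % candidate)) &&
            (0 == ((periph_frame_size / candidate) % 8))) {
            // Lowest divisor => largest (power-optimized) credit size
            return static_cast<uint16_t>(candidate);
        }
    }
    return original_periph_buffers_per_frame; // No better split - keep the original value
}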
+
+// NOTE: in the DDR case, where periph is aligned to PERIPH_BYTES_PER_BUFFER_DDR_ALIGNMENT_SIZE, we can't force
+// periph_bytes_per_buffer * periph_buffers_per_frame to equal hw_frame_size exactly.
+static bool is_logical_periph_bytes_per_buffer(const uint32_t periph_bytes_per_buffer, const size_t hw_frame_size, const bool is_ddr,
+ const uint32_t max_shmifo_size, const uint32_t desc_page_size, const uint32_t max_periph_bytes_value,
+ const uint16_t core_bytes_per_buffer)
+{
+ if (is_ddr) {
+ // In DDR there is no descriptor residue, but periph_bytes_per_buffer has to divide
+ // core_bytes_per_buffer (calculated by the DFC) with no remainder
+ return (periph_bytes_per_buffer < max_shmifo_size) && (periph_bytes_per_buffer <= max_periph_bytes_value) &&
+ (0 == (core_bytes_per_buffer % periph_bytes_per_buffer));
}
+ return ((periph_bytes_per_buffer < (max_shmifo_size - desc_page_size)) &&
+ (0 == (hw_frame_size % periph_bytes_per_buffer)) && (periph_bytes_per_buffer <= max_periph_bytes_value));
+}
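// Worked example (hypothetical numbers) for the predicate above: with hw_frame_size=3840,
// max_shmifo_size=1024, desc_page_size=512 and max_periph_bytes_value=512, a non-DDR layer
// accepts 480 bytes per buffer (480 < 1024 - 512, 3840 % 480 == 0, 480 <= 512) but rejects
// 512 (512 is not < 1024 - 512). In the DDR branch the divisibility check moves to
// core_bytes_per_buffer and the descriptor page size is ignored, as the comments note.
static void sketch_periph_predicate_examples()
{
    assert(is_logical_periph_bytes_per_buffer(480, 3840, false, 1024, 512, 512, 0));
    assert(!is_logical_periph_bytes_per_buffer(512, 3840, false, 1024, 512, 512, 0));
    assert(is_logical_periph_bytes_per_buffer(480, 3840, true, 1024, 512, 512, 960));
}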
- *periph_bytes_per_buffer = local_periph_bytes_per_buffer;
- *periph_buffers_per_frame = local_periph_buffers_per_frame;
- return HAILO_SUCCESS;
+static Expected<std::tuple<uint16_t, uint16_t>> calculate_periph_requirements(const LayerInfo &layer_info, const uint32_t desc_page_size,
+ const bool is_periph_calculated_in_hailort, const uint32_t max_periph_bytes_value)
+{
+ // If the extension for calculating periph values in hailort is false, copy the values from the core registers;
+ // if the extension is true, calculate them according to the shape and other layer information.
+ if (!is_periph_calculated_in_hailort) {
+ return std::make_tuple(static_cast<uint16_t>(layer_info.nn_stream_config.core_bytes_per_buffer),
+ static_cast<uint16_t>(layer_info.nn_stream_config.core_buffers_per_frame));
+ }
+
+ if (HAILO_FORMAT_ORDER_HAILO_NMS == layer_info.format.order) {
+ CHECK_AS_EXPECTED(IS_FIT_IN_UINT16(layer_info.nms_info.bbox_size * layer_info.nms_info.burst_size),
+ HAILO_INVALID_HEF, "Invalid burst size");
+ return std::make_tuple(static_cast<uint16_t>(layer_info.nms_info.bbox_size * layer_info.nms_info.burst_size),
+ static_cast<uint16_t>(1));
+ }
+
+ CHECK_AS_EXPECTED(IS_FIT_IN_UINT32(layer_info.hw_shape.width * layer_info.hw_shape.features *
+ layer_info.hw_shape.height * layer_info.hw_data_bytes), HAILO_INVALID_HEF, "Invalid core frame size");
+
+ const auto is_ddr = (LayerType::DDR == layer_info.type);
+ const uint32_t alignment = is_ddr ? PERIPH_BYTES_PER_BUFFER_DDR_ALIGNMENT_SIZE : PERIPH_BYTES_PER_BUFFER_ALIGNMENT_SIZE;
+ const auto row_size = static_cast<uint32_t>(layer_info.hw_shape.width * layer_info.hw_shape.features *
+ layer_info.hw_data_bytes);
+ const auto core_frame_size = layer_info.hw_shape.height * row_size;
+
+ // Currently takes the largest periph_bytes_per_buffer that is possible with shmifo size and desc page size
+ // TODO HRT-10961 : calculate optimal periph size
+ auto periph_bytes_per_buffer = HailoRTCommon::align_to(row_size, alignment);
+ while (!is_logical_periph_bytes_per_buffer(periph_bytes_per_buffer, core_frame_size, is_ddr, layer_info.max_shmifo_size,
+ desc_page_size, max_periph_bytes_value, layer_info.nn_stream_config.core_bytes_per_buffer) && (0 < periph_bytes_per_buffer)) {
+ periph_bytes_per_buffer -= alignment;
+ }
+
+ CHECK_AS_EXPECTED(0 != periph_bytes_per_buffer, HAILO_INVALID_ARGUMENT, "Error: could not find a logical periph bytes per buffer value");
+
+ uint32_t periph_buffers_per_frame = (core_frame_size / periph_bytes_per_buffer);
+ // In DDR, if we get a periph bytes per buffer so small that the periph buffers per frame can't fit in uint16,
+ // put uint16_t max - seeing as this value doesn't really affect anything, we should not fail in that case.
+ if (is_ddr && !IS_FIT_IN_UINT16(periph_buffers_per_frame)) {
+ LOGGER__DEBUG("periph buffers per frame in ddr too large for 16 bit register - putting uint16_t max");
+ periph_buffers_per_frame = UINT16_MAX;
+ }
+ CHECK_AS_EXPECTED(IS_FIT_IN_UINT16(periph_buffers_per_frame), HAILO_INVALID_ARGUMENT);
+
+ return std::make_tuple(static_cast<uint16_t>(periph_bytes_per_buffer), static_cast<uint16_t>(periph_buffers_per_frame));
}
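// Illustration: a minimal standalone sketch of the align-down search above, keeping only
// the frame-divisibility condition for brevity. E.g. with row_size=300 and alignment=8 the
// starting candidate is align_to(300, 8)=304, and the loop steps 304, 296, 288, ... until
// a candidate divides core_frame_size (or hits 0, which the CHECK above turns into an error).
static uint32_t sketch_align_down_search(uint32_t row_size, uint32_t alignment, uint32_t core_frame_size)
{
    uint32_t candidate = ((row_size + alignment - 1) / alignment) * alignment; // align up
    while ((candidate > 0) && (0 != (core_frame_size % candidate))) {
        candidate -= alignment; // step down one alignment unit at a time
    }
    return candidate; // 0 means no logical value exists for this frame
}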
static Expected<LayerInfo> update_layer_info(const LayerInfo &original_layer_info,
const CONTROL_PROTOCOL__host_buffer_info_t &buffer_info,
- const CONTROL_PROTOCOL__hw_consts_t &hw_consts, bool should_optimize_credits)
+ const CONTROL_PROTOCOL__hw_consts_t &hw_consts, const ProtoHEFHwArch &hw_arch, const bool should_optimize_credits,
+ const bool is_periph_calculated_in_hailort)
{
LayerInfo local_layer_info = original_layer_info;
- auto status = calculate_credit_params(hw_consts, buffer_info.desc_page_size, local_layer_info.direction,
- should_optimize_credits, &local_layer_info.nn_stream_config.periph_bytes_per_buffer,
- &local_layer_info.nn_stream_config.periph_buffers_per_frame);
- CHECK_SUCCESS_AS_EXPECTED(status);
-
if (local_layer_info.max_shmifo_size == 0) {
local_layer_info.max_shmifo_size = hw_consts.default_initial_credit_size;
}
- return local_layer_info;
+ // If HW padding is supported, don't update the periph registers because they were already updated in get_hw_padding.
+ // TODO HRT-11006 : currently check both is_hw_padding_supported and feature_padding_payload because on a MIPI input
+ // stream, even if is_hw_padding_supported is true, we will not use HW padding.
+ auto max_periph_bytes_from_hef = HefConfigurator::max_periph_bytes_value(DeviceBase::hef_arch_to_device_arch(hw_arch));
+ CHECK_EXPECTED(max_periph_bytes_from_hef);
+ const auto max_periph_bytes = MIN(max_periph_bytes_from_hef.value(), local_layer_info.max_shmifo_size);
+
+ const bool hw_padding_supported = HefConfigurator::is_hw_padding_supported(local_layer_info,
+ max_periph_bytes) && (0 != original_layer_info.nn_stream_config.feature_padding_payload);
+ if (!hw_padding_supported) {
+ // Update periph values
+ const auto periph_requirements = calculate_periph_requirements(local_layer_info, buffer_info.desc_page_size,
+ is_periph_calculated_in_hailort, max_periph_bytes);
+ CHECK_EXPECTED(periph_requirements);
+
+ // Calculate and update value of periph bytes per buffer and periph buffers per frame
+ local_layer_info.nn_stream_config.periph_bytes_per_buffer = std::get<0>(periph_requirements.value());
+ local_layer_info.nn_stream_config.periph_buffers_per_frame = std::get<1>(periph_requirements.value());
+ }
+
+ auto updated_local_layer_info = calculate_credit_params(hw_consts, buffer_info.desc_page_size, should_optimize_credits,
+ local_layer_info);
+ CHECK_EXPECTED(updated_local_layer_info);
+
+ return updated_local_layer_info;
}
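// Usage sketch (hypothetical caller): the periph calculation and the credit-params update
// are now both folded into update_layer_info(), so call sites unwrap a single
// Expected<LayerInfo> instead of passing in/out periph pointers as before.
static Expected<uint16_t> sketch_resolved_periph_bytes(const LayerInfo &layer_info,
    const CONTROL_PROTOCOL__host_buffer_info_t &buffer_info,
    const CONTROL_PROTOCOL__hw_consts_t &hw_consts, const ProtoHEFHwArch &hw_arch)
{
    auto updated = update_layer_info(layer_info, buffer_info, hw_consts, hw_arch,
        false /* should_optimize_credits */, true /* is_periph_calculated_in_hailort */);
    CHECK_EXPECTED(updated);
    return Expected<uint16_t>(updated->nn_stream_config.periph_bytes_per_buffer);
}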
static hailo_status fill_boundary_input_layer(ContextResources &context_resources,
ResourcesManager &resources_manager, const LayerInfo layer_info, const CONTROL_PROTOCOL__hw_consts_t &hw_consts,
- bool should_optimize_credits)
+ const ProtoHEFHwArch &hw_arch, bool should_optimize_credits)
{
- const auto transfer_size = (layer_info.nn_stream_config.periph_bytes_per_buffer *
- layer_info.nn_stream_config.core_buffers_per_frame);
+ const auto transfer_size = LayerInfoUtils::get_layer_transfer_size(layer_info);
auto vdma_channel = resources_manager.get_boundary_vdma_channel_by_stream_name(layer_info.name);
CHECK_EXPECTED_AS_STATUS(vdma_channel);
const auto buffer_info = vdma_channel.value()->get_boundary_buffer_info(transfer_size);
- auto local_layer_info = update_layer_info(layer_info, buffer_info, hw_consts, should_optimize_credits);
+ const bool is_periph_calculated_in_hailort = resources_manager.get_supported_features().periph_calculation_in_hailort;
+ auto local_layer_info = update_layer_info(layer_info, buffer_info, hw_consts, hw_arch, should_optimize_credits,
+ is_periph_calculated_in_hailort);
CHECK_EXPECTED_AS_STATUS(local_layer_info);
const auto channel_id = vdma_channel.value()->get_channel_id();
- context_resources.add_edge_layer(local_layer_info.value(), channel_id, buffer_info);
+ auto status = context_resources.add_edge_layer(local_layer_info.value(), channel_id, buffer_info,
+ resources_manager.get_supported_features());
+ CHECK_SUCCESS(status);
LOGGER__DEBUG("Boundary input stream: {} h2d_channel: {}.", layer_info.stream_index, channel_id);
return HAILO_SUCCESS;
static hailo_status fill_inter_context_input_layer(ContextResources &context_resources,
ResourcesManager &resources_manager, const LayerInfo &layer_info, const CONTROL_PROTOCOL__hw_consts_t &hw_consts,
- bool should_optimize_credits)
+ const ProtoHEFHwArch &hw_arch, bool should_optimize_credits)
{
const auto channel_id = resources_manager.get_available_channel_id(to_layer_identifier(layer_info),
HailoRTDriver::DmaDirection::H2D, layer_info.dma_engine_index);
/* Get inter context buffer previously created */
const auto &connected_context = layer_info.connected_context_info;
auto intermediate_buffer_key = std::make_pair(connected_context.context_index, connected_context.stream_index);
- auto inter_context_buffer_exp = resources_manager.get_inter_context_buffer(intermediate_buffer_key);
+ auto inter_context_buffer_exp = resources_manager.get_intermediate_buffer(intermediate_buffer_key);
CHECK_EXPECTED_AS_STATUS(inter_context_buffer_exp, "Failed to find inter context buffer for src context {}, src_stream_index {}",
connected_context.context_index, connected_context.stream_index);
auto &inter_context_buffer = inter_context_buffer_exp->get();
+ const bool is_periph_calculated_in_hailort = resources_manager.get_supported_features().periph_calculation_in_hailort;
auto local_layer_info = update_layer_info(layer_info, inter_context_buffer.get_host_buffer_info(), hw_consts,
- should_optimize_credits);
+ hw_arch, should_optimize_credits, is_periph_calculated_in_hailort);
CHECK_EXPECTED_AS_STATUS(local_layer_info);
- context_resources.add_edge_layer(local_layer_info.value(), channel_id.value(),
- inter_context_buffer.get_host_buffer_info());
+ auto status = context_resources.add_edge_layer(local_layer_info.value(), channel_id.value(),
+ inter_context_buffer.get_host_buffer_info(), resources_manager.get_supported_features());
+ CHECK_SUCCESS(status);
LOGGER__DEBUG("Intermediate input stream {}, src_context:{}, dst_context: {}, h2d_channel {}.",
layer_info.stream_index, layer_info.context_index, layer_info.connected_context_info.context_index,
static hailo_status fill_boundary_output_layer(ContextResources &context_resources,
ResourcesManager &resources_manager, const LayerInfo &layer_info, const CONTROL_PROTOCOL__hw_consts_t &hw_consts,
- bool should_optimize_credits)
+ const ProtoHEFHwArch &hw_arch, bool should_optimize_credits)
{
- const auto transfer_size = (layer_info.nn_stream_config.periph_bytes_per_buffer *
- layer_info.nn_stream_config.core_buffers_per_frame);
+ const auto transfer_size = LayerInfoUtils::get_layer_transfer_size(layer_info);
auto vdma_channel = resources_manager.get_boundary_vdma_channel_by_stream_name(layer_info.name);
CHECK_EXPECTED_AS_STATUS(vdma_channel);
const auto buffer_info = vdma_channel.value()->get_boundary_buffer_info(transfer_size);
- auto local_layer_info = update_layer_info(layer_info, buffer_info, hw_consts, should_optimize_credits);
+ const bool is_periph_calculated_in_hailort = resources_manager.get_supported_features().periph_calculation_in_hailort;
+ auto local_layer_info = update_layer_info(layer_info, buffer_info, hw_consts, hw_arch, should_optimize_credits,
+ is_periph_calculated_in_hailort);
CHECK_EXPECTED_AS_STATUS(local_layer_info);
const auto channel_id = vdma_channel.value()->get_channel_id();
- context_resources.add_edge_layer(local_layer_info.value(), channel_id, buffer_info);
+ auto status = context_resources.add_edge_layer(local_layer_info.value(), channel_id, buffer_info,
+ resources_manager.get_supported_features());
+ CHECK_SUCCESS(status);
LOGGER__DEBUG("Boundary output stream: {} d2h_channel: {}.", layer_info.stream_index, channel_id);
return HAILO_SUCCESS;
static hailo_status fill_inter_context_output_layer(ContextResources &context_resources,
ResourcesManager &resources_manager, const LayerInfo &layer_info,
- const CONTROL_PROTOCOL__hw_consts_t &hw_consts, bool should_optimize_credits)
+ const CONTROL_PROTOCOL__hw_consts_t &hw_consts, const ProtoHEFHwArch &hw_arch, bool should_optimize_credits)
{
const auto channel_id = resources_manager.get_available_channel_id(to_layer_identifier(layer_info),
HailoRTDriver::DmaDirection::D2H, layer_info.dma_engine_index);
CHECK_EXPECTED_AS_STATUS(channel_id);
- const auto frame_credits_in_bytes = (layer_info.nn_stream_config.periph_bytes_per_buffer *
- layer_info.nn_stream_config.core_buffers_per_frame);
+ const auto frame_credits_in_bytes = LayerInfoUtils::get_layer_transfer_size(layer_info);
+
+ auto network_batch_size = resources_manager.get_network_batch_size(layer_info.network_name);
+ CHECK_EXPECTED_AS_STATUS(network_batch_size);
- auto inter_context_buffer_exp = resources_manager.create_inter_context_buffer(frame_credits_in_bytes,
- layer_info.stream_index, layer_info.context_index, layer_info.network_name, channel_id.value());
+ auto inter_context_buffer_exp = resources_manager.create_intermediate_buffer(frame_credits_in_bytes,
+ network_batch_size.value(), layer_info.stream_index, layer_info.context_index, channel_id.value(),
+ IntermediateBuffer::StreamingType::BURST);
CHECK_EXPECTED_AS_STATUS(inter_context_buffer_exp);
auto &inter_context_buffer = inter_context_buffer_exp->get();
+ const bool is_periph_calculated_in_hailort = resources_manager.get_supported_features().periph_calculation_in_hailort;
auto local_layer_info = update_layer_info(layer_info, inter_context_buffer.get_host_buffer_info(), hw_consts,
- should_optimize_credits);
+ hw_arch, should_optimize_credits, is_periph_calculated_in_hailort);
CHECK_EXPECTED_AS_STATUS(local_layer_info);
- context_resources.add_edge_layer(local_layer_info.value(), channel_id.value(),
- inter_context_buffer.get_host_buffer_info());
+ auto status = context_resources.add_edge_layer(local_layer_info.value(), channel_id.value(),
+ inter_context_buffer.get_host_buffer_info(), resources_manager.get_supported_features());
+ CHECK_SUCCESS(status);
LOGGER__DEBUG("Inter-context output stream {}, src_context:{}, d2h_channel {}.",
layer_info.stream_index, layer_info.context_index, channel_id.value());
static hailo_status fill_ddr_output_layer(ContextResources &context_resources,
ResourcesManager &resources_manager, const LayerInfo &layer_info,
- const CONTROL_PROTOCOL__hw_consts_t &hw_consts)
+ const CONTROL_PROTOCOL__hw_consts_t &hw_consts, const ProtoHEFHwArch &hw_arch)
{
CHECK(resources_manager.get_supported_features().padded_ddr_buffers, HAILO_INVALID_HEF,
"Failed opening non-compatible HEF that uses the following deprecated features: host-managed DDR buffers."
"Please re-compile the HEF using a newer Dataflow Compiler version (v3.11.0 or newer)");
- // Allocate resources and prepare ddr_info
-
- DdrChannelsInfo ddr_pair_info = {};
- ddr_pair_info.h2d_stream_index = layer_info.connected_context_info.stream_index;
- ddr_pair_info.d2h_stream_index = layer_info.stream_index;
- ddr_pair_info.network_index = layer_info.network_index;
- // It is assumed that output channels are parsed before input channels.
+ // It is assumed that output channels are parsed before input channels.
// Allocate vdma channel index for both edges
- const auto h2d_layer_identifier = std::make_tuple(LayerType::DDR, layer_info.name, ddr_pair_info.h2d_stream_index);
+ const auto h2d_stream_index = layer_info.connected_context_info.stream_index;
+ const auto h2d_layer_identifier = std::make_tuple(LayerType::DDR, HAILO_H2D_STREAM,
+ layer_info.name, h2d_stream_index);
const auto h2d_channel_id = resources_manager.get_available_channel_id(h2d_layer_identifier,
HailoRTDriver::DmaDirection::H2D, layer_info.connected_context_info.dma_engine_index);
CHECK_EXPECTED_AS_STATUS(h2d_channel_id);
- ddr_pair_info.h2d_channel_id = h2d_channel_id.value();
- const auto d2h_layer_identifier = std::make_tuple(LayerType::DDR, layer_info.name, ddr_pair_info.d2h_stream_index);
+ const auto d2h_stream_index = layer_info.stream_index;
+ const auto d2h_layer_identifier = std::make_tuple(LayerType::DDR, HAILO_D2H_STREAM,
+ layer_info.name, d2h_stream_index);
const auto d2h_channel_id = resources_manager.get_available_channel_id(d2h_layer_identifier,
HailoRTDriver::DmaDirection::D2H, layer_info.dma_engine_index);
CHECK_EXPECTED_AS_STATUS(d2h_channel_id);
- ddr_pair_info.d2h_channel_id = d2h_channel_id.value();
- ddr_pair_info.row_size = layer_info.nn_stream_config.core_bytes_per_buffer;
- ddr_pair_info.min_buffered_rows = layer_info.ddr_info.min_buffered_rows;
- ddr_pair_info.total_buffers_per_frame = layer_info.ddr_info.total_buffers_per_frame;
+ // In a DDR layer there is no residue, so we can ignore the descriptor size
+ const auto IGNORE_DESCRIPTOR_SIZE = 0;
+ // Send layer info with updated shmifo size
+ auto layer_info_updated_shmifo = layer_info;
+ if (layer_info_updated_shmifo.max_shmifo_size == 0) {
+ layer_info_updated_shmifo.max_shmifo_size = hw_consts.default_initial_credit_size;
+ }
+
+ auto max_periph_bytes = HefConfigurator::max_periph_bytes_value(DeviceBase::hef_arch_to_device_arch(hw_arch));
+ CHECK_EXPECTED_AS_STATUS(max_periph_bytes, "Error calculating max periph bytes per buffer");
+ const auto periph_values = calculate_periph_requirements(layer_info_updated_shmifo, IGNORE_DESCRIPTOR_SIZE,
+ resources_manager.get_supported_features().periph_calculation_in_hailort, max_periph_bytes.value());
+ CHECK_EXPECTED_AS_STATUS(periph_values);
+
+ const auto row_size = std::get<0>(periph_values.value());
+ const auto min_buffered_rows = layer_info.ddr_info.min_buffered_rows;
- // Create the ddr buffer
- auto ddr_channels_pair = context_resources.create_ddr_channels_pair(ddr_pair_info);
- CHECK_EXPECTED_AS_STATUS(ddr_channels_pair);
+ // Allocate the ddr buffer
+ auto ddr_buffer = resources_manager.create_intermediate_buffer(row_size, min_buffered_rows,
+ d2h_stream_index, layer_info.context_index, d2h_channel_id.value(),
+ IntermediateBuffer::StreamingType::CIRCULAR_CONTINUOS);
+ CHECK_EXPECTED_AS_STATUS(ddr_buffer);
+
+ DdrChannelsInfo ddr_pair_info{};
+ ddr_pair_info.h2d_stream_index = h2d_stream_index;
+ ddr_pair_info.d2h_stream_index = d2h_stream_index;
+ ddr_pair_info.network_index = layer_info.network_index;
+ ddr_pair_info.h2d_channel_id = h2d_channel_id.value();
+ ddr_pair_info.d2h_channel_id = d2h_channel_id.value();
+ ddr_pair_info.row_size = row_size;
+ ddr_pair_info.min_buffered_rows = min_buffered_rows;
+ ddr_pair_info.total_buffers_per_frame = layer_info.ddr_info.total_buffers_per_frame;
+ ddr_pair_info.host_buffer_info = ddr_buffer->get().get_host_buffer_info();
+ context_resources.add_ddr_channels_info(ddr_pair_info);
// On ddr layers, we assume the periph credit size is aligned to the size of descriptor, so we don't want to
// optimize the credits.
const bool should_optimize_credits = false;
- auto local_layer_info = update_layer_info(layer_info, ddr_channels_pair->get().get_host_buffer_info(), hw_consts,
- should_optimize_credits);
+ const bool is_periph_calculated_in_hailort = resources_manager.get_supported_features().periph_calculation_in_hailort;
+ auto local_layer_info = update_layer_info(layer_info, ddr_buffer->get().get_host_buffer_info(), hw_consts,
+ hw_arch, should_optimize_credits, is_periph_calculated_in_hailort);
CHECK_EXPECTED_AS_STATUS(local_layer_info);
- context_resources.add_edge_layer(local_layer_info.value(), ddr_pair_info.d2h_channel_id,
- ddr_channels_pair->get().get_host_buffer_info());
+ auto status = context_resources.add_edge_layer(local_layer_info.value(), ddr_pair_info.d2h_channel_id,
+ ddr_buffer->get().get_host_buffer_info(), resources_manager.get_supported_features());
+ CHECK_SUCCESS(status);
return HAILO_SUCCESS;
}
-static hailo_status fill_ddr_input_layer(ContextResources &context_resources,
- const LayerInfo &layer_info, const CONTROL_PROTOCOL__hw_consts_t &hw_consts)
+static hailo_status fill_ddr_input_layer(ContextResources &context_resources, ResourcesManager &resources_manager,
+ const LayerInfo &layer_info, const CONTROL_PROTOCOL__hw_consts_t &hw_consts, const ProtoHEFHwArch &hw_arch)
{
auto connected_stream_index = layer_info.connected_context_info.stream_index;
- auto ddr_channels_pair = context_resources.get_ddr_channels_pair(connected_stream_index);
- CHECK(ddr_channels_pair, HAILO_INVALID_HEF, "Matching DDR layer as not found for context {} src stream {}",
+ auto ddr_info = context_resources.get_ddr_channels_info(connected_stream_index);
+ CHECK_EXPECTED_AS_STATUS(ddr_info, "Matching DDR layer was not found for context {} src stream {}",
layer_info.context_index, connected_stream_index);
-
- const auto ddr_info = ddr_channels_pair->get().info();
LOGGER__DEBUG("DDR layer: input stream_index: {}, output stream_index: {}, h2d_channel {}, d2h_channel: {}.",
- ddr_info.h2d_stream_index, ddr_info.d2h_stream_index, ddr_info.h2d_channel_id, ddr_info.d2h_channel_id);
+ ddr_info->h2d_stream_index, ddr_info->d2h_stream_index, ddr_info->h2d_channel_id, ddr_info->d2h_channel_id);
- CHECK(layer_info.stream_index == ddr_info.h2d_stream_index, HAILO_INVALID_HEF, "DDR channel pair mismatch in h2d channel");
- CHECK(layer_info.connected_context_info.stream_index == ddr_info.d2h_stream_index, HAILO_INVALID_HEF, "DDR channel pair mismatch in d2h channel");
- CHECK(layer_info.network_index == ddr_info.network_index, HAILO_INVALID_HEF, "DDR channel pair mismatch network_index");
+ CHECK(layer_info.stream_index == ddr_info->h2d_stream_index, HAILO_INVALID_HEF, "DDR channel pair mismatch in h2d channel");
+ CHECK(layer_info.connected_context_info.stream_index == ddr_info->d2h_stream_index, HAILO_INVALID_HEF, "DDR channel pair mismatch in d2h channel");
+ CHECK(layer_info.network_index == ddr_info->network_index, HAILO_INVALID_HEF, "DDR channel pair mismatch network_index");
// On ddr layers, we assume the periph credit size is aligned to the size of descriptor, so we don't want to
// optimize the credits.
const bool should_optimize_credits = false;
- auto local_layer_info = update_layer_info(layer_info, ddr_channels_pair->get().get_host_buffer_info(), hw_consts,
- should_optimize_credits);
+ const bool is_periph_calculated_in_hailort = resources_manager.get_supported_features().periph_calculation_in_hailort;
+ auto local_layer_info = update_layer_info(layer_info, ddr_info->host_buffer_info, hw_consts,
+ hw_arch, should_optimize_credits, is_periph_calculated_in_hailort);
CHECK_EXPECTED_AS_STATUS(local_layer_info);
- context_resources.add_edge_layer(local_layer_info.value(), ddr_channels_pair->get().info().h2d_channel_id,
- ddr_channels_pair->get().get_host_buffer_info());
+ auto status = context_resources.add_edge_layer(local_layer_info.value(), ddr_info->h2d_channel_id,
+ ddr_info->host_buffer_info, resources_manager.get_supported_features());
+ CHECK_SUCCESS(status);
return HAILO_SUCCESS;
}
const ContextResources &context_resources)
{
bool start_fw_ddr_buffer_task = false;
- for (auto& ddr_channels_pair : context_resources.get_ddr_channels_pairs()) {
- if (ddr_channels_pair.need_manual_credit_management()) {
- const auto ddr_info = ddr_channels_pair.info();
+ for (const auto &ddr_info : context_resources.get_ddr_channels_infos()) {
+ if (ddr_info.need_manual_credit_management()) {
auto ddr_pair_action = DdrPairInfoAction::create(ddr_info.h2d_channel_id, ddr_info.d2h_channel_id,
- ddr_info.network_index, ddr_channels_pair.descriptors_per_frame(), ddr_channels_pair.descs_count());
+ ddr_info.network_index, ddr_info.descriptors_per_frame(), ddr_info.descs_count());
CHECK_EXPECTED_AS_STATUS(ddr_pair_action);
configuration_actions.emplace_back(ddr_pair_action.release());
static hailo_status parse_and_fill_edge_layers_mapping(
ContextResources &context_resources,
const ContextMetadata &context_metadata,
- ResourcesManager &resources_manager)
+ ResourcesManager &resources_manager, const ProtoHEFHwArch &hw_arch)
{
hailo_status status = HAILO_UNINITIALIZED;
// We parse ddr inputs before boundary/inter-context because otherwise on C2C mode we may lose some credit.
for (const auto &output_layer_info : context_metadata.get_ddr_output_layers()) {
- status = fill_ddr_output_layer(context_resources, resources_manager, output_layer_info, *hw_consts);
+ status = fill_ddr_output_layer(context_resources, resources_manager, output_layer_info, *hw_consts, hw_arch);
CHECK_SUCCESS(status);
}
for (const auto &output_layer_info : context_metadata.get_boundary_output_layers()) {
status = fill_boundary_output_layer(context_resources, resources_manager, output_layer_info,
- *hw_consts, should_optimize_credits);
+ *hw_consts, hw_arch, should_optimize_credits);
CHECK_SUCCESS(status);
}
for (const auto &output_layer_info : context_metadata.get_inter_context_output_layers()) {
status = fill_inter_context_output_layer(context_resources, resources_manager, output_layer_info,
- *hw_consts, should_optimize_credits);
+ *hw_consts, hw_arch, should_optimize_credits);
CHECK_SUCCESS(status);
}
for (const auto &input_layer_info : context_metadata.get_ddr_input_layers()) {
- status = fill_ddr_input_layer(context_resources, input_layer_info, *hw_consts);
+ status = fill_ddr_input_layer(context_resources, resources_manager, input_layer_info, *hw_consts, hw_arch);
CHECK_SUCCESS(status);
}
for (const auto &input_layer_info : context_metadata.get_boundary_input_layers()) {
status = fill_boundary_input_layer(context_resources, resources_manager, input_layer_info,
- *hw_consts, should_optimize_credits);
+ *hw_consts, hw_arch, should_optimize_credits);
CHECK_SUCCESS(status);
}
for (const auto &input_layer_info : context_metadata.get_inter_context_input_layers()) {
status = fill_inter_context_input_layer(context_resources, resources_manager, input_layer_info,
- *hw_consts, should_optimize_credits);
+ *hw_consts, hw_arch, should_optimize_credits);
CHECK_SUCCESS(status);
}
- status = context_resources.validate_edge_layers();
- CHECK_SUCCESS(status);
-
/* UN-Lock resources at the end of the context -
h2d inter-context, d2h inter-context and DDR buffer channels */
for (const auto &input_layer_info : context_metadata.get_inter_context_input_layers()) {
}
for (const auto &output_layer_info : context_metadata.get_ddr_output_layers()) {
- const auto h2d_layer_identifier = std::make_tuple(LayerType::DDR, output_layer_info.name,
- output_layer_info.connected_context_info.stream_index);
+ const auto h2d_layer_identifier = std::make_tuple(LayerType::DDR, HAILO_H2D_STREAM,
+ output_layer_info.name, output_layer_info.connected_context_info.stream_index);
status = resources_manager.free_channel_index(h2d_layer_identifier);
CHECK_SUCCESS(status);
- const auto d2h_layer_identifier = std::make_tuple(LayerType::DDR, output_layer_info.name,
- output_layer_info.stream_index);
+ const auto d2h_layer_identifier = std::make_tuple(LayerType::DDR, HAILO_D2H_STREAM,
+ output_layer_info.name, output_layer_info.stream_index);
status = resources_manager.free_channel_index(d2h_layer_identifier);
CHECK_SUCCESS(status);
}
return HAILO_SUCCESS;
}
-static Expected<uint8_t> find_dummy_stream(const LayerInfo &layer_info, const ContextResources &context_resources)
+// TODO HRT-10073: change to supported features list
+static bool is_hailo15_device_type(const hailo_device_architecture_t dev_arch)
{
- const auto other_direction = (HAILO_H2D_STREAM == layer_info.direction) ? HAILO_D2H_STREAM : HAILO_H2D_STREAM;
- const auto other_direction_edge_layers = context_resources.get_edge_layers(other_direction);
- CHECK_AS_EXPECTED(!other_direction_edge_layers.empty(), HAILO_INTERNAL_FAILURE, "Couldn't find dummy stream");
- return Expected<uint8_t>(other_direction_edge_layers.front().layer_info.stream_index);
+ // Compare with HAILO15 device arch
+ return (HAILO_ARCH_HAILO15 == dev_arch);
}
-static hailo_status add_change_vdma_to_stream_mapping(
+static Expected<uint8_t> find_dummy_stream(const LayerInfo &layer_info, const ContextResources &context_resources,
+ const bool is_null_shmifo_supported)
+{
+ if (is_null_shmifo_supported) {
+ static const uint8_t DUMMY_STREAM_INDEX = 31;
+ return Expected<uint8_t>(DUMMY_STREAM_INDEX);
+ } else {
+ const auto other_direction = (HAILO_H2D_STREAM == layer_info.direction) ? HAILO_D2H_STREAM : HAILO_H2D_STREAM;
+ const auto other_direction_edge_layers = context_resources.get_edge_layers(other_direction);
+ CHECK_AS_EXPECTED(!other_direction_edge_layers.empty(), HAILO_INTERNAL_FAILURE, "Couldn't find dummy stream");
+ return Expected<uint8_t>(other_direction_edge_layers.front().layer_info.stream_index);
+ }
+}
+
+static hailo_status add_change_vdma_to_stream_mapping(const ProtoHEFHwArch &hw_arch,
const CoreOpMetadata &core_op_metadata, const ResourcesManager &resources_manager,
ContextResources &context_resources, uint8_t context_index,
std::vector<ContextSwitchConfigActionPtr> &processed_configuration_actions)
const bool is_dummy_stream = layer_info.context_index != context_index;
uint8_t stream_index = layer_info.stream_index;
if (is_dummy_stream) {
- auto dummy_stream_index = find_dummy_stream(layer_info, context_resources);
+ auto dummy_stream_index = find_dummy_stream(layer_info, context_resources,
+ is_hailo15_device_type(DeviceBase::hef_arch_to_device_arch(hw_arch)));
CHECK_EXPECTED_AS_STATUS(dummy_stream_index);
stream_index = *dummy_stream_index;
}
for (const auto &edge_layer : context_resources.get_edge_layers(LayerType::DDR, HAILO_H2D_STREAM)) {
const auto d2h_stream_index = edge_layer.layer_info.connected_context_info.stream_index;
- auto pair = context_resources.get_ddr_channels_pair(d2h_stream_index);
- CHECK_EXPECTED_AS_STATUS(pair);
- const auto d2h_channel_id = pair->get().info().d2h_channel_id;
+ auto ddr_channels_info = context_resources.get_ddr_channels_info(d2h_stream_index);
+ CHECK_EXPECTED_AS_STATUS(ddr_channels_info);
+ const auto d2h_channel_id = ddr_channels_info->d2h_channel_id;
auto activate_action = ActivateDdrInputChannelAction::create(edge_layer.channel_id,
edge_layer.layer_info.stream_index, edge_layer.layer_info.nn_stream_config, edge_layer.buffer_info,
return HAILO_SUCCESS;
}
-static hailo_status proccess_trigger_new_data_input_action(const ContextSwitchConfigActionPtr &configuration_action,
+static hailo_status proccess_trigger_new_data_input_action(const ProtoHEFHwArch &hw_arch,
+ const ContextSwitchConfigActionPtr &configuration_action,
uint32_t trigger_new_data_from_input_group_start,
uint32_t trigger_new_data_from_input_group_end,
const uint32_t &action_index,
CHECK_SUCCESS(status);
if (!is_single_context) {
- status = add_change_vdma_to_stream_mapping(core_op_metadata, resources_manager,
+ status = add_change_vdma_to_stream_mapping(hw_arch, core_op_metadata, resources_manager,
context_resources, context_index, processed_configuration_actions);
CHECK_SUCCESS(status);
}
// * TriggerNewDataFromDataInput for each input layer (inter context/ boundary) in the context. This action is given
// from the HEF.
// * Finally StartBurstCreditsTaskAction
-static hailo_status handle_edge_layer_activation_actions(std::vector<ContextSwitchConfigActionPtr> &configuration_actions,
- const CoreOpMetadata &core_op_metadata,
+static hailo_status handle_edge_layer_activation_actions(const ProtoHEFHwArch &hw_arch,
+ std::vector<ContextSwitchConfigActionPtr> &configuration_actions, const CoreOpMetadata &core_op_metadata,
const ResourcesManager &resources_manager, ContextResources &context_resources, uint8_t context_index,
bool is_single_context)
{
for (uint32_t action_index = 0; action_index < configuration_actions.size(); action_index++) {
const auto &configuration_action = configuration_actions[action_index];
if (ContextSwitchConfigAction::Type::TriggerNewDataFromDataInput == configuration_action->get_type()) {
- auto status = proccess_trigger_new_data_input_action(configuration_action,
+ auto status = proccess_trigger_new_data_input_action(hw_arch, configuration_action,
trigger_new_data_from_input_group_start, trigger_new_data_from_input_group_end, action_index,
core_op_metadata, resources_manager, context_resources, context_index, processed_configuration_actions, is_single_context);
CHECK_SUCCESS(status);
return HAILO_SUCCESS;
}
-static bool is_hailo15_device_type(const ProtoHEFHwArch &hw_arch)
-{
- // Compare with HW_ARCH__LAVENDER and HW_ARCH__GINGER to support hefs compiled for them
- return (PROTO__HW_ARCH__GINGER == hw_arch) || (PROTO__HW_ARCH__LAVENDER == hw_arch) ||
- (PROTO__HW_ARCH__HAILO15H == hw_arch);
-}
-
static hailo_status write_action_list(const ContextResources & context_resources, ContextSwitchBufferBuilder &builder,
const std::vector<ContextSwitchConfigActionPtr> &actions)
{
hailo_status status = HAILO_UNINITIALIZED;
// Add edge layers mapping
- status = parse_and_fill_edge_layers_mapping(context_resources, context_metadata, resources_manager);
+ status = parse_and_fill_edge_layers_mapping(context_resources, context_metadata, resources_manager, hw_arch);
CHECK_SUCCESS(status);
// Parse context
std::vector<ContextSwitchConfigActionPtr> actions = context_metadata.get_actions();
- const auto support_pre_fetch = is_hailo15_device_type(hw_arch);
+ const auto support_pre_fetch = is_hailo15_device_type(DeviceBase::hef_arch_to_device_arch(hw_arch));
status = add_fetch_config_actions(actions, context_resources.get_config_buffers(), support_pre_fetch);
CHECK_SUCCESS(status);
- status = handle_edge_layer_activation_actions(actions, core_op_metadata, resources_manager,
+ status = handle_edge_layer_activation_actions(hw_arch, actions, core_op_metadata, resources_manager,
context_resources, context_index, is_single_context);
CHECK_SUCCESS(status);
static hailo_status fill_activation_config_recepies_for_multi_context(
ContextResources &context_resources, ResourcesManager &resources_manager,
- std::shared_ptr<CoreOpMetadata> core_op_metadata)
+ std::shared_ptr<CoreOpMetadata> core_op_metadata, const ProtoHEFHwArch &hw_arch)
{
auto hw_consts = Control::get_hw_consts(resources_manager.get_device());
CHECK_EXPECTED_AS_STATUS(hw_consts);
for (const auto &layer_info : core_op_metadata->get_output_layer_infos()){
auto status = fill_boundary_output_layer(context_resources, resources_manager, layer_info, *hw_consts,
- should_optimize_credits);
+ hw_arch, should_optimize_credits);
CHECK_SUCCESS(status);
}
for (const auto &layer_info : core_op_metadata->get_input_layer_infos()) {
auto status = fill_boundary_input_layer(context_resources, resources_manager, layer_info, *hw_consts,
- should_optimize_credits);
+ hw_arch, should_optimize_credits);
CHECK_SUCCESS(status);
}
- auto status = context_resources.validate_edge_layers();
- CHECK_SUCCESS(status);
-
std::vector<ContextSwitchConfigActionPtr> actions;
for (const auto &edge_layer : context_resources.get_edge_layers(LayerType::BOUNDARY)) {
auto action = edge_layer.layer_info.direction == HAILO_H2D_STREAM ?
return write_action_list(context_resources, context_resources.builder(), actions);
}
+static Expected<ContextSwitchConfigActionPtr> create_switch_lcu_batch_action(const ContextSwitchConfigActionPtr action,
+ ContextResources &context_resources)
+{
+ uint8_t cluster_index = 0;
+ uint8_t lcu_index = 0;
+ uint8_t network_index = 0;
+ uint32_t kernel_done_count = 0;
+
+ CHECK_AS_EXPECTED((ContextSwitchConfigAction::Type::EnableLcuDefault == action->get_type()) ||
+ (ContextSwitchConfigAction::Type::EnableLcuNonDefault == action->get_type()), HAILO_INVALID_ARGUMENT,
+ "Invalid action type - must be enable lcu (default or non default), Received type {}", action->get_type());
+
+ const auto params_buffer = action->serialize_params(context_resources);
+ CHECK_EXPECTED(params_buffer);
+
+ if (ContextSwitchConfigAction::Type::EnableLcuDefault == action->get_type()) {
+ const auto params = reinterpret_cast<const CONTEXT_SWITCH_DEFS__enable_lcu_action_default_data_t*>(params_buffer.value().data());
+ cluster_index = CONTEXT_SWITCH_DEFS__PACKED_LCU_ID_CLUSTER_INDEX_READ(params->packed_lcu_id);
+ lcu_index = CONTEXT_SWITCH_DEFS__PACKED_LCU_ID_LCU_INDEX_READ(params->packed_lcu_id);
+ network_index = params->network_index;
+ kernel_done_count = CONTEXT_SWITCH_DEFS__ENABLE_LCU_DEFAULT_KERNEL_COUNT;
+ } else {
+ const auto params = reinterpret_cast<const CONTEXT_SWITCH_DEFS__enable_lcu_action_non_default_data_t*>(params_buffer.value().data());
+ cluster_index = CONTEXT_SWITCH_DEFS__PACKED_LCU_ID_CLUSTER_INDEX_READ(params->packed_lcu_id);
+ lcu_index = CONTEXT_SWITCH_DEFS__PACKED_LCU_ID_LCU_INDEX_READ(params->packed_lcu_id);
+ network_index = params->network_index;
+ kernel_done_count = params->kernel_done_count;
+ }
+
+ return SwitchLcuBatchAction::create(cluster_index, lcu_index, network_index, kernel_done_count);
+}
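// Illustration: a minimal sketch of what the PACKED_LCU_ID read macros used above extract,
// assuming a hypothetical packing (low nibble = lcu index, high nibble = cluster index);
// the real masks and shifts are defined by the CONTEXT_SWITCH_DEFS packing macros.
static void sketch_unpack_lcu_id(uint8_t packed_lcu_id, uint8_t &cluster_index, uint8_t &lcu_index)
{
    lcu_index = static_cast<uint8_t>(packed_lcu_id & 0x0F);            // hypothetical mask
    cluster_index = static_cast<uint8_t>((packed_lcu_id >> 4) & 0x0F); // hypothetical shift
}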
+
static hailo_status fill_batch_switching_context_config_recepies_for_multi_context(
ContextResources &context_resources, const CoreOpMetadata &core_op_metadata)
{
CHECK_EXPECTED_AS_STATUS(reset_ddr_action);
actions.emplace_back(reset_ddr_action.release());
- // We need to re-enable all the lcus of the first context since some of their config regs are batch dependent.
- // => We'll filter out all of the "enable lcu" actions from the preliminary context
- static const std::set<ContextSwitchConfigAction::Type> BATCH_SWITCHING_ACTIONS = {
+ // Find all the enabled LCUs from the preliminary context in order to create corresponding switch-lcu-batch actions
+ // to run in the batch switching context.
+ static const std::set<ContextSwitchConfigAction::Type> ENABLE_LCU_ACTIONS = {
ContextSwitchConfigAction::Type::EnableLcuDefault,
ContextSwitchConfigAction::Type::EnableLcuNonDefault
};
- const auto batch_switch_actions = core_op_metadata.preliminary_context().get_actions_of_type(BATCH_SWITCHING_ACTIONS);
- actions.insert(actions.end(), batch_switch_actions.begin(), batch_switch_actions.end());
+
+ const auto batch_switch_actions = core_op_metadata.preliminary_context().get_actions_of_type(ENABLE_LCU_ACTIONS);
+ for (const auto &action : batch_switch_actions) {
+ auto switch_lcu_batch_action = create_switch_lcu_batch_action(action, context_resources);
+ CHECK_EXPECTED_AS_STATUS(switch_lcu_batch_action);
+ actions.insert(actions.end(), switch_lcu_batch_action.release());
+ }
auto status = handle_repeated_actions(actions);
CHECK_SUCCESS(status);
// Add edge layers mapping (only preliminary_run_asap networks have edge layers in the preliminary context)
assert(PRELIMINARY_CONTEXT_INDEX < core_op_metadata->dynamic_contexts().size());
auto status = parse_and_fill_edge_layers_mapping(context_resources,
- core_op_metadata->dynamic_contexts()[PRELIMINARY_CONTEXT_INDEX], resources_manager);
+ core_op_metadata->dynamic_contexts()[PRELIMINARY_CONTEXT_INDEX], resources_manager, hw_arch);
CHECK_SUCCESS(status);
}
// Parse preliminary config
std::vector<ContextSwitchConfigActionPtr> actions = preliminary_context.get_actions();
- const auto support_pre_fetch = is_hailo15_device_type(hw_arch);
+ const auto support_pre_fetch = is_hailo15_device_type(DeviceBase::hef_arch_to_device_arch(hw_arch));
auto status = add_fetch_config_actions(actions, context_resources.get_config_buffers(), support_pre_fetch);
CHECK_SUCCESS(status);
if (resources_manager.get_supported_features().preliminary_run_asap) {
- status = handle_edge_layer_activation_actions(actions, *core_op_metadata, resources_manager,
+ status = handle_edge_layer_activation_actions(hw_arch, actions, *core_op_metadata, resources_manager,
context_resources, PRELIMINARY_CONTEXT_INDEX, is_single_context);
CHECK_SUCCESS(status);
}
auto activation_context = resources_manager->add_new_context(CONTROL_PROTOCOL__CONTEXT_SWITCH_CONTEXT_TYPE_ACTIVATION);
CHECK_EXPECTED(activation_context);
status = fill_activation_config_recepies_for_multi_context(activation_context.value().get(),
- resources_manager.value(), core_op_metadata);
+ resources_manager.value(), core_op_metadata, hw_arch);
CHECK_SUCCESS_AS_EXPECTED(status);
auto batch_switching_context = resources_manager->add_new_context(CONTROL_PROTOCOL__CONTEXT_SWITCH_CONTEXT_TYPE_BATCH_SWITCHING);
return HAILO_SUCCESS;
}
-hailo_status Control::validate_arch_supported(Device &device, const std::vector<hailo_device_architecture_t> &supported_archs)
-{
- auto dev_arch = device.get_architecture();
- CHECK_EXPECTED_AS_STATUS(dev_arch);
- for (const auto &arch : supported_archs) {
- if (*dev_arch == arch) {
- return HAILO_SUCCESS;
- }
- }
- LOGGER__ERROR("Control is not supported for this device architecture - {}", HailoRTCommon::get_device_arch_str(*dev_arch));
- return HAILO_NOT_SUPPORTED;
-}
-
hailo_status Control::parse_and_validate_response(uint8_t *message, uint32_t message_size,
CONTROL_PROTOCOL__response_header_t **header, CONTROL_PROTOCOL__payload_t **payload,
- CONTROL_PROTOCOL__request_t *request)
+ CONTROL_PROTOCOL__request_t *request, Device &device)
{
hailo_status status = HAILO_UNINITIALIZED;
HAILO_COMMON_STATUS_t common_status = HAILO_COMMON_STATUS__UNINITIALIZED;
(FIRMWARE_STATUS_t)fw_status.minor_status, common_status);
}
+ if ((CONTROL_PROTOCOL_STATUS_CONTROL_UNSUPPORTED == fw_status.minor_status) ||
+ (CONTROL_PROTOCOL_STATUS_CONTROL_UNSUPPORTED == fw_status.major_status)) {
+ auto device_arch = device.get_architecture();
+ auto dev_arch_str = (device_arch) ? HailoRTCommon::get_device_arch_str(*device_arch) : "Unable to parse arch";
+ LOGGER__ERROR("Opcode {} is not supported on the device." \
+ " This error usually occurs when the control is not supported for the device arch - ({}), or not compiled to the FW",
+ CONTROL_PROTOCOL__get_textual_opcode((CONTROL_PROTOCOL__OPCODE_t)BYTE_ORDER__ntohl(request->header.common_header.opcode)),
+ dev_arch_str);
+ }
+
+ if ((CONTROL_PROTOCOL_STATUS_UNSUPPORTED_DEVICE == fw_status.minor_status) ||
+ (CONTROL_PROTOCOL_STATUS_UNSUPPORTED_DEVICE == fw_status.major_status)) {
+ LOGGER__ERROR("Opcode {} is not supported on the current board.",
+ CONTROL_PROTOCOL__get_textual_opcode((CONTROL_PROTOCOL__OPCODE_t)BYTE_ORDER__ntohl(request->header.common_header.opcode)));
+ }
+
if ((HAILO_CONTROL_STATUS_UNSUPPORTED_OPCODE == fw_status.minor_status) ||
(HAILO_CONTROL_STATUS_UNSUPPORTED_OPCODE == fw_status.major_status)) {
status = HAILO_UNSUPPORTED_OPCODE;
LOGGER__ERROR("Opcode {} is not supported",
CONTROL_PROTOCOL__get_textual_opcode((CONTROL_PROTOCOL__OPCODE_t)BYTE_ORDER__ntohl(request->header.common_header.opcode)));
}
+
goto exit;
}
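// Usage sketch (hypothetical caller): with the logging added above, an unsupported control
// still reaches callers as HAILO_UNSUPPORTED_OPCODE, so optional features can degrade
// gracefully instead of failing hard. some_optional_control() is a placeholder name.
static hailo_status sketch_try_optional_control(Device &device)
{
    const auto status = some_optional_control(device); // hypothetical control wrapper
    if (HAILO_UNSUPPORTED_OPCODE == status) {
        LOGGER__WARNING("Optional control not supported on this device/FW - skipping");
        return HAILO_SUCCESS;
    }
    return status;
}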
/* Parse response */
status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload,
- &request);
+ &request, device);
CHECK_SUCCESS_AS_EXPECTED(status);
identify_response = (CONTROL_PROTOCOL_identify_response_t *)(payload->parameters);
/* Parse response */
status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload,
- &request);
+ &request, device);
if (HAILO_SUCCESS != status) {
goto exit;
}
CONTROL_PROTOCOL__request_t request = {};
size_t request_size = 0;
- /* Validate arch */
- auto status = Control::validate_arch_supported(device);
- CHECK_SUCCESS(status);
-
auto common_status = CONTROL_PROTOCOL__pack_set_fw_logger_request(&request, &request_size, device.get_control_sequence(), level,
static_cast<uint8_t>(interface_mask));
- status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE;
+ auto status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE;
CHECK_SUCCESS(status);
uint8_t response_buffer[RESPONSE_MAX_BUFFER_SIZE] = {};
CONTROL_PROTOCOL__response_header_t *header = NULL;
CONTROL_PROTOCOL__payload_t *payload = NULL;
status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload,
- &request);
+ &request, device);
CHECK_SUCCESS(status);
return HAILO_SUCCESS;
CONTROL_PROTOCOL__request_t request = {};
size_t request_size = 0;
- /* Validate arch */
- auto status = Control::validate_arch_supported(device);
- CHECK_SUCCESS(status);
-
auto common_status = CONTROL_PROTOCOL__pack_set_clock_freq_request(&request, &request_size, device.get_control_sequence(), clock_freq);
- status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE;
+ auto status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE;
CHECK_SUCCESS(status);
uint8_t response_buffer[RESPONSE_MAX_BUFFER_SIZE] = {};
CONTROL_PROTOCOL__response_header_t *header = NULL;
CONTROL_PROTOCOL__payload_t *payload = NULL;
status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload,
- &request);
+ &request, device);
CHECK_SUCCESS(status);
return HAILO_SUCCESS;
CONTROL_PROTOCOL__request_t request = {};
size_t request_size = 0;
- /* Validate arch */
- auto status = Control::validate_arch_supported(device);
- CHECK_SUCCESS(status);
-
auto common_status = CONTROL_PROTOCOL__pack_set_throttling_state_request(&request, &request_size, device.get_control_sequence(), should_activate);
- status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE;
+ auto status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE;
CHECK_SUCCESS(status);
uint8_t response_buffer[RESPONSE_MAX_BUFFER_SIZE] = {};
CONTROL_PROTOCOL__response_header_t *header = NULL;
CONTROL_PROTOCOL__payload_t *payload = NULL;
status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload,
- &request);
+ &request, device);
CHECK_SUCCESS(status);
return HAILO_SUCCESS;
CONTROL_PROTOCOL__payload_t *payload = NULL;
CONTROL_PROTOCOL__get_throttling_state_response_t *get_throttling_state_response = NULL;
- /* Validate arch */
- status = Control::validate_arch_supported(device);
- CHECK_SUCCESS_AS_EXPECTED(status);
-
common_status = CONTROL_PROTOCOL__pack_get_throttling_state_request(&request, &request_size, device.get_control_sequence());
status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE;
CHECK_SUCCESS_AS_EXPECTED(status);
/* Parse response */
- status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, &request);
+ status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, &request, device);
CHECK_SUCCESS_AS_EXPECTED(status);
get_throttling_state_response = (CONTROL_PROTOCOL__get_throttling_state_response_t *)(payload->parameters);
CONTROL_PROTOCOL__request_t request = {};
size_t request_size = 0;
- /* Validate arch */
- auto status = Control::validate_arch_supported(device);
- CHECK_SUCCESS(status);
-
auto common_status = CONTROL_PROTOCOL__pack_set_overcurrent_state_request(&request, &request_size, device.get_control_sequence(), should_activate);
- status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE;
+ auto status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE;
CHECK_SUCCESS(status);
uint8_t response_buffer[RESPONSE_MAX_BUFFER_SIZE] = {};
/* Parse response */
CONTROL_PROTOCOL__response_header_t *header = NULL;
CONTROL_PROTOCOL__payload_t *payload = NULL;
- status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, &request);
+ status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, &request, device);
CHECK_SUCCESS(status);
return HAILO_SUCCESS;
CONTROL_PROTOCOL__payload_t *payload = NULL;
CONTROL_PROTOCOL__get_overcurrent_state_response_t *get_overcurrent_state_response = NULL;
- /* Validate arch */
- status = Control::validate_arch_supported(device);
- CHECK_SUCCESS_AS_EXPECTED(status);
-
common_status = CONTROL_PROTOCOL__pack_get_overcurrent_state_request(&request, &request_size, device.get_control_sequence());
status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE;
CHECK_SUCCESS_AS_EXPECTED(status);
/* Parse response */
- status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, &request);
+ status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, &request, device);
CHECK_SUCCESS_AS_EXPECTED(status);
get_overcurrent_state_response = (CONTROL_PROTOCOL__get_overcurrent_state_response_t *)(payload->parameters);
{
size_t request_size = 0;
CONTROL_PROTOCOL__request_t request = {};
+
auto common_status = CONTROL_PROTOCOL__pack_get_hw_consts_request(&request, &request_size, device.get_control_sequence());
auto status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE;
CHECK_SUCCESS_AS_EXPECTED(status);
CONTROL_PROTOCOL__response_header_t *header = NULL;
CONTROL_PROTOCOL__payload_t *payload = NULL;
- status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, &request);
+ status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, &request,
+ device);
CHECK_SUCCESS_AS_EXPECTED(status);
const auto &response = *reinterpret_cast<CONTROL_PROTOCOL__get_hw_consts_response_t*>(payload->parameters);
/* Parse response */
status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload,
- &request);
+ &request, device);
if (HAILO_SUCCESS != status) {
goto exit;
}
/* Parse response */
status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload,
- &request);
+ &request, device);
if (HAILO_SUCCESS != status) {
goto exit;
}
CONTROL_PROTOCOL__response_header_t *header = NULL;
CONTROL_PROTOCOL__payload_t *payload = NULL;
- /* Validate arch */
- status = Control::validate_arch_supported(device);
- if (HAILO_SUCCESS != status) {
- goto exit;
- }
-
common_status = CONTROL_PROTOCOL__pack_open_stream_request(&request, &request_size, device.get_control_sequence(),
dataflow_manager_id, is_input);
status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE;
/* Parse response */
status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload,
- &request);
+ &request, device);
if (HAILO_SUCCESS != status) {
goto exit;
}
CONTROL_PROTOCOL__response_header_t *header = NULL;
CONTROL_PROTOCOL__payload_t *payload = NULL;
- /* Validate arch */
- status = Control::validate_arch_supported(device);
- if (HAILO_SUCCESS != status) {
- goto exit;
- }
-
common_status = CONTROL_PROTOCOL__pack_close_stream_request(&request, &request_size, device.get_control_sequence(),
dataflow_manager_id, is_input);
status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE;
/* Parse response */
status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload,
- &request);
+ &request, device);
if (HAILO_SUCCESS != status) {
goto exit;
}
/* Validate arguments */
CHECK_ARG_NOT_NULL(params);
- /* Validate arch */
- status = Control::validate_arch_supported(device);
- if (HAILO_SUCCESS != status) {
- goto exit;
- }
-
common_status = CONTROL_PROTOCOL__pack_config_stream_udp_input_request(&request, &request_size,
device.get_control_sequence(), params);
status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE;
/* Parse response */
status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload,
- &request);
+ &request, device);
if (HAILO_SUCCESS != status) {
goto exit;
}
/* Validate arguments */
CHECK_ARG_NOT_NULL(params);
- /* Validate arch */
- status = Control::validate_arch_supported(device);
- if (HAILO_SUCCESS != status) {
- goto exit;
- }
-
common_status = CONTROL_PROTOCOL__pack_config_stream_udp_output_request(&request, &request_size,
device.get_control_sequence(), params);
status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE;
/* Parse response */
status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload,
- &request);
+ &request, device);
if (HAILO_SUCCESS != status) {
goto exit;
}
/* Validate arguments */
CHECK_ARG_NOT_NULL(params);
- /* Validate arch */
- status = Control::validate_arch_supported(device);
- if (HAILO_SUCCESS != status) {
- goto exit;
- }
-
common_status = CONTROL_PROTOCOL__pack_config_stream_mipi_input_request(&request, &request_size,
device.get_control_sequence(), params);
status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE;
/* Parse response */
status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload,
- &request);
+ &request, device);
if (HAILO_SUCCESS != status) {
goto exit;
}
/* Validate arguments */
CHECK_ARG_NOT_NULL(params);
- /* Validate arch */
- status = Control::validate_arch_supported(device);
- if (HAILO_SUCCESS != status) {
- goto exit;
- }
-
common_status = CONTROL_PROTOCOL__pack_config_stream_mipi_output_request(&request, &request_size,
device.get_control_sequence(), params);
status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE;
/* Parse response */
status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload,
- &request);
+ &request, device);
if (HAILO_SUCCESS != status) {
goto exit;
}
/* Validate arguments */
CHECK_ARG_NOT_NULL(params);
- /* Validate arch */
- status = Control::validate_arch_supported(device);
- if (HAILO_SUCCESS != status) {
- goto exit;
- }
-
common_status = CONTROL_PROTOCOL__pack_config_stream_pcie_input_request(&request, &request_size,
device.get_control_sequence(), params);
status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE;
/* Parse response */
status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload,
- &request);
+ &request, device);
if (HAILO_SUCCESS != status) {
goto exit;
}
/* Validate arguments */
CHECK_ARG_NOT_NULL(params);
- /* Validate arch */
- status = Control::validate_arch_supported(device);
- if (HAILO_SUCCESS != status) {
- goto exit;
- }
-
common_status = CONTROL_PROTOCOL__pack_config_stream_pcie_output_request(&request, &request_size,
device.get_control_sequence(), params);
status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE;
/* Parse response */
status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload,
- &request);
+ &request, device);
if (HAILO_SUCCESS != status) {
goto exit;
}
CONTROL_PROTOCOL__payload_t *payload = NULL;
CONTROL_PROTOCOL__power_measurement_response_t *response = NULL;
- /* Validate arch */
- status = Control::validate_arch_supported(device);
- if (HAILO_SUCCESS != status) {
- goto exit;
- }
-
/* Validate arguments */
CHECK_ARG_NOT_NULL(measurement);
/* Parse response */
status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload,
- &request);
+ &request, device);
if (HAILO_SUCCESS != status) {
goto exit;
}
CONTROL_PROTOCOL__payload_t *payload = NULL;
CONTROL_PROTOCOL__set_power_measurement_response_t *response = NULL;
- /* Validate arch */
- status = Control::validate_arch_supported(device);
- if (HAILO_SUCCESS != status) {
- goto exit;
- }
-
CHECK(CONTROL_PROTOCOL__MAX_NUMBER_OF_POWER_MEASUREMETS > buffer_index,
HAILO_INVALID_ARGUMENT, "Invalid power measurement index {}", buffer_index);
/* Parse response */
status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload,
- &request);
+ &request, device);
if (HAILO_SUCCESS != status) {
goto exit;
}
CONTROL_PROTOCOL__payload_t *payload = NULL;
CONTROL_PROTOCOL__get_power_measurement_response_t *get_power_response = NULL;
- /* Validate arch */
- status = Control::validate_arch_supported(device);
- if (HAILO_SUCCESS != status) {
- goto exit;
- }
-
/* Validate arguments */
CHECK(CONTROL_PROTOCOL__MAX_NUMBER_OF_POWER_MEASUREMETS > buffer_index,
HAILO_INVALID_ARGUMENT, "Invalid power measurement index {}", buffer_index);
}
/* Parse response */
status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload,
- &request);
+ &request, device);
if (HAILO_SUCCESS != status) {
goto exit;
}
CONTROL_PROTOCOL__payload_t *payload = NULL;
uint32_t delay_milliseconds = 0;
- /* Validate arch */
- status = Control::validate_arch_supported(device);
- if (HAILO_SUCCESS != status) {
- goto exit;
- }
-
delay_milliseconds = POWER_MEASUREMENT_DELAY_MS(sampling_period, averaging_factor);
// There is no logical way the measurement delay can be 0, because sampling_period and averaging_factor can't be 0.
// Hence, if it is 0 the real value was a fraction (e.g. 0.4) that was truncated, and we round it up to 1.
/* Parse response */
status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload,
- &request);
+ &request, device);
if (HAILO_SUCCESS != status) {
goto exit;
}
CONTROL_PROTOCOL__response_header_t *header = NULL;
CONTROL_PROTOCOL__payload_t *payload = NULL;
- /* Validate arch */
- status = Control::validate_arch_supported(device);
- if (HAILO_SUCCESS != status) {
- goto exit;
- }
-
common_status = CONTROL_PROTOCOL__pack_stop_power_measurement_request(&request, &request_size, device.get_control_sequence());
status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE;
if (HAILO_SUCCESS != status) {
/* Parse response */
status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload,
- &request);
+ &request, device);
if (HAILO_SUCCESS != status) {
goto exit;
}
CHECK_ARG_NOT_NULL(slave_config);
CHECK_ARG_NOT_NULL(data);
- /* Validate arch */
- status = Control::validate_arch_supported(device);
- if (HAILO_SUCCESS != status) {
- goto exit;
- }
-
/* Pack request */
common_status = CONTROL_PROTOCOL__pack_i2c_write_request(&request, &request_size, device.get_control_sequence(),
register_address, static_cast<uint8_t>(slave_config->endianness),
/* Parse response */
status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload,
- &request);
+ &request, device);
if (HAILO_SUCCESS != status) {
goto exit;
}
CHECK_ARG_NOT_NULL(slave_config);
CHECK_ARG_NOT_NULL(data);
- /* Validate arch */
- status = Control::validate_arch_supported(device);
- if (HAILO_SUCCESS != status) {
- goto exit;
- }
-
/* Pack request */
common_status = CONTROL_PROTOCOL__pack_i2c_read_request(&request, &request_size, device.get_control_sequence(),
register_address, static_cast<uint8_t>(slave_config->endianness),
/* Parse response */
status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload,
- &request);
+ &request, device);
if (HAILO_SUCCESS != status) {
goto exit;
}
/* Validate arguments */
CHECK_ARG_NOT_NULL(params);
- /* Validate arch */
- status = Control::validate_arch_supported(device);
- if (HAILO_SUCCESS != status) {
- goto exit;
- }
-
common_status = CONTROL_PROTOCOL__pack_config_core_top_request(&request, &request_size, device.get_control_sequence(), config_type, params);
status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE;
if (HAILO_SUCCESS != status) {
/* Parse response */
status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload,
- &request);
+ &request, device);
if (HAILO_SUCCESS != status) {
goto exit;
}
CONTROL_PROTOCOL__response_header_t *header = NULL;
CONTROL_PROTOCOL__payload_t *payload = NULL;
- /* Validate arch */
- status = Control::validate_arch_supported(device);
- if (HAILO_SUCCESS != status) {
- goto exit;
- }
-
common_status = CONTROL_PROTOCOL__pack_phy_operation_request(&request, &request_size, device.get_control_sequence(), operation_type);
status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE;
if (HAILO_SUCCESS != status) {
/* Parse response */
status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload,
- &request);
+ &request, device);
if (HAILO_SUCCESS != status) {
goto exit;
}
/* Validate arguments */
CHECK_ARG_NOT_NULL(info);
- /* Validate arch */
- status = Control::validate_arch_supported(device);
- if (HAILO_SUCCESS != status) {
- goto exit;
- }
-
common_status = CONTROL_PROTOCOL__pack_examine_user_config(&request, &request_size, device.get_control_sequence());
status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE;
if (HAILO_SUCCESS != status) {
/* Parse response */
status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload,
- &request);
+ &request, device);
if (HAILO_SUCCESS != status) {
goto exit;
}
CONTROL_PROTOCOL__payload_t *payload = NULL;
CONTROL_PROTOCOL__read_user_config_response_t *response = NULL;
- /* Validate arch */
- status = Control::validate_arch_supported(device);
- CHECK_SUCCESS(status);
-
common_status = CONTROL_PROTOCOL__pack_read_user_config(&request, &request_size, device.get_control_sequence(),
read_offset, read_length);
status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE;
/* Parse response */
status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload,
- &request);
+ &request, device);
CHECK_SUCCESS(status);
response = (CONTROL_PROTOCOL__read_user_config_response_t *)(payload->parameters);
/* Validate arguments */
CHECK_ARG_NOT_NULL(buffer);
- /* Validate arch */
- status = Control::validate_arch_supported(device);
- CHECK_SUCCESS(status);
-
status = examine_user_config(device, &user_config_info);
CHECK_SUCCESS(status);
CONTROL_PROTOCOL__response_header_t *header = NULL;
CONTROL_PROTOCOL__payload_t *payload = NULL;
- /* Validate arch */
- status = Control::validate_arch_supported(device);
- CHECK_SUCCESS(status);
-
common_status = CONTROL_PROTOCOL__pack_write_user_config_request(&request, &request_size,
device.get_control_sequence(), offset, data + offset, chunk_size);
status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE;
/* Parse response */
status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload,
- &request);
+ &request, device);
CHECK_SUCCESS(status);
return HAILO_SUCCESS;
/* Validate arguments */
CHECK_ARG_NOT_NULL(data);
- /* Validate arch */
- status = Control::validate_arch_supported(device);
- CHECK_SUCCESS(status);
-
while (offset < data_length) {
chunk_size = MIN(WRITE_CHUNK_SIZE, (data_length - offset));
status = write_user_config_chunk(device, offset, data, chunk_size);
CONTROL_PROTOCOL__response_header_t *header = NULL;
CONTROL_PROTOCOL__payload_t *payload = NULL;
- /* Validate arch */
- status = Control::validate_arch_supported(device);
- if (HAILO_SUCCESS != status) {
- goto exit;
- }
-
common_status = CONTROL_PROTOCOL__pack_erase_user_config_request(&request, &request_size, device.get_control_sequence());
status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE;
if (HAILO_SUCCESS != status) {
/* Parse response */
status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload,
- &request);
+ &request, device);
if (HAILO_SUCCESS != status) {
goto exit;
}
/* Validate arguments */
CHECK_ARG_NOT_NULL(buffer);
- /* Validate arch */
- status = Control::validate_arch_supported(device);
- CHECK_SUCCESS(status);
-
CHECK(buffer_length >= BOARD_CONFIG_SIZE, HAILO_INSUFFICIENT_BUFFER,
"read buffer is too small. provided buffer size: {} bytes, board config size: {} bytes", buffer_length,
BOARD_CONFIG_SIZE);
/* Parse response */
status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload,
- &request);
+ &request, device);
CHECK_SUCCESS(status);
response = (CONTROL_PROTOCOL__read_board_config_response_t *)(payload->parameters);
actual_read_data_length = BYTE_ORDER__ntohl(response->data_length);
/* Validate arguments */
CHECK_ARG_NOT_NULL(data);
- /* Validate arch */
- status = Control::validate_arch_supported(device);
- CHECK_SUCCESS(status);
-
CHECK(BOARD_CONFIG_SIZE >= data_length, HAILO_INVALID_OPERATION,
"Invalid size of board config. data_length={}, max_size={}" , data_length, BOARD_CONFIG_SIZE);
/* Parse response */
status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload,
- &request);
+ &request, device);
CHECK_SUCCESS(status);
return HAILO_SUCCESS;
/* Validate arguments */
CHECK_ARG_NOT_NULL(data);
- /* Validate arch */
- status = Control::validate_arch_supported(device);
- if (HAILO_SUCCESS != status) {
- goto exit;
- }
-
common_status = CONTROL_PROTOCOL__write_second_stage_to_internal_memory_request(&request, &request_size, device.get_control_sequence(), offset,
data, data_length);
status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE;
/* Parse response */
status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload,
- &request);
+ &request, device);
if (HAILO_SUCCESS != status) {
goto exit;
}
/* Validate arguments */
CHECK_ARG_NOT_NULL(expected_md5);
- /* Validate arch */
- status = Control::validate_arch_supported(device);
- if (HAILO_SUCCESS != status) {
- goto exit;
- }
-
common_status = CONTROL_PROTOCOL__copy_second_stage_to_flash_request(&request, &request_size, device.get_control_sequence(), expected_md5, second_stage_size);
status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE;
if (HAILO_SUCCESS != status) {
/* Parse response */
status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload,
- &request);
+ &request, device);
if (HAILO_SUCCESS != status) {
goto exit;
}
CONTROL_PROTOCOL__response_header_t *header = NULL;
CONTROL_PROTOCOL__payload_t *payload = NULL;
- /* Validate arch */
- status = Control::validate_arch_supported(device);
- if (HAILO_SUCCESS != status) {
- goto exit;
- }
-
common_status = CONTROL_PROTOCOL__pack_start_firmware_update_request(&request, &request_size, device.get_control_sequence());
status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE;
if (HAILO_SUCCESS != status) {
/* Parse response */
status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload,
- &request);
+ &request, device);
if (HAILO_SUCCESS != status) {
goto exit;
}
CONTROL_PROTOCOL__response_header_t *header = NULL;
CONTROL_PROTOCOL__payload_t *payload = NULL;
- /* Validate arch */
- status = Control::validate_arch_supported(device);
- if (HAILO_SUCCESS != status) {
- goto exit;
- }
-
common_status = CONTROL_PROTOCOL__pack_finish_firmware_update_request(&request, &request_size, device.get_control_sequence());
status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE;
if (HAILO_SUCCESS != status) {
/* Parse response */
status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload,
- &request);
+ &request, device);
if (HAILO_SUCCESS != status) {
goto exit;
}
/* Validate arguments */
CHECK_ARG_NOT_NULL(data);
- /* Validate arch */
- status = Control::validate_arch_supported(device);
- if (HAILO_SUCCESS != status) {
- goto exit;
- }
-
common_status = CONTROL_PROTOCOL__write_firmware_update_request(&request, &request_size, device.get_control_sequence(), offset,
data, data_length);
status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE;
/* Parse response */
status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload,
- &request);
+ &request, device);
if (HAILO_SUCCESS != status) {
goto exit;
}
/* Validate arguments */
CHECK_ARG_NOT_NULL(expected_md5);
- /* Validate arch */
- status = Control::validate_arch_supported(device);
- if (HAILO_SUCCESS != status) {
- goto exit;
- }
-
common_status = CONTROL_PROTOCOL__pack_validate_firmware_update_request(&request, &request_size, device.get_control_sequence(),
expected_md5, firmware_size);
status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE;
/* Parse response */
status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload,
- &request);
+ &request, device);
if (HAILO_SUCCESS != status) {
goto exit;
}
/* Validate arguments */
CHECK_ARG_NOT_NULL(inbound_to_outbound_latency_nsec);
- /* Validate arch */
- status = Control::validate_arch_supported(device);
- if (HAILO_SUCCESS != status) {
- goto exit;
- }
-
common_status = CONTROL_PROTOCOL__pack_latency_measurement_read_request(&request, &request_size, device.get_control_sequence());
status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE;
if (HAILO_SUCCESS != status) {
/* Parse response */
status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload,
- &request);
+ &request, device);
if (HAILO_SUCCESS != status) {
goto exit;
}
CONTROL_PROTOCOL__response_header_t *header = NULL;
CONTROL_PROTOCOL__payload_t *payload = NULL;
- /* Validate arch */
- status = Control::validate_arch_supported(device);
- if (HAILO_SUCCESS != status) {
- goto exit;
- }
-
common_status = CONTROL_PROTOCOL__pack_latency_measurement_config_request(&request, &request_size, device.get_control_sequence(),
latency_measurement_en, inbound_start_buffer_number, outbound_stop_buffer_number,
inbound_stream_index, outbound_stream_index);
/* Parse response */
status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload,
- &request);
+ &request, device);
if (HAILO_SUCCESS != status) {
goto exit;
}
CHECK_ARG_NOT_NULL(data);
CHECK_ARG_NOT_NULL(config_name);
- /* Validate arch */
- status = Control::validate_arch_supported(device);
- if (HAILO_SUCCESS != status) {
- goto exit;
- }
-
common_status = CONTROL_PROTOCOL__pack_sensor_store_config_request(&request, &request_size, device.get_control_sequence(), is_first, section_index, start_offset,
reset_data_size, sensor_type, total_data_size, data, data_length, config_height,
config_width, config_fps, config_name_length, config_name);
/* Parse response */
status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload,
- &request);
+ &request, device);
if (HAILO_SUCCESS != status) {
goto exit;
}
CONTROL_PROTOCOL__response_header_t *header = NULL;
CONTROL_PROTOCOL__payload_t *payload = NULL;
- /* Validate arch */
- status = Control::validate_arch_supported(device);
- CHECK_SUCCESS(status);
-
status = CONTROL_PROTOCOL__pack_sensor_set_i2c_bus_index_request(&request, &request_size, device.get_control_sequence(), sensor_type, bus_index);
CHECK_SUCCESS(status);
CHECK_SUCCESS(status);
/* Parse response */
- status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, &request);
+ status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, &request, device);
CHECK_SUCCESS(status);
return HAILO_SUCCESS;
CONTROL_PROTOCOL__response_header_t *header = NULL;
CONTROL_PROTOCOL__payload_t *payload = NULL;
- /* Validate arch */
- status = Control::validate_arch_supported(device);
- if (HAILO_SUCCESS != status) {
- goto exit;
- }
-
common_status = CONTROL_PROTOCOL__pack_sensor_load_and_start_config_request(&request, &request_size, device.get_control_sequence(), section_index);
status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE;
if (HAILO_SUCCESS != status) {
/* Parse response */
status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload,
- &request);
+ &request, device);
if (HAILO_SUCCESS != status) {
goto exit;
}
CONTROL_PROTOCOL__response_header_t *header = NULL;
CONTROL_PROTOCOL__payload_t *payload = NULL;
- /* Validate arch */
- status = Control::validate_arch_supported(device);
- if (HAILO_SUCCESS != status) {
- goto exit;
- }
-
common_status = CONTROL_PROTOCOL__pack_sensor_reset_request(&request, &request_size, device.get_control_sequence(), section_index);
status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE;
if (HAILO_SUCCESS != status) {
/* Parse response */
status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload,
- &request);
+ &request, device);
if (HAILO_SUCCESS != status) {
goto exit;
}
CONTROL_PROTOCOL__response_header_t *header = NULL;
CONTROL_PROTOCOL__payload_t *payload = NULL;
- /* Validate arch */
- status = Control::validate_arch_supported(device);
- if (HAILO_SUCCESS != status) {
- goto exit;
- }
-
common_status = CONTROL_PROTOCOL__pack_sensor_set_generic_i2c_slave_request(&request, &request_size, device.get_control_sequence(), slave_address, register_address_size, bus_index, should_hold_bus, endianness);
status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE;
if (HAILO_SUCCESS != status) {
/* Parse response */
status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload,
- &request);
+ &request, device);
if (HAILO_SUCCESS != status) {
goto exit;
}
/* Validate arguments */
CHECK_ARG_NOT_NULL(data);
- /* Validate arch */
- status = Control::validate_arch_supported(device);
- if (HAILO_SUCCESS != status) {
- goto exit;
- }
-
common_status = CONTROL_PROTOCOL__pack_sensor_get_config_request(&request, &request_size, device.get_control_sequence(), section_index, offset, data_length);
status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE;
/* Parse response */
status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload,
- &request);
+ &request, device);
if (HAILO_SUCCESS != status) {
goto exit;
}
/* Validate arguments */
CHECK_ARG_NOT_NULL(data);
- /* Validate arch */
- status = Control::validate_arch_supported(device);
- if (HAILO_SUCCESS != status) {
- goto exit;
- }
-
common_status = CONTROL_PROTOCOL__pack_sensor_get_sections_info_request(&request, &request_size, device.get_control_sequence());
status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE;
/* Parse response */
status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload,
- &request);
+ &request, device);
if (HAILO_SUCCESS != status) {
goto exit;
}
/* Parse response */
status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload,
- &request);
+ &request, device);
if (HAILO_SUCCESS != status) {
goto exit;
}
/* Parse response */
status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload,
- &request);
+ &request, device);
if (HAILO_SUCCESS != status) {
/* In case of a max-memory error, log an error and return an indicative status to the user */
CHECK((CONTEXT_SWITCH_TASK_STATUS_ADD_TRIGGER_FUNCTION_REACHED_FORBIDDEN_MEMORY_SPACE != header->status.major_status),
/* Validate arguments */
CHECK_ARG_NOT_NULL(measurement);
- /* Validate arch */
- status = Control::validate_arch_supported(device);
- if (HAILO_SUCCESS != status) {
- goto exit;
- }
-
common_status = CONTROL_PROTOCOL__pack_idle_time_get_measuremment_request(&request, &request_size, device.get_control_sequence());
status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE;
if (HAILO_SUCCESS != status) {
/* Parse response */
status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload,
- &request);
+ &request, device);
if (HAILO_SUCCESS != status) {
LOGGER__ERROR("failed validating idle_time_get_measurement control response with status {}", status);
goto exit;
CONTROL_PROTOCOL__response_header_t *header = NULL;
CONTROL_PROTOCOL__payload_t *payload = NULL;
- /* Validate arch */
- status = Control::validate_arch_supported(device);
- if (HAILO_SUCCESS != status) {
- goto exit;
- }
-
common_status = CONTROL_PROTOCOL__pack_idle_time_set_measuremment_request(&request, &request_size, device.get_control_sequence(), measurement_enable);
status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE;
if (HAILO_SUCCESS != status) {
/* Parse response */
status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload,
- &request);
+ &request, device);
if (HAILO_SUCCESS != status) {
LOGGER__ERROR("failed idle_time_set_measurement control with status {}", status);
goto exit;
CONTROL_PROTOCOL__request_t request = {};
size_t request_size = 0;
- /* Validate arch */
- auto status = Control::validate_arch_supported(device);
- CHECK_SUCCESS(status);
-
HAILO_COMMON_STATUS_t common_status = CONTROL_PROTOCOL__pack_set_pause_frames_request(&request, &request_size,
device.get_control_sequence(), rx_pause_frames_enable);
- status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE;
+ auto status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE;
CHECK_SUCCESS(status);
uint8_t response_buffer[RESPONSE_MAX_BUFFER_SIZE] = {};
CONTROL_PROTOCOL__response_header_t *header = NULL;
CONTROL_PROTOCOL__payload_t *payload = NULL;
status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload,
- &request);
+ &request, device);
CHECK_SUCCESS(status);
return HAILO_SUCCESS;
/* Parse response */
status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload,
- &request);
+ &request, device);
if (HAILO_SUCCESS != status) {
goto exit;
}
hailo_status Control::change_context_switch_status(Device &device,
CONTROL_PROTOCOL__CONTEXT_SWITCH_STATUS_t state_machine_status,
- uint8_t network_group_index, uint16_t dynamic_batch_size, bool keep_nn_config_during_reset)
+ uint8_t network_group_index, uint16_t dynamic_batch_size, uint16_t batch_count,
+ bool keep_nn_config_during_reset)
{
hailo_status status = HAILO_UNINITIALIZED;
HAILO_COMMON_STATUS_t common_status = HAILO_COMMON_STATUS__UNINITIALIZED;
common_status = CONTROL_PROTOCOL__pack_change_context_switch_status_request(&request, &request_size,
device.get_control_sequence(), state_machine_status, network_group_index, dynamic_batch_size,
- keep_nn_config_during_reset);
+ batch_count, keep_nn_config_during_reset);
status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE;
if (HAILO_SUCCESS != status) {
goto exit;
/* Parse response */
status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload,
- &request);
+ &request, device);
if (HAILO_SUCCESS != status) {
goto exit;
}
return status;
}
-hailo_status Control::enable_core_op(Device &device, uint8_t network_group_index, uint16_t dynamic_batch_size)
+hailo_status Control::enable_core_op(Device &device, uint8_t network_group_index, uint16_t dynamic_batch_size,
+ uint16_t batch_count)
{
- static const auto REMOVE_NN_CONFIG_DURING_RESET = false;
return Control::change_context_switch_status(device, CONTROL_PROTOCOL__CONTEXT_SWITCH_STATUS_ENABLED,
- network_group_index, dynamic_batch_size, REMOVE_NN_CONFIG_DURING_RESET);
+ network_group_index, dynamic_batch_size, batch_count);
}
hailo_status Control::reset_context_switch_state_machine(Device &device, bool keep_nn_config_during_reset)
{
static const auto IGNORE_NETWORK_GROUP_INDEX = 0;
static const auto IGNORE_DYNAMIC_BATCH_SIZE = 0;
+ static const auto DEFAULT_BATCH_COUNT = 0;
return Control::change_context_switch_status(device, CONTROL_PROTOCOL__CONTEXT_SWITCH_STATUS_RESET,
- IGNORE_NETWORK_GROUP_INDEX, IGNORE_DYNAMIC_BATCH_SIZE, keep_nn_config_during_reset);
+ IGNORE_NETWORK_GROUP_INDEX, IGNORE_DYNAMIC_BATCH_SIZE, DEFAULT_BATCH_COUNT, keep_nn_config_during_reset);
}
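/* Example (illustrative sketch): driving the new batch_count argument from a caller.
 * The helper name and the derivation of batch_count from a total frame count are
 * assumptions made for the example; they are not part of the source. */
static hailo_status example_enable_with_batches(Device &device, uint8_t core_op_index,
    uint16_t dynamic_batch_size, uint16_t total_frames)
{
    // Hypothetical derivation (assumes dynamic_batch_size > 0): number of batches
    // needed to cover total_frames, rounded up.
    const auto batch_count = static_cast<uint16_t>((total_frames + dynamic_batch_size - 1) / dynamic_batch_size);
    return Control::enable_core_op(device, core_op_index, dynamic_batch_size, batch_count);
}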
hailo_status Control::wd_enable(Device &device, uint8_t cpu_id, bool should_enable)
CONTROL_PROTOCOL__response_header_t *header = NULL;
CONTROL_PROTOCOL__payload_t *payload = NULL;
- /* Validate arch */
- status = Control::validate_arch_supported(device);
- if (HAILO_SUCCESS != status) {
- goto exit;
- }
-
common_status = CONTROL_PROTOCOL__pack_wd_enable(&request, &request_size, device.get_control_sequence(), cpu_id, should_enable);
status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE;
if (HAILO_SUCCESS != status) {
/* Parse response */
status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload,
- &request);
+ &request, device);
if (HAILO_SUCCESS != status) {
LOGGER__ERROR("failed wd_enable control with status {}", status);
goto exit;
CONTROL_PROTOCOL__response_header_t *header = NULL;
CONTROL_PROTOCOL__payload_t *payload = NULL;
- /* Validate arch */
- status = Control::validate_arch_supported(device);
- if (HAILO_SUCCESS != status) {
- goto exit;
- }
-
common_status = CONTROL_PROTOCOL__pack_wd_config(&request, &request_size, device.get_control_sequence(), cpu_id, wd_cycles, wd_mode);
status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE;
if (HAILO_SUCCESS != status) {
/* Parse response */
status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload,
- &request);
+ &request, device);
if (HAILO_SUCCESS != status) {
LOGGER__ERROR("failed wd_config control with status {}", status);
goto exit;
CHECK_ARG_NOT_NULL(system);
- /* Validate arch */
- status = Control::validate_arch_supported(device);
- if (HAILO_SUCCESS != status) {
- goto exit;
- }
-
common_status = CONTROL_PROTOCOL__pack_previous_system_state(&request, &request_size, device.get_control_sequence(), cpu_id);
status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE;
if (HAILO_SUCCESS != status) {
/* Parse response */
status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload,
- &request);
+ &request, device);
if (HAILO_SUCCESS != status) {
LOGGER__ERROR("failed previous_system_state control with status {}", status);
goto exit;
/* Parse response */
status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload,
- &request);
+ &request, device);
if (HAILO_SUCCESS != status) {
goto exit;
}
/* Parse response */
status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload,
- &request);
+ &request, device);
if (HAILO_SUCCESS != status) {
goto exit;
}
/* Parse response */
status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload,
- &request);
+ &request, device);
if (HAILO_SUCCESS != status) {
goto exit;
}
}
/* Parse response */
- status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, &request);
+ status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, &request, device);
if (HAILO_SUCCESS != status) {
LOGGER__ERROR("failed clear_configured_apps control with status {}", status);
goto exit;
CONTROL_PROTOCOL__payload_t *payload = NULL;
CONTROL_PROTOCOL__get_chip_temperature_response_t* temps = NULL;
- /* Validate arch */
- status = Control::validate_arch_supported(device);
- if (HAILO_SUCCESS != status) {
- goto exit;
- }
-
common_status = CONTROL_PROTOCOL__pack_get_chip_temperature_request(&request, &request_size, device.get_control_sequence());
status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE;
if (HAILO_SUCCESS != status) {
/* Parse response */
status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload,
- &request);
+ &request, device);
if (HAILO_SUCCESS != status) {
goto exit;
}
CONTROL_PROTOCOL__response_header_t *header = NULL;
CONTROL_PROTOCOL__payload_t *payload = NULL;
- /* Validate arch */
- status = Control::validate_arch_supported(device);
- if (HAILO_SUCCESS != status) {
- goto exit;
- }
-
common_status = CONTROL_PROTOCOL__pack_enable_debugging_request(&request, &request_size, device.get_control_sequence(), is_rma);
status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE;
if (HAILO_SUCCESS != status) {
/* Parse response */
status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload,
- &request);
+ &request, device);
if (HAILO_SUCCESS != status) {
goto exit;
}
CONTROL_PROTOCOL__response_header_t *header = NULL;
CONTROL_PROTOCOL__payload_t *payload = NULL;
- /* Validate arguments */
-
- /* Validate arch */
- status = Control::validate_arch_supported(device);
- CHECK_SUCCESS_AS_EXPECTED(status);
-
common_status = CONTROL_PROTOCOL__pack_get_extended_device_information_request(&request, &request_size, device.get_control_sequence());
status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE;
CHECK_SUCCESS_AS_EXPECTED(status);
CHECK_SUCCESS_AS_EXPECTED(status);
/* Parse response */
- status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, &request);
+ status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, &request, device);
CHECK_SUCCESS_AS_EXPECTED(status);
return std::move(*(CONTROL_PROTOCOL__get_extended_device_information_response_t *)(payload->parameters));
CONTROL_PROTOCOL__payload_t *payload = NULL;
CONTROL_PROTOCOL__get_health_information_response_t *get_health_information_response = NULL;
- /* Validate arguments */
-
- /* Validate arch */
- status = Control::validate_arch_supported(device);
- CHECK_SUCCESS_AS_EXPECTED(status);
-
common_status = CONTROL_PROTOCOL__pack_get_health_information_request(&request, &request_size, device.get_control_sequence());
status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE;
CHECK_SUCCESS_AS_EXPECTED(status);
CHECK_SUCCESS_AS_EXPECTED(status);
/* Parse response */
- status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, &request);
+ status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, &request,
+ device);
CHECK_SUCCESS_AS_EXPECTED(status);
get_health_information_response = (CONTROL_PROTOCOL__get_health_information_response_t *)(payload->parameters);
/* Parse response */
status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload,
- &request);
+ &request, device);
if (HAILO_SUCCESS != status) {
goto exit;
}
/* Parse response */
status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload,
- &request);
+ &request, device);
if (HAILO_SUCCESS != status) {
goto exit;
}
/* Parse response */
status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload,
- &request);
+ &request, device);
if (HAILO_SUCCESS != status) {
goto exit;
}
/* Parse response */
status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload,
- &request);
+ &request, device);
CHECK_SUCCESS(status);
return HAILO_SUCCESS;
CONTROL_PROTOCOL__response_header_t *header = NULL;
CONTROL_PROTOCOL__payload_t *payload = NULL;
- /* Validate arch */
- auto status = Control::validate_arch_supported(device);
- CHECK_SUCCESS(status);
-
auto common_status = CONTROL_PROTOCOL__pack_run_bist_test_request(
&request, &request_size, device.get_control_sequence(),
is_top_test, top_bypass_bitmap, cluster_index, cluster_bypass_bitmap_0, cluster_bypass_bitmap_1);
- status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE;
+ auto status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE;
CHECK_SUCCESS(status);
status = device.fw_interact((uint8_t*)(&request), request_size, (uint8_t*)&response_buffer, &response_size);
/* Parse response */
status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload,
- &request);
+ &request, device);
CHECK_SUCCESS(status);
return HAILO_SUCCESS;
CONTROL_PROTOCOL__response_header_t *header = NULL;
CONTROL_PROTOCOL__payload_t *payload = NULL;
- /* Validate arch */
- auto status = Control::validate_arch_supported(device);
- CHECK_SUCCESS(status);
-
auto common_status = CONTROL_PROTOCOL__pack_set_sleep_state_request(
&request, &request_size, device.get_control_sequence(), static_cast<uint8_t>(sleep_state));
- status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE;
+ auto status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE;
CHECK_SUCCESS(status);
status = device.fw_interact((uint8_t*)(&request), request_size, (uint8_t*)&response_buffer, &response_size);
/* Parse response */
status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload,
- &request);
+ &request, device);
CHECK_SUCCESS(status);
return HAILO_SUCCESS;
}
hailo_status Control::change_hw_infer_status(Device &device, CONTROL_PROTOCOL__hw_infer_state_t state,
- uint8_t network_group_index, uint16_t dynamic_batch_size,
+ uint8_t network_group_index, uint16_t dynamic_batch_size, uint16_t batch_count,
CONTROL_PROTOCOL__hw_infer_channels_info_t *channels_info, CONTROL_PROTOCOL__hw_only_infer_results_t *results)
{
CONTROL_PROTOCOL__request_t request = {};
auto common_status = CONTROL_PROTOCOL__pack_change_hw_infer_status_request(
&request, &request_size, device.get_control_sequence(), static_cast<uint8_t>(state),
- network_group_index, dynamic_batch_size, channels_info);
+ network_group_index, dynamic_batch_size, batch_count, channels_info);
auto status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE;
CHECK_SUCCESS(status);
/* Parse response */
status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload,
- &request);
+ &request, device);
CHECK_SUCCESS(status);
change_hw_infer_status_response = (CONTROL_PROTOCOL__change_hw_infer_status_response_t *)(payload->parameters);
}
hailo_status Control::start_hw_only_infer(Device &device, uint8_t network_group_index, uint16_t dynamic_batch_size,
- CONTROL_PROTOCOL__hw_infer_channels_info_t *channels_info)
+ uint16_t batch_count, CONTROL_PROTOCOL__hw_infer_channels_info_t *channels_info)
{
CONTROL_PROTOCOL__hw_only_infer_results_t results = {};
return Control::change_hw_infer_status(device, CONTROL_PROTOCOL__HW_INFER_STATE_START,
- network_group_index, dynamic_batch_size, channels_info ,&results);
+        network_group_index, dynamic_batch_size, batch_count, channels_info, &results);
}
hailo_status Control::stop_hw_only_infer(Device &device, CONTROL_PROTOCOL__hw_only_infer_results_t *results)
{
const uint8_t DEFAULT_NETWORK_GROUP = 0;
const uint16_t DEFAULT_DYNAMIC_BATCH_SIZE = 1;
+ const uint16_t DEFAULT_BATCH_COUNT = 1;
CONTROL_PROTOCOL__hw_infer_channels_info_t channels_info_default = {};
return Control::change_hw_infer_status(device, CONTROL_PROTOCOL__HW_INFER_STATE_STOP,
- DEFAULT_NETWORK_GROUP, DEFAULT_DYNAMIC_BATCH_SIZE, &channels_info_default, results);
+ DEFAULT_NETWORK_GROUP, DEFAULT_DYNAMIC_BATCH_SIZE, DEFAULT_BATCH_COUNT, &channels_info_default, results);
}
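/* Example (illustrative sketch): a minimal HW-only inference flow built on the
 * start/stop controls above. Channel setup is elided and the batch values are
 * placeholders; waiting for completion is only indicated by a comment. */
static hailo_status example_hw_only_infer(Device &device, uint8_t network_group_index)
{
    CONTROL_PROTOCOL__hw_infer_channels_info_t channels_info = {}; // would be filled with real channel info
    const uint16_t dynamic_batch_size = 1;
    const uint16_t batch_count = 8; // hypothetical: run 8 batches

    auto status = Control::start_hw_only_infer(device, network_group_index, dynamic_batch_size,
        batch_count, &channels_info);
    CHECK_SUCCESS(status);

    // ... wait for the HW-infer-done notification from the firmware ...

    CONTROL_PROTOCOL__hw_only_infer_results_t results = {};
    return Control::stop_hw_only_infer(device, &results);
}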
} /* namespace hailort */
static hailo_status parse_and_validate_response(uint8_t *message, uint32_t message_size,
CONTROL_PROTOCOL__response_header_t **header, CONTROL_PROTOCOL__payload_t **payload,
- CONTROL_PROTOCOL__request_t *request);
+ CONTROL_PROTOCOL__request_t *request, Device &device);
/**
* Receive information about the device.
* Enable core-op
*
* @param[in] device - The Hailo device.
- * @param[in] core_op_index - core_op index
+ * @param[in] core_op_index - core_op index
+ * @param[in] dynamic_batch_size - actual batch size
+     * @param[in] batch_count - number of batches the user wishes to run on the Hailo chip
*
* @return Upon success, returns @a HAILO_SUCCESS. Otherwise, returns a @a hailo_status error.
*/
- static hailo_status enable_core_op(Device &device, uint8_t core_op_index, uint16_t dynamic_batch_size);
+ static hailo_status enable_core_op(Device &device, uint8_t core_op_index, uint16_t dynamic_batch_size,
+ uint16_t batch_count);
/**
* reset context switch state machine
*
static Expected<CONTROL_PROTOCOL__hw_consts_t> get_hw_consts(Device &device);
static hailo_status set_sleep_state(Device &device, hailo_sleep_state_t sleep_state);
static hailo_status change_hw_infer_status(Device &device, CONTROL_PROTOCOL__hw_infer_state_t state,
- uint8_t network_group_index, uint16_t dynamic_batch_size,
+ uint8_t network_group_index, uint16_t dynamic_batch_size, uint16_t batch_count,
CONTROL_PROTOCOL__hw_infer_channels_info_t *channels_info, CONTROL_PROTOCOL__hw_only_infer_results_t *results);
static hailo_status start_hw_only_infer(Device &device, uint8_t network_group_index, uint16_t dynamic_batch_size,
- CONTROL_PROTOCOL__hw_infer_channels_info_t *channels_info);
+ uint16_t batch_count, CONTROL_PROTOCOL__hw_infer_channels_info_t *channels_info);
static hailo_status stop_hw_only_infer(Device &device, CONTROL_PROTOCOL__hw_only_infer_results_t *results);
// TODO: needed?
static hailo_status power_measurement(Device &device, CONTROL_PROTOCOL__dvm_options_t dvm,
bool *is_action_list_end, uint32_t *batch_counter);
static hailo_status context_switch_set_context_info_chunk(Device &device,
const CONTROL_PROTOCOL__context_switch_context_info_single_control_t &context_info);
- static hailo_status change_context_switch_status(Device &device,
+ static hailo_status change_context_switch_status(Device &device,
CONTROL_PROTOCOL__CONTEXT_SWITCH_STATUS_t state_machine_status,
- uint8_t network_group_index, uint16_t dynamic_batch_size, bool keep_nn_config_during_reset);
+ uint8_t network_group_index, uint16_t dynamic_batch_size, uint16_t batch_count,
+ bool keep_nn_config_during_reset = false);
static Expected<CONTROL_PROTOCOL__get_extended_device_information_response_t> get_extended_device_info_response(Device &device);
- static hailo_status validate_arch_supported(Device &device, const std::vector<hailo_device_architecture_t> &supported_archs = { HAILO_ARCH_HAILO8, HAILO_ARCH_HAILO8L });
};
} /* namespace hailort */
return CONTROL_PROTOCOL__textual_format[opcode];
}
-#define CHANGE_HW_INFER_REQUEST_PARAMETER_COUNT (4)
+#define CHANGE_HW_INFER_REQUEST_PARAMETER_COUNT (5)
/* Functions declarations */
HAILO_COMMON_STATUS_t control_protocol__parse_message(uint8_t *message,
return status;
}
+#define CONTEXT_SWITCH_SWITCH_STATUS_REQUEST_PARAMS (5)
HAILO_COMMON_STATUS_t CONTROL_PROTOCOL__pack_change_context_switch_status_request(
CONTROL_PROTOCOL__request_t *request, size_t *request_size, uint32_t sequence,
CONTROL_PROTOCOL__CONTEXT_SWITCH_STATUS_t state_machine_status, uint8_t application_index,
- uint16_t dynamic_batch_size, bool keep_nn_config_during_reset)
+ uint16_t dynamic_batch_size, uint16_t batch_count, bool keep_nn_config_during_reset)
{
HAILO_COMMON_STATUS_t status = HAILO_COMMON_STATUS__UNINITIALIZED;
size_t local_request_size = 0;
/* Header */
local_request_size = CONTROL_PROTOCOL__REQUEST_BASE_SIZE +
sizeof(CONTROL_PROTOCOL__change_context_switch_status_request_t);
- control_protocol__pack_request_header(request, sequence, HAILO_CONTROL_OPCODE_CHANGE_CONTEXT_SWITCH_STATUS, 4);
+ control_protocol__pack_request_header(request, sequence,
+ HAILO_CONTROL_OPCODE_CHANGE_CONTEXT_SWITCH_STATUS, CONTEXT_SWITCH_SWITCH_STATUS_REQUEST_PARAMS);
/* state_machine_status */
request->parameters.change_context_switch_status_request.state_machine_status_length =
request->parameters.change_context_switch_status_request.dynamic_batch_size_length =
BYTE_ORDER__htonl(sizeof(request->parameters.change_context_switch_status_request.dynamic_batch_size));
request->parameters.change_context_switch_status_request.dynamic_batch_size = dynamic_batch_size;
-
- /* dynamic_batch_size */
+
+ /* batch_count */
+ request->parameters.change_context_switch_status_request.batch_count_length =
+ BYTE_ORDER__htonl(sizeof(request->parameters.change_context_switch_status_request.batch_count));
+ request->parameters.change_context_switch_status_request.batch_count = batch_count;
+
+ /* keep_nn_config_during_reset */
request->parameters.change_context_switch_status_request.keep_nn_config_during_reset_length =
BYTE_ORDER__htonl(sizeof(request->parameters.change_context_switch_status_request.keep_nn_config_during_reset));
request->parameters.change_context_switch_status_request.keep_nn_config_during_reset = keep_nn_config_during_reset;
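/* Example (illustrative sketch): the packing convention used above. Each parameter is
 * serialized as a 32-bit length in network byte order followed by the raw value, and
 * the parameter count passed to control_protocol__pack_request_header must match the
 * number of packed parameters. The macro below merely restates that pattern; it is an
 * assumption for the example and not part of the protocol code. */
#define EXAMPLE__PACK_PARAM(params, field, value) do { \
    (params).field##_length = BYTE_ORDER__htonl(sizeof((params).field)); \
    (params).field = (value); \
} while (0)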
HAILO_COMMON_STATUS_t CONTROL_PROTOCOL__pack_change_hw_infer_status_request(
CONTROL_PROTOCOL__request_t *request, size_t *request_size, uint32_t sequence,
uint8_t hw_infer_state, uint8_t network_group_index, uint16_t dynamic_batch_size,
- CONTROL_PROTOCOL__hw_infer_channels_info_t *channels_info)
+ uint16_t batch_count, CONTROL_PROTOCOL__hw_infer_channels_info_t *channels_info)
{
HAILO_COMMON_STATUS_t status = HAILO_COMMON_STATUS__UNINITIALIZED;
size_t local_request_size = 0;
BYTE_ORDER__htonl(sizeof(request->parameters.change_hw_infer_status_request.dynamic_batch_size));
request->parameters.change_hw_infer_status_request.dynamic_batch_size = dynamic_batch_size;
+ /* batch_count */
+ request->parameters.change_hw_infer_status_request.batch_count_length =
+ BYTE_ORDER__htonl(sizeof(request->parameters.change_hw_infer_status_request.batch_count));
+ request->parameters.change_hw_infer_status_request.batch_count = batch_count;
+
/* channels_info */
request->parameters.change_hw_infer_status_request.channels_info_length =
BYTE_ORDER__htonl(sizeof(request->parameters.change_hw_infer_status_request.channels_info));
HAILO_COMMON_STATUS_t CONTROL_PROTOCOL__pack_change_context_switch_status_request(
CONTROL_PROTOCOL__request_t *request, size_t *request_size, uint32_t sequence,
CONTROL_PROTOCOL__CONTEXT_SWITCH_STATUS_t state_machine_status, uint8_t application_index,
- uint16_t dynamic_batch_size, bool keep_nn_config_during_reset);
+ uint16_t dynamic_batch_size, uint16_t batch_count, bool keep_nn_config_during_reset);
HAILO_COMMON_STATUS_t CONTROL_PROTOCOL__pack_wd_enable(
CONTROL_PROTOCOL__request_t *request,
size_t *request_size,
HAILO_COMMON_STATUS_t CONTROL_PROTOCOL__pack_set_sleep_state_request(CONTROL_PROTOCOL__request_t *request, size_t *request_size, uint32_t sequence, uint8_t sleep_state);
HAILO_COMMON_STATUS_t CONTROL_PROTOCOL__pack_change_hw_infer_status_request(CONTROL_PROTOCOL__request_t *request,
size_t *request_size, uint32_t sequence, uint8_t hw_infer_state, uint8_t network_group_index,
- uint16_t dynamic_batch_size, CONTROL_PROTOCOL__hw_infer_channels_info_t *channels_info);
+ uint16_t dynamic_batch_size, uint16_t batch_count, CONTROL_PROTOCOL__hw_infer_channels_info_t *channels_info);
#endif /* _CONTROL_PROTOCOL_HPP_ */
\ No newline at end of file
static HAILO_COMMON_STATUS_t D2H_EVENTS__parse_health_monitor_cpu_ecc_fatal_notification(D2H_EVENT_MESSAGE_t *d2h_notification_message);
static HAILO_COMMON_STATUS_t D2H_EVENTS__parse_context_switch_breakpoint_reached(D2H_EVENT_MESSAGE_t *d2h_notification_message);
static HAILO_COMMON_STATUS_t D2H_EVENTS__parse_health_monitor_clock_changed_event_notification(D2H_EVENT_MESSAGE_t *d2h_notification_message);
+static HAILO_COMMON_STATUS_t D2H_EVENTS__parse_hw_infer_manager_infer_done_notification(D2H_EVENT_MESSAGE_t *d2h_notification_message);
/**********************************************************************
* Globals
D2H_EVENTS__parse_health_monitor_cpu_ecc_error_notification,
D2H_EVENTS__parse_health_monitor_cpu_ecc_fatal_notification,
D2H_EVENTS__parse_context_switch_breakpoint_reached,
- D2H_EVENTS__parse_health_monitor_clock_changed_event_notification
+ D2H_EVENTS__parse_health_monitor_clock_changed_event_notification,
+ D2H_EVENTS__parse_hw_infer_manager_infer_done_notification
};
/**********************************************************************
* Internal Functions
return status;
}
+static HAILO_COMMON_STATUS_t D2H_EVENTS__parse_hw_infer_manager_infer_done_notification(D2H_EVENT_MESSAGE_t *d2h_notification_message)
+{
+ HAILO_COMMON_STATUS_t status = HAILO_COMMON_STATUS__UNINITIALIZED;
+
+ if (D2H_EVENT_HW_INFER_MANAGER_INFER_DONE_PARAMETER_COUNT != d2h_notification_message->header.parameter_count) {
+ LOGGER__ERROR("d2h notification invalid parameter count: {}", d2h_notification_message->header.parameter_count);
+ status = HAILO_STATUS__D2H_EVENTS__INCORRECT_PARAMETER_COUNT;
+ goto l_exit;
+ }
+
+ LOGGER__INFO("Got hw infer done notification - Infer took {} cycles",
+ d2h_notification_message->message_parameters.hw_infer_manager_infer_done_event.infer_cycles);
+
+ status = HAILO_COMMON_STATUS__SUCCESS;
+
+l_exit:
+ return status;
+}
+
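/* Example (illustrative sketch): how a user might subscribe to the new HW-infer-done
 * event through the public Device API. The registration follows
 * Device::set_notification_callback; the exact layout of the notification body is
 * not shown here and is left as a comment. */
static hailo_status example_register_infer_done(hailort::Device &device)
{
    return device.set_notification_callback(
        [](hailort::Device &, const hailo_notification_t &notification, void *) {
            (void)notification; // e.g. read the infer cycle count from the notification body
        },
        HAILO_NOTIFICATION_ID_HW_INFER_MANAGER_INFER_DONE, nullptr);
}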
static HAILO_COMMON_STATUS_t D2H_EVENTS__parse_health_monitor_closed_streams_notification(D2H_EVENT_MESSAGE_t *d2h_notification_message)
{
HAILO_COMMON_STATUS_t status = HAILO_COMMON_STATUS__UNINITIALIZED;
{
auto device_ids = scan();
CHECK_EXPECTED(device_ids, "Failed to scan devices");
- CHECK_AS_EXPECTED(device_ids->size() == 1, HAILO_INVALID_OPERATION,
- "Expected only 1 device on the system (found {}). Pass device_id to create a specific device", device_ids->size());
+ CHECK_AS_EXPECTED(device_ids->size() >= 1, HAILO_INVALID_OPERATION, "There is no hailo device on the system");
+ // Choose the first device.
return Device::create(device_ids->at(0));
}
return device;
}
+Expected<std::unique_ptr<Device>> Device::create_eth(const std::string &device_address, uint16_t port,
+ uint32_t timeout_milliseconds, uint8_t max_number_of_attempts)
+{
+ /* Validate address length */
+ CHECK_AS_EXPECTED(INET_ADDRSTRLEN >= device_address.size(),
+ HAILO_INVALID_ARGUMENT, "device_address is too long");
+
+ hailo_eth_device_info_t device_info = {};
+ device_info.host_address.sin_family = AF_INET;
+ device_info.host_address.sin_port = HAILO_ETH_PORT_ANY;
+ auto status = Socket::pton(AF_INET, HAILO_ETH_ADDRESS_ANY, &(device_info.host_address.sin_addr));
+ CHECK_SUCCESS_AS_EXPECTED(status);
+
+ device_info.device_address.sin_family = AF_INET;
+ device_info.device_address.sin_port = port;
+ status = Socket::pton(AF_INET, device_address.c_str(), &(device_info.device_address.sin_addr));
+ CHECK_SUCCESS_AS_EXPECTED(status);
+
+ device_info.timeout_millis = timeout_milliseconds;
+ device_info.max_number_of_attempts = max_number_of_attempts;
+ device_info.max_payload_size = HAILO_DEFAULT_ETH_MAX_PAYLOAD_SIZE;
+
+ return create_eth(device_info);
+}
+
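/* Example (illustrative sketch): creating an Ethernet device with the address-based
 * overload above. The address, port, timeout and retry count are placeholder values
 * chosen for the example. */
static void example_create_eth_device()
{
    auto device = Device::create_eth("192.168.0.10", 22401, 10000, 3);
    if (!device) {
        LOGGER__ERROR("create_eth failed with status {}", device.status());
    }
}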
Expected<hailo_pcie_device_info_t> Device::parse_pcie_device_info(const std::string &device_info_str)
{
const bool LOG_ON_FAILURE = true;
}
}
+bool Device::device_ids_equal(const std::string &first, const std::string &second)
+{
+ const bool DONT_LOG_ON_FAILURE = false;
+ if (IntegratedDevice::DEVICE_ID == first) {
+        // On integrated devices, all device ids should be the same
+ return first == second;
+ } else if (auto first_pcie_info = PcieDevice::parse_pcie_device_info(first, DONT_LOG_ON_FAILURE)) {
+ auto second_pcie_info = PcieDevice::parse_pcie_device_info(second, DONT_LOG_ON_FAILURE);
+ if (!second_pcie_info) {
+            // second is not a pcie id
+ return false;
+ }
+ return PcieDevice::pcie_device_infos_equal(*first_pcie_info, *second_pcie_info);
+ } else if (auto eth_info = EthernetDevice::parse_eth_device_info(first, DONT_LOG_ON_FAILURE)) {
+        // On Ethernet devices, device ids should be equal
+ return first == second;
+ } else {
+        // first device id does not match any supported format.
+ return false;
+ }
+}
+
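/* Example (illustrative sketch): device_ids_equal compares PCIe ids structurally
 * (both sides are parsed and compared field-by-field) while integrated and Ethernet
 * ids are compared as plain strings. The ids below are placeholders. */
static void example_compare_device_ids()
{
    const bool same = Device::device_ids_equal("0000:01:00.0", "0000:01:00.0");
    (void)same; // true: both parse to the same PCIe device info
}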
uint32_t Device::get_control_sequence()
{
return m_control_sequence;
continue;
}
- LOGGER__INFO("[{}] Got notification from fw with id: {}", device_id, hailo_notification_id);
-
std::shared_ptr<NotificationCallback> callback_func = nullptr;
void *callback_opaque = nullptr;
{
case HEALTH_MONITOR_CLOCK_CHANGED_EVENT_ID:
*hailo_notification_id = HAILO_NOTIFICATION_ID_HEALTH_MONITOR_CLOCK_CHANGED_EVENT;
break;
+ case HW_INFER_MANAGER_INFER_DONE:
+ *hailo_notification_id = HAILO_NOTIFICATION_ID_HW_INFER_MANAGER_INFER_DONE;
+ break;
default:
status = HAILO_INVALID_ARGUMENT;
goto l_exit;
virtual hailo_status erase_user_config() override;
static hailo_device_architecture_t hef_arch_to_device_arch(ProtoHEFHwArch hef_arch);
+ virtual Expected<hailo_device_architecture_t> get_architecture() const override
+ {
+        // FW is always up if we got here (the device implementation's ctor would fail otherwise)
+ // Hence, just return it
+ return Expected<hailo_device_architecture_t>(m_device_architecture);
+ }
+
+
protected:
struct NotificationThreadSharedParams {
NotificationThreadSharedParams() : is_running(false) {}
CHECK_SUCCESS(status);
/* Parse and validate the response */
- return Control::parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, &request);
+ return Control::parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, &request,
+ *this);
}
Expected<std::unique_ptr<EthernetDevice>> EthernetDevice::create(const hailo_eth_device_info_t &device_info)
std::chrono::milliseconds timeout)
{
// Convert interface name to IP address
- std::array<char, IPV4_STRING_MAX_LENGTH> interface_ip_address{};
- auto status = EthernetUtils::get_ip_from_interface(interface_name.c_str(), interface_ip_address.data(), interface_ip_address.size());
- CHECK_SUCCESS_AS_EXPECTED(status);
+ auto interface_ip_address = EthernetUtils::get_ip_from_interface(interface_name);
+ CHECK_EXPECTED(interface_ip_address);
- return scan_by_host_address(interface_ip_address.data(), timeout);
+ return scan_by_host_address(*interface_ip_address, timeout);
}
hailo_status get_udp_broadcast_params(const char *host_address, struct in_addr &interface_ip_address,
// TODO: fix logic with respect to is_expecting_response
if (0 != response_size) {
status = Control::parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header,
- &payload, &request);
+ &payload, &request, *this);
CHECK_SUCCESS(status);
CHECK(is_expecting_response, HAILO_INTERNAL_FAILURE,
"Recived valid response from FW for control who is not expecting one.");
return HAILO_SUCCESS;
}
-Expected<hailo_device_architecture_t> EthernetDevice::get_architecture() const
-{
- // FW is always up if we got here (EthernetDevice's ctor would fail otherwise)
- // Hence, just return it
- return Expected<hailo_device_architecture_t>(m_device_architecture);
-}
-
hailo_eth_device_info_t EthernetDevice::get_device_info() const
{
return m_device_info;
auto core_op_metadata = hef.pimpl->get_core_op_metadata(network_group_name);
CHECK_EXPECTED(core_op_metadata);
+ auto core_op_metadata_ptr = core_op_metadata.release();
- auto core_op_metadata_ptr = make_shared_nothrow<CoreOpMetadata>(core_op_metadata.release());
- CHECK_AS_EXPECTED(nullptr != core_op_metadata_ptr, HAILO_OUT_OF_HOST_MEMORY);
-
- auto net_flow_ops = hef.pimpl->post_process_ops(core_op_metadata_ptr->core_op_name());
+ auto metadata = hef.pimpl->network_group_metadata(core_op_metadata_ptr->core_op_name());
auto status = HAILO_UNINITIALIZED;
auto single_context_app = HcpConfigCoreOp(*this, m_active_core_op_holder, net_group_config.release(),
m_core_ops.push_back(core_op_ptr);
core_ops_ptrs.push_back(core_op_ptr);
- auto net_group_expected = ConfiguredNetworkGroupBase::create(config_params, std::move(core_ops_ptrs), std::move(net_flow_ops));
+ auto net_group_expected = ConfiguredNetworkGroupBase::create(config_params, std::move(core_ops_ptrs), std::move(metadata));
CHECK_EXPECTED(net_group_expected);
auto net_group_ptr = net_group_expected.release();
static Expected<std::unique_ptr<EthernetDevice>> create(const hailo_eth_device_info_t &device_info);
static Expected<std::unique_ptr<EthernetDevice>> create(const std::string &ip_addr);
- virtual Expected<hailo_device_architecture_t> get_architecture() const override;
hailo_eth_device_info_t get_device_info() const;
virtual const char* get_dev_id() const override;
return size;
}
-hailo_status EthernetInputStream::sync_write_all_raw_buffer_no_transform_impl(void *buffer, size_t offset, size_t size)
+hailo_status EthernetInputStream::write_impl(const MemoryView &buffer)
{
hailo_status status = HAILO_UNINITIALIZED;
- ASSERT(NULL != buffer);
-
- CHECK(size >= MIN_UDP_PAYLOAD_SIZE, HAILO_INVALID_ARGUMENT, "Input must be larger than {}", MIN_UDP_PAYLOAD_SIZE);
- CHECK(((size % HailoRTCommon::HW_DATA_ALIGNMENT) == 0), HAILO_INVALID_ARGUMENT,
- "Input must be aligned to {} (got {})", HailoRTCommon::HW_DATA_ALIGNMENT, size);
+    CHECK(buffer.size() >= MIN_UDP_PAYLOAD_SIZE, HAILO_INVALID_ARGUMENT, "Input must be at least {} bytes", MIN_UDP_PAYLOAD_SIZE);
+ CHECK(((buffer.size() % HailoRTCommon::HW_DATA_ALIGNMENT) == 0), HAILO_INVALID_ARGUMENT,
+ "Input must be aligned to {} (got {})", HailoRTCommon::HW_DATA_ALIGNMENT, buffer.size());
+ const size_t offset = 0;
if (this->configuration.is_sync_enabled) {
- status = eth_stream__write_all_with_sync(buffer, offset, size);
+ status = eth_stream__write_all_with_sync(buffer.data(), offset, buffer.size());
} else {
- status = eth_stream__write_all_no_sync(buffer, offset, size);
+ status = eth_stream__write_all_no_sync(buffer.data(), offset, buffer.size());
}
if (HAILO_STREAM_ABORTED_BY_USER == status) {
LOGGER__INFO("eth_stream__write_all was aborted!");
return HAILO_SUCCESS;
}
-hailo_status EthernetInputStream::eth_stream__write_all_no_sync(void *buffer, size_t offset, size_t size) {
+hailo_status EthernetInputStream::eth_stream__write_all_no_sync(const void *buffer, size_t offset, size_t size) {
size_t remainder_size = 0;
size_t packet_size = this->configuration.max_payload_size;
return eth_stream__write_with_remainder(buffer, offset, size, remainder_size);
}
-hailo_status EthernetInputStream::eth_stream__write_with_remainder(void *buffer, size_t offset, size_t size, size_t remainder_size) {
+hailo_status EthernetInputStream::eth_stream__write_with_remainder(const void *buffer, size_t offset, size_t size, size_t remainder_size) {
size_t transfer_size = 0;
size_t offset_end_without_remainder = offset + size - remainder_size;
while (offset < offset_end_without_remainder) {
transfer_size = offset_end_without_remainder - offset;
- auto expected_bytes_written = sync_write_raw_buffer(MemoryView(static_cast<uint8_t*>(buffer) + offset, transfer_size));
+ auto expected_bytes_written = sync_write_raw_buffer(MemoryView::create_const(static_cast<const uint8_t*>(buffer) + offset, transfer_size));
if (HAILO_STREAM_ABORTED_BY_USER == expected_bytes_written.status()) {
LOGGER__INFO("sync_write_raw_buffer was aborted!");
return expected_bytes_written.status();
offset += expected_bytes_written.release();
}
if (0 < remainder_size) {
- auto expected_bytes_written = sync_write_raw_buffer(MemoryView(static_cast<uint8_t*>(buffer) + offset, remainder_size));
+ auto expected_bytes_written = sync_write_raw_buffer(MemoryView::create_const(static_cast<const uint8_t*>(buffer) + offset, remainder_size));
if (HAILO_STREAM_ABORTED_BY_USER == expected_bytes_written.status()) {
LOGGER__INFO("sync_write_raw_buffer was aborted!");
return expected_bytes_written.status();
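/* Example (illustrative sketch): the chunking pattern implemented by
 * eth_stream__write_with_remainder, restated as a standalone helper. `write` is
 * assumed to be a callable taking a const MemoryView and returning Expected<size_t>
 * (the number of bytes actually written), like sync_write_raw_buffer. */
template <typename WriteFunc>
static hailo_status example_write_with_remainder(WriteFunc &&write, const uint8_t *buffer,
    size_t size, size_t remainder_size)
{
    size_t offset = 0;
    const size_t end_without_remainder = size - remainder_size;
    // Send full-size chunks first, advancing by however many bytes each call wrote.
    while (offset < end_without_remainder) {
        auto written = write(MemoryView::create_const(buffer + offset, end_without_remainder - offset));
        if (HAILO_SUCCESS != written.status()) {
            return written.status();
        }
        offset += written.release();
    }
    // Then send the remainder (if any) as one final, smaller packet.
    if (remainder_size > 0) {
        auto written = write(MemoryView::create_const(buffer + offset, remainder_size));
        if (HAILO_SUCCESS != written.status()) {
            return written.status();
        }
    }
    return HAILO_SUCCESS;
}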
token_bucket()
{}
-hailo_status TokenBucketEthernetInputStream::eth_stream__write_with_remainder(void *buffer, size_t offset, size_t size, size_t remainder_size) {
+hailo_status TokenBucketEthernetInputStream::eth_stream__write_with_remainder(const void *buffer, size_t offset, size_t size, size_t remainder_size) {
size_t transfer_size = 0;
size_t offset_end_without_remainder = offset + size - remainder_size;
(void)token_bucket.consumeWithBorrowAndWait(MAX_CONSUME_SIZE, rate_bytes_per_sec, BURST_SIZE);
transfer_size = offset_end_without_remainder - offset;
- auto expected_bytes_written = sync_write_raw_buffer(MemoryView(static_cast<uint8_t*>(buffer) + offset, transfer_size));
+ auto expected_bytes_written = sync_write_raw_buffer(MemoryView::create_const(static_cast<const uint8_t*>(buffer) + offset, transfer_size));
if (HAILO_STREAM_ABORTED_BY_USER == expected_bytes_written.status()) {
LOGGER__INFO("sync_write_raw_buffer was aborted!");
return expected_bytes_written.status();
// However, since remainder_size is modulo MAX_UDP_PAYLOAD_SIZE and BURST_SIZE == MAX_UDP_PAYLOAD_SIZE, it should be smaller.
(void)token_bucket.consumeWithBorrowAndWait(static_cast<double>(remainder_size), rate_bytes_per_sec, BURST_SIZE);
- auto expected_bytes_written = sync_write_raw_buffer(MemoryView(static_cast<uint8_t*>(buffer) + offset, remainder_size));
+ auto expected_bytes_written = sync_write_raw_buffer(MemoryView::create_const(static_cast<const uint8_t*>(buffer) + offset, remainder_size));
if (HAILO_STREAM_ABORTED_BY_USER == expected_bytes_written.status()) {
LOGGER__INFO("sync_write_raw_buffer was aborted!");
return expected_bytes_written.status();
{}
#endif
-hailo_status EthernetInputStream::eth_stream__write_all_with_sync(void *buffer, size_t offset, size_t size) {
+hailo_status EthernetInputStream::eth_stream__write_all_with_sync(const void *buffer, size_t offset, size_t size) {
hailo_status status = HAILO_UNINITIALIZED;
size_t number_of_frames = 0;
size_t frame_size = m_stream_info.hw_frame_size;
((hailo_output_sync_packet_t*)((uint8_t*)buffer + offset))->barker == BYTE_ORDER__ntohl(SYNC_PACKET_BARKER));
}
-hailo_status EthernetOutputStream::read_all(MemoryView &buffer)
+hailo_status EthernetOutputStream::read_impl(MemoryView &buffer)
{
if ((buffer.size() % HailoRTCommon::HW_DATA_ALIGNMENT) != 0) {
LOGGER__ERROR("Size must be aligned to {} (got {})", HailoRTCommon::HW_DATA_ALIGNMENT, buffer.size());
status = this->read_all_no_sync(buffer.data(), 0, buffer.size());
}
if (HAILO_STREAM_ABORTED_BY_USER == status) {
- LOGGER__INFO("read_all was aborted!");
+ LOGGER__INFO("read was aborted!");
return status;
}
CHECK_SUCCESS(status);
Device &m_device;
hailo_status eth_stream__config_input_sync_params(uint32_t frames_per_sync);
- hailo_status eth_stream__write_all_no_sync(void *buffer, size_t offset, size_t size);
- hailo_status eth_stream__write_all_with_sync(void *buffer, size_t offset, size_t size);
+ hailo_status eth_stream__write_all_no_sync(const void *buffer, size_t offset, size_t size);
+ hailo_status eth_stream__write_all_with_sync(const void *buffer, size_t offset, size_t size);
hailo_status set_timeout(std::chrono::milliseconds timeout);
void set_max_payload_size(uint16_t size);
protected:
- virtual hailo_status eth_stream__write_with_remainder(void *buffer, size_t offset, size_t size, size_t remainder_size);
- virtual Expected<size_t> sync_write_raw_buffer(const MemoryView &buffer) override;
- virtual hailo_status sync_write_all_raw_buffer_no_transform_impl(void *buffer, size_t offset, size_t size) override;
+ virtual hailo_status eth_stream__write_with_remainder(const void *buffer, size_t offset, size_t size, size_t remainder_size);
+ Expected<size_t> sync_write_raw_buffer(const MemoryView &buffer);
+ virtual hailo_status write_impl(const MemoryView &buffer) override;
public:
EthernetInputStream(Device &device, Udp &&udp, EventPtr &&core_op_activated_event, const LayerInfo &layer_info, hailo_status &status) :
static const uint32_t MAX_CONSUME_SIZE = MAX_UDP_PAYLOAD_SIZE;
protected:
- virtual hailo_status eth_stream__write_with_remainder(void *buffer, size_t offset, size_t size, size_t remainder_size);
+ virtual hailo_status eth_stream__write_with_remainder(const void *buffer, size_t offset, size_t size, size_t remainder_size) override;
public:
TokenBucketEthernetInputStream(Device &device, Udp &&udp, EventPtr &&core_op_activated_event,
Device &m_device;
EthernetOutputStream(Device &device, const LayerInfo &edge_layer, Udp &&udp, EventPtr &&core_op_activated_event, hailo_status &status) :
- OutputStreamBase(edge_layer, std::move(core_op_activated_event), status),
+ OutputStreamBase(edge_layer, HAILO_STREAM_INTERFACE_ETH, std::move(core_op_activated_event), status),
leftover_buffer(),
leftover_size(0),
// Firmware starts the sync sequence from 0, so treat the first "previous" value as the max value (it will overflow to 0)
m_device(device)
{}
- hailo_status read_all(MemoryView &buffer) override;
+ hailo_status read_impl(MemoryView &buffer) override;
hailo_status read_all_with_sync(void *buffer, size_t offset, size_t size);
hailo_status read_all_no_sync(void *buffer, size_t offset, size_t size);
public:
virtual ~EthernetOutputStream();
- virtual Expected<size_t> sync_read_raw_buffer(MemoryView &buffer);
+ Expected<size_t> sync_read_raw_buffer(MemoryView &buffer);
static Expected<std::unique_ptr<EthernetOutputStream>> create(Device &device, const LayerInfo &edge_layer,
const hailo_eth_output_stream_params_t ¶ms, EventPtr core_op_activated_event);
return make_unexpected(HAILO_INVALID_OPERATION);
}
+Expected<HwInferResults> HcpConfigCoreOp::run_hw_infer_estimator()
+{
+ LOGGER__ERROR("run_hw_infer_estimator function is not supported on ETH core-ops");
+ return make_unexpected(HAILO_INVALID_OPERATION);
+}
+
hailo_status HcpConfigCoreOp::activate_impl(uint16_t dynamic_batch_size, bool resume_pending_stream_transfers)
{
m_active_core_op_holder.set(*this);
virtual hailo_status activate_impl(uint16_t dynamic_batch_size, bool resume_pending_stream_transfers) override;
virtual hailo_status deactivate_impl(bool keep_nn_config_during_reset) override;
+ virtual Expected<HwInferResults> run_hw_infer_estimator() override;
virtual ~HcpConfigCoreOp() = default;
HcpConfigCoreOp(const HcpConfigCoreOp &other) = delete;
#include "hailo/network_rate_calculator.hpp"
#include "common/utils.hpp"
+#include "common/ethernet_utils.hpp"
#include "eth/eth_stream.hpp"
return results;
}
+hailo_status NetworkUdpRateCalculator::set_rate_limit(const std::string &ip, uint16_t port, uint32_t rate_bytes_per_sec)
+{
+#if defined(__GNUC__)
+ auto tc = TrafficControlUtil::create(ip, port, rate_bytes_per_sec);
+ CHECK_EXPECTED_AS_STATUS(tc);
+ CHECK_SUCCESS(tc->set_rate_limit());
+
+ return HAILO_SUCCESS;
+#else
+ (void)ip;
+ (void)port;
+ (void)rate_bytes_per_sec;
+ LOGGER__ERROR("set_rate_limit is only supported on Unix platforms");
+ return HAILO_NOT_IMPLEMENTED;
+#endif
+}
+
+hailo_status NetworkUdpRateCalculator::reset_rate_limit(const std::string &ip, uint16_t port)
+{
+#if defined(__GNUC__)
+ auto tc = TrafficControlUtil::create(ip, port, 0);
+ CHECK_EXPECTED_AS_STATUS(tc);
+ CHECK_SUCCESS(tc->reset_rate_limit());
+
+ return HAILO_SUCCESS;
+#else
+ (void)ip;
+ (void)port;
+ LOGGER__ERROR("reset_rate_limit is only supported on Unix platforms");
+ return HAILO_NOT_IMPLEMENTED;
+#endif
+}
+
+Expected<std::string> NetworkUdpRateCalculator::get_interface_name(const std::string &ip)
+{
+ return EthernetUtils::get_interface_from_board_ip(ip);
+}
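+
+// Usage sketch (illustrative only; the IP, port and rate are hypothetical values, and `calc`
+// is an already-created NetworkUdpRateCalculator):
+//   calc.set_rate_limit("10.0.0.1", 22401, 100 * 1000 * 1000); // cap the stream at ~100 MB/s
+//   ...
+//   calc.reset_rate_limit("10.0.0.1", 22401);                  // remove the limit when done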
+
} /* namespace hailort */
#include "utils/shared_resource_manager.hpp"
#include "vdevice/vdevice_internal.hpp"
#include "utils/profiler/tracer_macros.hpp"
+#include "utils/exported_resource_manager.hpp"
#include <chrono>
+#include <tuple>
using namespace hailort;
+// Note: Async stream API uses BufferPtr as a param. When exporting BufferPtrs to the user via c-api, they must be
+// stored in some container, otherwise their ref count may reach zero and they will be freed while the
+// c-api user is still using them (shared_ptr<T> doesn't have a release method like unique_ptr<T>).
+// Singleton holding a mapping between the address of a buffer allocated/mapped via hailo_allocate_buffer/hailo_dma_map_buffer_to_device
+// to the underlying BufferPtr. When a buffer is freed via hailo_free_buffer, the BufferPtr object will be removed from
+// the storage.
+using ExportedBufferManager = ExportedResourceManager<BufferPtr, void *>;
+
+struct ThreeTupleHash {
+ template<typename T>
+ std::size_t operator()(const T& tuple) const {
+ auto hash = std::hash<typename std::tuple_element<0, T>::type>()(std::get<0>(tuple));
+ hash ^= std::hash<typename std::tuple_element<1, T>::type>()(std::get<1>(tuple));
+ hash ^= std::hash<typename std::tuple_element<2, T>::type>()(std::get<2>(tuple));
+ return hash;
+ }
+};
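+
+// Note: XOR is a deliberately simple hash combiner and is order-insensitive; that is acceptable
+// here because the three element types differ, and (assuming the manager is backed by a
+// std::unordered_map) any residual collisions are resolved by full key comparison on lookup.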
+
+// (buffer_addr, device_id, mapping_direction)
+using DmaMappingKey = std::tuple<void *, std::string, hailo_dma_buffer_direction_t>;
+using DmaMappingManager = ExportedResourceManager<DmaStoragePtr, DmaMappingKey, ThreeTupleHash>;
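+
+// Key construction sketch (illustrative; "0000:01:00.0" stands in for a real device id):
+//   DmaMappingKey key = std::make_tuple(buffer_addr, std::string("0000:01:00.0"),
+//                                       HAILO_DMA_BUFFER_DIRECTION_H2D);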
+
COMPAT__INITIALIZER(hailort__initialize_logger)
{
// Init logger singleton if compiling only HailoRT
{
CHECK_ARG_NOT_NULL(device_out);
- auto device = (device_info == nullptr) ? PcieDevice::create() : PcieDevice::create(*device_info);
+ auto device = (device_info == nullptr) ? Device::create_pcie() : Device::create_pcie(*device_info);
CHECK_EXPECTED_AS_STATUS(device, "Failed creating pcie device");
*device_out = reinterpret_cast<hailo_device>(device.release().release());
return HAILO_SUCCESS;
}
+void fill_cfg_params_struct_by_class(const std::string &network_group_name, const ConfigureNetworkParams &class_in, hailo_configure_network_group_params_t *struct_out)
+{
+ strncpy(struct_out->name, network_group_name.c_str(), network_group_name.size() + 1);
+ struct_out->batch_size = class_in.batch_size;
+ struct_out->power_mode = class_in.power_mode;
+ struct_out->latency = class_in.latency;
+
+ int i = 0;
+ for (auto & pair: class_in.network_params_by_name) {
+ strncpy(struct_out->network_params_by_name[i].name, pair.first.c_str(), pair.first.length() + 1);
+ struct_out->network_params_by_name[i].network_params = pair.second;
+ i++;
+ }
+ struct_out->network_params_by_name_count = class_in.network_params_by_name.size();
+
+ i = 0;
+ for (auto & pair: class_in.stream_params_by_name) {
+ strncpy(struct_out->stream_params_by_name[i].name, pair.first.c_str(), pair.first.length() + 1);
+ struct_out->stream_params_by_name[i].stream_params = pair.second;
+ i++;
+ }
+ struct_out->stream_params_by_name_count = class_in.stream_params_by_name.size();
+}
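+
+// Note: each strncpy above copies length() + 1 bytes, so the NUL terminator is included but no
+// destination bound is enforced; this assumes every name fits in the fixed-size name field of
+// the destination struct.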
+
+hailo_status hailo_init_configure_params_by_vdevice(hailo_hef hef, hailo_vdevice vdevice,
+ hailo_configure_params_t *params)
+{
+ CHECK_ARG_NOT_NULL(hef);
+ CHECK_ARG_NOT_NULL(vdevice);
+ CHECK_ARG_NOT_NULL(params);
+
+ auto configure_params = (reinterpret_cast<VDevice*>(vdevice))->create_configure_params(*reinterpret_cast<Hef*>(hef));
+ CHECK_EXPECTED_AS_STATUS(configure_params);
+
+ params->network_group_params_count = configure_params->size();
+ uint8_t net_group = 0;
+ for (auto &cfg_params : configure_params.value()) {
+ fill_cfg_params_struct_by_class(cfg_params.first, cfg_params.second, &(params->network_group_params[net_group]));
+ net_group++;
+ }
+
+ return HAILO_SUCCESS;
+}
+
+hailo_status hailo_init_configure_params_by_device(hailo_hef hef, hailo_device device,
+ hailo_configure_params_t *params)
+{
+ CHECK_ARG_NOT_NULL(hef);
+ CHECK_ARG_NOT_NULL(device);
+ CHECK_ARG_NOT_NULL(params);
+
+ auto configure_params = (reinterpret_cast<Device*>(device))->create_configure_params(*reinterpret_cast<Hef*>(hef));
+ CHECK_EXPECTED_AS_STATUS(configure_params);
+
+ params->network_group_params_count = configure_params->size();
+ uint8_t net_group = 0;
+ for (auto &cfg_params : configure_params.value()) {
+ fill_cfg_params_struct_by_class(cfg_params.first, cfg_params.second, &(params->network_group_params[net_group]));
+ net_group++;
+ }
+
+ return HAILO_SUCCESS;
+}
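+
+// Usage sketch (illustrative; `hef` and `device` are assumed-valid handles):
+//   hailo_configure_params_t params = {};
+//   if (HAILO_SUCCESS == hailo_init_configure_params_by_device(hef, device, &params)) {
+//       params.network_group_params[0].batch_size = 8; // tweak defaults before configuring
+//   }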
+
+
hailo_status hailo_init_configure_params_mipi_input(hailo_hef hef, hailo_stream_interface_t output_interface,
hailo_mipi_input_stream_params_t *mipi_params, hailo_configure_params_t *params)
{
return (reinterpret_cast<ConfiguredNetworkGroup*>(configured_network_group))->set_scheduler_priority(priority, network_name_str);
}
+hailo_status hailo_allocate_buffer(size_t size, const hailo_buffer_parameters_t *allocation_params, void **buffer_out)
+{
+ CHECK_ARG_NOT_NULL(allocation_params);
+ CHECK_ARG_NOT_NULL(buffer_out);
+ CHECK(0 != size, HAILO_INVALID_ARGUMENT, "Buffer size must be greater than zero");
+
+ auto buffer_storage_params = BufferStorageParams::create(*allocation_params);
+ CHECK_EXPECTED_AS_STATUS(buffer_storage_params);
+
+ // Create buffer
+ auto buffer = Buffer::create_shared(size, *buffer_storage_params);
+ CHECK_EXPECTED_AS_STATUS(buffer);
+
+ // Store the buffer in manager (otherwise it'll be freed at the end of this func)
+ const auto status = ExportedBufferManager::register_resource(*buffer, buffer->get()->data());
+ CHECK_SUCCESS(status);
+
+ *buffer_out = buffer->get()->data();
+
+ return HAILO_SUCCESS;
+}
+
+hailo_status hailo_free_buffer(void *buffer)
+{
+ CHECK_ARG_NOT_NULL(buffer);
+ return ExportedBufferManager::unregister_resource(buffer);
+}
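+
+// Allocation round-trip sketch (illustrative; real allocation params depend on the use case):
+//   void *buffer = nullptr;
+//   hailo_buffer_parameters_t alloc_params{}; // hypothetical defaults
+//   if (HAILO_SUCCESS == hailo_allocate_buffer(frame_size, &alloc_params, &buffer)) {
+//       // ... use buffer; the manager keeps the underlying BufferPtr alive ...
+//       hailo_free_buffer(buffer); // unregisters and releases the BufferPtr
+//   }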
+
+static Expected<DmaMappingKey> get_mapping_key(void *buffer, hailo_device device, hailo_dma_buffer_direction_t direction)
+{
+ hailo_device_id_t device_id{};
+ auto status = hailo_get_device_id(device, &device_id);
+ CHECK_SUCCESS_AS_EXPECTED(status);
+
+ return std::make_tuple(buffer, std::string(device_id.id), direction);
+}
+
+// TODO: hailo_dma_map_buffer_to_device/hailo_dma_unmap_buffer_from_device aren't thread safe when crossed with
+// hailo_allocate_buffer/hailo_free_buffer (HRT-10669)
+hailo_status hailo_dma_map_buffer_to_device(void *buffer, size_t size, hailo_device device, hailo_dma_buffer_direction_t direction)
+{
+ CHECK_ARG_NOT_NULL(buffer);
+ CHECK_ARG_NOT_NULL(device);
+
+ auto hailort_allocated_buffer = ExportedBufferManager::get_resource(buffer);
+ if (hailort_allocated_buffer) {
+ // TODO: this will change here HRT-10983
+ // The buffer has been allocated by hailort
+ // The mapping is held by the Buffer object
+ auto mapping_result = hailort_allocated_buffer->get()->storage().dma_map(*reinterpret_cast<Device*>(device), direction);
+ CHECK_EXPECTED_AS_STATUS(mapping_result);
+ const auto new_mapping = mapping_result.value();
+
+ if (!new_mapping) {
+ return HAILO_DMA_MAPPING_ALREADY_EXISTS;
+ }
+ } else {
+ // The buffer has been allocated by the user
+ // Create dma storage
+ auto dma_mapped_buffer = DmaStorage::create_from_user_address(buffer, size, direction, *reinterpret_cast<Device*>(device));
+ CHECK_EXPECTED_AS_STATUS(dma_mapped_buffer);
+ assert(buffer == dma_mapped_buffer.value()->user_address());
+ auto dma_mapped_buffer_ptr = dma_mapped_buffer.release();
+
+ // Store the mapping in manager (otherwise it'll be freed at the end of this func)
+ auto key = get_mapping_key(dma_mapped_buffer_ptr->user_address(), device, direction);
+ CHECK_EXPECTED_AS_STATUS(key);
+ const auto status = DmaMappingManager::register_resource(dma_mapped_buffer_ptr, key.release());
+ if (HAILO_INVALID_ARGUMENT == status) {
+ // TODO: This will change once we allow mapping the same buffer in different directions (HRT-10656).
+ // Checking that the mapping exists will need to be at DmaStorage's level
+ return HAILO_DMA_MAPPING_ALREADY_EXISTS;
+ }
+ CHECK_SUCCESS(status);
+ }
+
+ return HAILO_SUCCESS;
+}
+
+hailo_status hailo_dma_unmap_buffer_from_device(void *buffer, hailo_device device, hailo_dma_buffer_direction_t direction)
+{
+ // TODO: support mapping the same buffer in different directions (HRT-10656)
+ (void)direction;
+
+ CHECK_ARG_NOT_NULL(buffer);
+ CHECK_ARG_NOT_NULL(device);
+
+ auto hailort_allocated_buffer = ExportedBufferManager::get_resource(buffer);
+ if (hailort_allocated_buffer) {
+ // TODO: mappings get dtor'd when the Buffer object is dtor'd.
+        // We want all the mappings to be held in one place for hailort::Buffers and for user-allocated buffers,
+ // so this will change (HRT-10983)
+ return HAILO_SUCCESS;
+ }
+
+ auto key = get_mapping_key(buffer, device, direction);
+ CHECK_EXPECTED_AS_STATUS(key);
+ return DmaMappingManager::unregister_resource(key.release());
+}
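+
+// Pairing sketch (illustrative; `direction` is currently ignored on unmap, per the TODO above,
+// but passing the same value keeps callers forward-compatible):
+//   hailo_dma_map_buffer_to_device(buf, size, device, HAILO_DMA_BUFFER_DIRECTION_H2D);
+//   // ... run async transfers that use buf ...
+//   hailo_dma_unmap_buffer_from_device(buf, device, HAILO_DMA_BUFFER_DIRECTION_H2D);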
+
hailo_status hailo_calculate_eth_input_rate_limits(hailo_hef hef, const char *network_group_name, uint32_t fps,
hailo_rate_limit_t *rates, size_t *rates_length)
{
return HAILO_SUCCESS;
}
+hailo_status hailo_stream_wait_for_async_output_ready(hailo_output_stream stream, size_t transfer_size, uint32_t timeout_ms)
+{
+ CHECK_ARG_NOT_NULL(stream);
+ return (reinterpret_cast<OutputStream*>(stream))->wait_for_async_ready(transfer_size, std::chrono::milliseconds(timeout_ms));
+}
+
+hailo_status hailo_stream_wait_for_async_input_ready(hailo_input_stream stream, size_t transfer_size, uint32_t timeout_ms)
+{
+ CHECK_ARG_NOT_NULL(stream);
+ return (reinterpret_cast<InputStream*>(stream))->wait_for_async_ready(transfer_size, std::chrono::milliseconds(timeout_ms));
+}
+
+hailo_status hailo_output_stream_get_async_max_queue_size(hailo_output_stream stream, size_t *queue_size)
+{
+ CHECK_ARG_NOT_NULL(stream);
+ CHECK_ARG_NOT_NULL(queue_size);
+
+ auto local_queue_size = reinterpret_cast<OutputStream*>(stream)->get_async_max_queue_size();
+ CHECK_EXPECTED_AS_STATUS(local_queue_size);
+ *queue_size = local_queue_size.release();
+
+ return HAILO_SUCCESS;
+}
+
+hailo_status hailo_input_stream_get_async_max_queue_size(hailo_input_stream stream, size_t *queue_size)
+{
+ CHECK_ARG_NOT_NULL(stream);
+ CHECK_ARG_NOT_NULL(queue_size);
+
+ auto local_queue_size = reinterpret_cast<InputStream*>(stream)->get_async_max_queue_size();
+ CHECK_EXPECTED_AS_STATUS(local_queue_size);
+ *queue_size = local_queue_size.release();
+
+ return HAILO_SUCCESS;
+}
+
+static InputStream::TransferDoneCallback wrap_c_user_callback(hailo_stream_write_async_callback_t callback, void *opaque)
+{
+ return [callback, opaque](const InputStream::CompletionInfo &completion_info) {
+ hailo_stream_write_async_completion_info_t c_completion_info{};
+ c_completion_info.status = completion_info.status;
+ c_completion_info.buffer_addr = completion_info.buffer_addr;
+ c_completion_info.buffer_size = completion_info.buffer_size;
+ c_completion_info.opaque = opaque;
+ callback(&c_completion_info);
+ };
+}
+
+static OutputStream::TransferDoneCallback wrap_c_user_callback(hailo_stream_read_async_callback_t callback, void *opaque)
+{
+ return [callback, opaque](const OutputStream::CompletionInfo &completion_info) {
+ hailo_stream_read_async_completion_info_t c_completion_info{};
+ c_completion_info.status = completion_info.status;
+ c_completion_info.buffer_addr = completion_info.buffer_addr;
+ c_completion_info.buffer_size = completion_info.buffer_size;
+ c_completion_info.opaque = opaque;
+ callback(&c_completion_info);
+ };
+}
+
+hailo_status hailo_stream_read_raw_buffer_async(hailo_output_stream stream, void *buffer, size_t size,
+ hailo_stream_read_async_callback_t callback, void *opaque)
+{
+ CHECK_ARG_NOT_NULL(stream);
+ CHECK_ARG_NOT_NULL(buffer);
+ CHECK_ARG_NOT_NULL(callback);
+
+ auto buffer_ref = ExportedBufferManager::get_resource(buffer);
+ if (HAILO_NOT_FOUND == buffer_ref.status()) {
+ // User addr (buffer hasn't been allocated by hailo_allocate_buffer)
+ return (reinterpret_cast<OutputStream*>(stream))->read_async(buffer, size,
+ wrap_c_user_callback(callback, opaque));
+ }
+
+ // buffer has been allocated by hailo_allocate_buffer
+ CHECK_EXPECTED_AS_STATUS(buffer_ref);
+ auto buffer_ptr = buffer_ref->get();
+ assert(buffer_ptr != nullptr);
+ CHECK(size == buffer_ptr->size(), HAILO_INVALID_ARGUMENT);
+
+ return (reinterpret_cast<OutputStream*>(stream))->read_async(buffer_ptr,
+ wrap_c_user_callback(callback, opaque));
+}
+
+hailo_status hailo_stream_write_raw_buffer_async(hailo_input_stream stream, const void *buffer, size_t size,
+ hailo_stream_write_async_callback_t callback, void *opaque)
+{
+ CHECK_ARG_NOT_NULL(stream);
+ CHECK_ARG_NOT_NULL(buffer);
+ CHECK_ARG_NOT_NULL(callback);
+
+ auto buffer_ref = ExportedBufferManager::get_resource(const_cast<void *>(buffer));
+ if (HAILO_NOT_FOUND == buffer_ref.status()) {
+ // User addr (buffer hasn't been allocated by hailo_allocate_buffer)
+ return (reinterpret_cast<InputStream*>(stream))->write_async(buffer, size,
+ wrap_c_user_callback(callback, opaque));
+ }
+
+ // buffer has been allocated by hailo_allocate_buffer
+ CHECK_EXPECTED_AS_STATUS(buffer_ref);
+ auto buffer_ptr = buffer_ref->get();
+ assert(buffer_ptr != nullptr);
+ CHECK(size == buffer_ptr->size(), HAILO_INVALID_ARGUMENT);
+
+ return (reinterpret_cast<InputStream*>(stream))->write_async(buffer_ptr,
+ wrap_c_user_callback(callback, opaque));
+}
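+
+// Callback-driven write sketch (illustrative; `stream`, `frame` and `frame_size` are assumptions):
+//   static void on_write_done(const hailo_stream_write_async_completion_info_t *info) {
+//       // info->status reports the transfer result; info->opaque carries the user context
+//   }
+//   hailo_stream_write_raw_buffer_async(stream, frame, frame_size, on_write_done, nullptr);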
+
hailo_status hailo_fuse_nms_frames(const hailo_nms_fuse_input_t *nms_fuse_inputs, uint32_t inputs_count,
uint8_t *fused_buffer, size_t fused_buffer_size)
{
return HAILO_SUCCESS;
}
+hailo_status hailo_demux_by_name_raw_frame_by_output_demuxer(hailo_output_demuxer demuxer, const void *src,
+ size_t src_size, hailo_stream_raw_buffer_by_name_t *raw_buffers_by_name, size_t raw_buffers_count)
+{
+ CHECK_ARG_NOT_NULL(src);
+ CHECK_ARG_NOT_NULL(raw_buffers_by_name);
+ CHECK_ARG_NOT_NULL(demuxer);
+
+ std::map<std::string, MemoryView> raw_buffers_map;
+ for (size_t i = 0; i < raw_buffers_count; i++) {
+ raw_buffers_map.emplace(std::string(raw_buffers_by_name[i].name),
+ MemoryView(raw_buffers_by_name[i].raw_buffer.buffer, raw_buffers_by_name[i].raw_buffer.size));
+ }
+ auto src_memview = MemoryView::create_const(src, src_size);
+ auto status = reinterpret_cast<OutputDemuxer*>(demuxer)->transform_demux(src_memview, raw_buffers_map);
+ CHECK_SUCCESS(status);
+
+ return HAILO_SUCCESS;
+}
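+
+// Demux sketch (illustrative; layer names and buffer sizes would come from the demuxer's mux infos):
+//   hailo_stream_raw_buffer_by_name_t outputs[2] = {}; // fill .name and .raw_buffer per layer
+//   hailo_demux_by_name_raw_frame_by_output_demuxer(demuxer, src, src_size, outputs, 2);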
+
hailo_status hailo_get_mux_infos_by_output_demuxer(hailo_output_demuxer demuxer, hailo_stream_info_t *stream_infos,
size_t *number_of_streams)
{
hailo_format_t HailoRTDefaults::expand_auto_format(const hailo_format_t &host_format, const hailo_format_t &hw_format)
{
+ if (HAILO_FORMAT_ORDER_HAILO_NMS == hw_format.order) {
+ assert(HAILO_FORMAT_TYPE_UINT16 == hw_format.type);
+ // TODO (HRT-11082): On NMS, change meaning of auto to float
+ if (HAILO_FORMAT_TYPE_AUTO == host_format.type) {
+ LOGGER__WARNING("Received 'HAILO_FORMAT_TYPE_AUTO' for NMS output, which is currently translated as HAILO_FORMAT_TYPE_UINT16. "\
+ "Starting HailoRT version 4.15, this will change to HAILO_FORMAT_TYPE_FLOAT32");
+ }
+ }
auto host_format_copy = host_format;
if (HAILO_FORMAT_TYPE_AUTO == host_format_copy.type) {
host_format_copy.type = hw_format.type;
namespace hailort
{
-
static uint8_t pack_vdma_channel_id(const vdma::ChannelId &channel_id)
{
return static_cast<uint8_t>(channel_id.channel_index |
Expected<ContextSwitchConfigActionPtr> NoneAction::create()
{
auto result = ContextSwitchConfigActionPtr(new (std::nothrow) NoneAction());
- CHECK_AS_EXPECTED((nullptr != result), HAILO_OUT_OF_HOST_MEMORY);
+ CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY);
return result;
}
{
auto result = ContextSwitchConfigActionPtr(new (std::nothrow) ActivateConfigChannelAction(config_stream_index,
channel_id, host_buffer_info));
- CHECK_AS_EXPECTED((nullptr != result), HAILO_OUT_OF_HOST_MEMORY);
+ CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY);
return result;
}
{
auto result = ContextSwitchConfigActionPtr(new (std::nothrow) DeactivateConfigChannelAction(config_stream_index,
channel_id));
- CHECK_AS_EXPECTED((nullptr != result), HAILO_OUT_OF_HOST_MEMORY);
+ CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY);
return result;
}
"Too many ccw burst {} (must fit in uint16)", total_ccw_burst);
auto result = ContextSwitchConfigActionPtr(new (std::nothrow) WriteDataCcwAction(
std::move(data), config_stream_index, static_cast<uint16_t>(total_ccw_burst)));
- CHECK_AS_EXPECTED((nullptr != result), HAILO_OUT_OF_HOST_MEMORY);
+ CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY);
return result;
}
Expected<ContextSwitchConfigActionPtr> AddCcwBurstAction::create(uint8_t config_stream_index, uint16_t ccw_bursts)
{
auto result = ContextSwitchConfigActionPtr(new (std::nothrow) AddCcwBurstAction(config_stream_index, ccw_bursts));
- CHECK_AS_EXPECTED((nullptr != result), HAILO_OUT_OF_HOST_MEMORY);
+ CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY);
return result;
}
"On cfg with continuous mode, max descriptors size must fit in uint16_t");
auto result = ContextSwitchConfigActionPtr(new (std::nothrow) FetchCfgChannelDescriptorsAction(channel_id,
static_cast<uint16_t>(desc_count)));
- CHECK_AS_EXPECTED((nullptr != result), HAILO_OUT_OF_HOST_MEMORY);
+ CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY);
return result;
}
Expected<ContextSwitchConfigActionPtr> StartBurstCreditsTaskAction::create()
{
auto result = ContextSwitchConfigActionPtr(new (std::nothrow) StartBurstCreditsTaskAction());
- CHECK_AS_EXPECTED((nullptr != result), HAILO_OUT_OF_HOST_MEMORY);
+ CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY);
return result;
}
Expected<ContextSwitchConfigActionPtr> WaitForNetworkGroupChangeAction::create()
{
auto result = ContextSwitchConfigActionPtr(new (std::nothrow) WaitForNetworkGroupChangeAction());
- CHECK_AS_EXPECTED((nullptr != result), HAILO_OUT_OF_HOST_MEMORY);
+ CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY);
return result;
}
"Invalid repeated sub-action type (can't have sub-action with type CONTEXT_SWITCH_DEFS__ACTION_TYPE_COUNT)");
auto result = ContextSwitchConfigActionPtr(new (std::nothrow) RepeatedAction(std::move(actions)));
- CHECK_AS_EXPECTED((nullptr != result), HAILO_OUT_OF_HOST_MEMORY);
+ CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY);
return result;
}
Expected<ContextSwitchConfigActionPtr> DisableLcuAction::create(uint8_t cluster_index, uint8_t lcu_index)
{
auto result = ContextSwitchConfigActionPtr(new (std::nothrow) DisableLcuAction(cluster_index, lcu_index));
- CHECK_AS_EXPECTED((nullptr != result), HAILO_OUT_OF_HOST_MEMORY);
+ CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY);
return result;
}
Expected<ContextSwitchConfigActionPtr> WaitForLcuAction::create(uint8_t cluster_index, uint8_t lcu_index)
{
auto result = ContextSwitchConfigActionPtr(new (std::nothrow) WaitForLcuAction(cluster_index, lcu_index));
- CHECK_AS_EXPECTED((nullptr != result), HAILO_OUT_OF_HOST_MEMORY);
+ CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY);
return result;
}
(CONTEXT_SWITCH_DEFS__ENABLE_LCU_DEFAULT_KERNEL_COUNT == kernel_done_count);
auto result = ContextSwitchConfigActionPtr(new (std::nothrow) EnableLcuAction(cluster_index, lcu_index,
network_index, kernel_done_address, kernel_done_count, is_default));
- CHECK_AS_EXPECTED((nullptr != result), HAILO_OUT_OF_HOST_MEMORY);
+ CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY);
return result;
}
{
auto result = ContextSwitchConfigActionPtr(new (std::nothrow) EnableSequencerAction(cluster_index, initial_l3_cut,
initial_l3_offset, active_apu, active_ia, active_sc, active_l2, l2_offset_0, l2_offset_1));
- CHECK_AS_EXPECTED((nullptr != result), HAILO_OUT_OF_HOST_MEMORY);
+ CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY);
return result;
}
Expected<ContextSwitchConfigActionPtr> WaitForSequencerAction::create(uint8_t cluster_index)
{
auto result = ContextSwitchConfigActionPtr(new (std::nothrow) WaitForSequencerAction(cluster_index));
- CHECK_AS_EXPECTED((nullptr != result), HAILO_OUT_OF_HOST_MEMORY);
+ CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY);
return result;
}
Expected<ContextSwitchConfigActionPtr> AllowInputDataflowAction::create(uint8_t stream_index)
{
auto result = ContextSwitchConfigActionPtr(new (std::nothrow) AllowInputDataflowAction(stream_index));
- CHECK_AS_EXPECTED((nullptr != result), HAILO_OUT_OF_HOST_MEMORY);
+ CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY);
return result;
}
Expected<Buffer> AllowInputDataflowAction::serialize_params(const ContextResources &context_resources) const
{
- const auto edge_layer = context_resources.get_edge_layer_by_stream_index(m_stream_index);
+    // H2D direction because this is an input action
+ const auto edge_layer = context_resources.get_edge_layer_by_stream_index(m_stream_index, HAILO_H2D_STREAM);
CHECK_EXPECTED(edge_layer);
CONTEXT_SWITCH_DEFS__fetch_data_action_data_t params{};
Expected<ContextSwitchConfigActionPtr> WaitForModuleConfigDoneAction::create(uint8_t module_index)
{
auto result = ContextSwitchConfigActionPtr(new (std::nothrow) WaitForModuleConfigDoneAction(module_index));
- CHECK_AS_EXPECTED((nullptr != result), HAILO_OUT_OF_HOST_MEMORY);
+ CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY);
return result;
}
{
auto result = ContextSwitchConfigActionPtr(new (std::nothrow) DdrPairInfoAction(
h2d_channel_id, d2h_channel_id, network_index, descriptors_per_frame, descs_count));
- CHECK_AS_EXPECTED((nullptr != result), HAILO_OUT_OF_HOST_MEMORY);
+ CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY);
return result;
}
Expected<ContextSwitchConfigActionPtr> StartDdrBufferingTaskAction::create()
{
auto result = ContextSwitchConfigActionPtr(new (std::nothrow) StartDdrBufferingTaskAction());
- CHECK_AS_EXPECTED((nullptr != result), HAILO_OUT_OF_HOST_MEMORY);
+ CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY);
return result;
}
Expected<ContextSwitchConfigActionPtr> ResetDdrBufferingTaskAction::create()
{
auto result = ContextSwitchConfigActionPtr(new (std::nothrow) ResetDdrBufferingTaskAction());
- CHECK_AS_EXPECTED((nullptr != result), HAILO_OUT_OF_HOST_MEMORY);
+ CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY);
return result;
}
{
auto result = ContextSwitchConfigActionPtr(new (std::nothrow) ChangeVdmaToStreamMapping(channel_id, stream_index,
is_dummy_stream));
- CHECK_AS_EXPECTED((nullptr != result), HAILO_OUT_OF_HOST_MEMORY);
+ CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY);
return result;
}
Expected<ContextSwitchConfigActionPtr> WaitOutputTransferDoneAction::create(uint8_t stream_index)
{
auto result = ContextSwitchConfigActionPtr(new (std::nothrow) WaitOutputTransferDoneAction(stream_index));
- CHECK_AS_EXPECTED((nullptr != result), HAILO_OUT_OF_HOST_MEMORY);
+ CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY);
return result;
}
Expected<Buffer> WaitOutputTransferDoneAction::serialize_params(const ContextResources &context_resources) const
{
- const auto edge_layer = context_resources.get_edge_layer_by_stream_index(m_stream_index);
+    // D2H direction because this is an output action
+ const auto edge_layer = context_resources.get_edge_layer_by_stream_index(m_stream_index, HAILO_D2H_STREAM);
CHECK_EXPECTED(edge_layer);
CONTEXT_SWITCH_DEFS__vdma_dataflow_interrupt_data_t params{};
{
auto result = ContextSwitchConfigActionPtr(new (std::nothrow) OpenBoundaryInputChannelAction(channel_id,
host_buffer_info));
- CHECK_AS_EXPECTED((nullptr != result), HAILO_OUT_OF_HOST_MEMORY);
+ CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY);
return result;
}
{
auto result = ContextSwitchConfigActionPtr(new (std::nothrow) OpenBoundaryOutputChannelAction(channel_id,
host_buffer_info));
- CHECK_AS_EXPECTED((nullptr != result), HAILO_OUT_OF_HOST_MEMORY);
+ CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY);
return result;
}
{
auto result = ContextSwitchConfigActionPtr(new (std::nothrow) ActivateBoundaryInputChannelAction(channel_id,
stream_index, nn_stream_config, host_buffer_info, initial_credit_size));
- CHECK_AS_EXPECTED((nullptr != result), HAILO_OUT_OF_HOST_MEMORY);
+ CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY);
return result;
}
{
auto result = ContextSwitchConfigActionPtr(new (std::nothrow) ActivateBoundaryOutputChannelAction(channel_id,
stream_index, nn_stream_config, host_buffer_info));
- CHECK_AS_EXPECTED((nullptr != result), HAILO_OUT_OF_HOST_MEMORY);
+ CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY);
return result;
}
{
auto result = ContextSwitchConfigActionPtr(new (std::nothrow) ActivateInterContextInputChannelAction(channel_id,
stream_index, nn_stream_config, host_buffer_info, initial_credit_size));
- CHECK_AS_EXPECTED((nullptr != result), HAILO_OUT_OF_HOST_MEMORY);
+ CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY);
return result;
}
{
auto result = ContextSwitchConfigActionPtr(new (std::nothrow) ActivateInterContextOutputChannelAction(channel_id,
stream_index, network_index, nn_stream_config, host_buffer_info));
- CHECK_AS_EXPECTED((nullptr != result), HAILO_OUT_OF_HOST_MEMORY);
+ CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY);
return result;
}
{
auto result = ContextSwitchConfigActionPtr(new (std::nothrow) ActivateDdrInputChannelAction(channel_id,
stream_index, nn_stream_config, host_buffer_info, initial_credit_size, connected_d2h_channel_id));
- CHECK_AS_EXPECTED((nullptr != result), HAILO_OUT_OF_HOST_MEMORY);
+ CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY);
return result;
}
{
auto result = ContextSwitchConfigActionPtr(new (std::nothrow) ActivateDdrOutputChannelAction(channel_id,
stream_index, nn_stream_config, host_buffer_info, buffered_rows_count));
- CHECK_AS_EXPECTED((nullptr != result), HAILO_OUT_OF_HOST_MEMORY);
+ CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY);
return result;
}
edge_layer.layer_info.direction, is_inter_context,
static_cast<CONTROL_PROTOCOL__HOST_BUFFER_TYPE_t>(edge_layer.buffer_info.buffer_type),
edge_layer.layer_info.max_shmifo_size));
- CHECK_AS_EXPECTED((nullptr != result), HAILO_OUT_OF_HOST_MEMORY);
+ CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY);
return result;
}
edge_layer.layer_info.direction, is_inter_context,
static_cast<CONTROL_PROTOCOL__HOST_BUFFER_TYPE_t>(edge_layer.buffer_info.buffer_type),
edge_layer.layer_info.max_shmifo_size));
- CHECK_AS_EXPECTED((nullptr != result), HAILO_OUT_OF_HOST_MEMORY);
+ CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY);
return result;
}
Expected<ContextSwitchConfigActionPtr> WaitDmaIdleAction::create(uint8_t stream_index)
{
auto result = ContextSwitchConfigActionPtr(new (std::nothrow) WaitDmaIdleAction(stream_index));
- CHECK_AS_EXPECTED((nullptr != result), HAILO_OUT_OF_HOST_MEMORY);
+ CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY);
return result;
}
Expected<Buffer> WaitDmaIdleAction::serialize_params(const ContextResources &context_resources) const
{
- const auto edge_layer = context_resources.get_edge_layer_by_stream_index(m_stream_index);
+    // D2H direction because this is an output action
+ const auto edge_layer = context_resources.get_edge_layer_by_stream_index(m_stream_index, HAILO_D2H_STREAM);
CHECK_EXPECTED(edge_layer);
CONTEXT_SWITCH_DEFS__wait_dma_idle_data_t params{};
auto result = ContextSwitchConfigActionPtr(new (std::nothrow) WaitNmsIdleAction(aggregator_index,
pred_cluster_ob_index, pred_cluster_ob_cluster_index, pred_cluster_ob_interface, succ_prepost_ob_index,
succ_prepost_ob_interface));
- CHECK_AS_EXPECTED((nullptr != result), HAILO_OUT_OF_HOST_MEMORY);
+ CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY);
return result;
}
return Buffer::create(reinterpret_cast<uint8_t*>(¶ms), sizeof(params));
}
-Expected<ContextSwitchConfigActionPtr> EnableNmsAction::create(uint8_t nms_unit_index, uint8_t network_index)
+Expected<ContextSwitchConfigActionPtr> EnableNmsAction::create(uint8_t nms_unit_index, uint8_t network_index, uint16_t number_of_classes,
+ uint16_t burst_size)
{
- auto result = ContextSwitchConfigActionPtr(new (std::nothrow) EnableNmsAction(nms_unit_index, network_index));
- CHECK_AS_EXPECTED((nullptr != result), HAILO_OUT_OF_HOST_MEMORY);
+ auto result = ContextSwitchConfigActionPtr(new (std::nothrow) EnableNmsAction(nms_unit_index, network_index, number_of_classes, burst_size));
+ CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY);
return result;
}
-EnableNmsAction::EnableNmsAction(uint8_t nms_unit_index, uint8_t network_index) :
+EnableNmsAction::EnableNmsAction(uint8_t nms_unit_index, uint8_t network_index, uint16_t number_of_classes, uint16_t burst_size) :
ContextSwitchConfigAction(ContextSwitchConfigAction::Type::EnableNms, CONTEXT_SWITCH_DEFS__ACTION_TYPE_ENABLE_NMS),
m_nms_unit_index(nms_unit_index),
- m_network_index(network_index)
+ m_network_index(network_index),
+ m_number_of_classes(number_of_classes),
+ m_burst_size(burst_size)
{}
Expected<Buffer> EnableNmsAction::serialize_params(const ContextResources &) const
CONTEXT_SWITCH_DEFS__enable_nms_action_t params{};
params.nms_unit_index = m_nms_unit_index;
params.network_index = m_network_index;
+ params.number_of_classes = m_number_of_classes;
+ params.burst_size = m_burst_size;
return Buffer::create(reinterpret_cast<uint8_t*>(¶ms), sizeof(params));
}
return true;
}
+Expected<ContextSwitchConfigActionPtr> WriteDataByTypeAction::create(uint32_t address, uint8_t data_type, uint32_t data,
+ uint8_t shift, uint32_t mask, uint8_t network_index)
+{
+ auto result = ContextSwitchConfigActionPtr(new (std::nothrow) WriteDataByTypeAction(address, data_type, data, shift, mask, network_index));
+ CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY);
+ return result;
+}
+
+WriteDataByTypeAction::WriteDataByTypeAction(uint32_t address, uint8_t data_type, uint32_t data, uint8_t shift, uint32_t mask, uint8_t network_index) :
+ ContextSwitchConfigAction(ContextSwitchConfigAction::Type::WriteDataByType, CONTEXT_SWITCH_DEFS__ACTION_TYPE_WRITE_DATA_BY_TYPE),
+ m_address(address),
+ m_data_type(data_type),
+ m_data(data),
+ m_shift(shift),
+ m_mask(mask),
+ m_network_index(network_index)
+{}
+
+Expected<Buffer> WriteDataByTypeAction::serialize_params(const ContextResources &) const
+{
+ CONTEXT_SWITCH_DEFS__write_data_by_type_action_t params{};
+ params.address = m_address;
+ params.data_type = m_data_type;
+ params.data = m_data;
+ params.shift = m_shift;
+ params.mask = m_mask;
+ params.network_index = m_network_index;
+
+ return Buffer::create(reinterpret_cast<uint8_t*>(¶ms), sizeof(params));
+}
+
+bool WriteDataByTypeAction::supports_repeated_block() const
+{
+ return false;
+}
+
+Expected<ContextSwitchConfigActionPtr> SwitchLcuBatchAction::create(uint8_t cluster_index, uint8_t lcu_index, uint8_t network_index,
+ uint32_t kernel_done_count)
+{
+ auto result = ContextSwitchConfigActionPtr(new (std::nothrow) SwitchLcuBatchAction(cluster_index, lcu_index, network_index, kernel_done_count));
+ CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY);
+ return result;
+}
+
+SwitchLcuBatchAction::SwitchLcuBatchAction(uint8_t cluster_index, uint8_t lcu_index, uint8_t network_index, uint32_t kernel_done_count) :
+ ContextSwitchConfigAction(Type::SwitchLcuBatch, CONTEXT_SWITCH_DEFS__ACTION_TYPE_SWITCH_LCU_BATCH),
+ m_cluster_index(cluster_index),
+ m_lcu_index(lcu_index),
+ m_network_index(network_index),
+ m_kernel_done_count(kernel_done_count)
+{}
+
+bool SwitchLcuBatchAction::supports_repeated_block() const
+{
+ return true;
+}
+
+Expected<Buffer> SwitchLcuBatchAction::serialize_params(const ContextResources &) const
+{
+ CONTEXT_SWITCH_DEFS__switch_lcu_batch_action_data_t params{};
+ params.packed_lcu_id = pack_lcu_id(m_cluster_index, m_lcu_index);
+ params.network_index = m_network_index;
+ params.kernel_done_count = m_kernel_done_count;
+ return Buffer::create(reinterpret_cast<uint8_t*>(¶ms), sizeof(params));
+}
+
} /* namespace hailort */
WaitDmaIdle,
WaitNmsIdle,
EnableNms,
+ WriteDataByType,
+ SwitchLcuBatch,
};
ContextSwitchConfigAction(ContextSwitchConfigAction &&) = default;
class EnableNmsAction : public ContextSwitchConfigAction
{
public:
- static Expected<ContextSwitchConfigActionPtr> create(uint8_t nms_unit_index, uint8_t network_index);
+ static Expected<ContextSwitchConfigActionPtr> create(uint8_t nms_unit_index, uint8_t network_index, uint16_t number_of_classes,
+ uint16_t burst_size);
EnableNmsAction(EnableNmsAction &&) = default;
EnableNmsAction(const EnableNmsAction &) = delete;
EnableNmsAction &operator=(EnableNmsAction &&) = delete;
virtual Expected<Buffer> serialize_params(const ContextResources &context_resources) const override;
private:
- EnableNmsAction(uint8_t nms_unit_index, uint8_t network_index);
+ EnableNmsAction(uint8_t nms_unit_index, uint8_t network_index, uint16_t number_of_classes, uint16_t burst_size);
const uint8_t m_nms_unit_index;
const uint8_t m_network_index;
+ const uint16_t m_number_of_classes;
+ const uint16_t m_burst_size;
};
+class WriteDataByTypeAction : public ContextSwitchConfigAction
+{
+public:
+ static Expected<ContextSwitchConfigActionPtr> create(uint32_t address, uint8_t data_type, uint32_t data,
+ uint8_t shift, uint32_t mask, uint8_t network_index);
+
+ virtual bool supports_repeated_block() const override;
+ virtual Expected<Buffer> serialize_params(const ContextResources &context_resources) const override;
+
+private:
+ WriteDataByTypeAction(uint32_t address, uint8_t data_type, uint32_t data, uint8_t shift, uint32_t mask, uint8_t network_index);
+
+ const uint32_t m_address;
+ const uint8_t m_data_type;
+ const uint32_t m_data;
+ const uint8_t m_shift;
+ const uint32_t m_mask;
+ const uint8_t m_network_index;
+
+};
+
+class SwitchLcuBatchAction : public ContextSwitchConfigAction
+{
+public:
+ static Expected<ContextSwitchConfigActionPtr> create(uint8_t cluster_index, uint8_t lcu_index, uint8_t network_index,
+ uint32_t kernel_done_count);
+ SwitchLcuBatchAction(SwitchLcuBatchAction &&) = default;
+ SwitchLcuBatchAction(const SwitchLcuBatchAction &) = delete;
+ SwitchLcuBatchAction &operator=(SwitchLcuBatchAction &&) = delete;
+ SwitchLcuBatchAction &operator=(const SwitchLcuBatchAction &) = delete;
+ virtual ~SwitchLcuBatchAction() = default;
+ virtual bool supports_repeated_block() const override;
+ virtual Expected<Buffer> serialize_params(const ContextResources &context_resources) const override;
+
+private:
+ SwitchLcuBatchAction(uint8_t cluster_index, uint8_t lcu_index, uint8_t network_index, uint32_t kernel_done_count);
+
+ const uint8_t m_cluster_index;
+ const uint8_t m_lcu_index;
+ const uint8_t m_network_index;
+ const uint32_t m_kernel_done_count;
+};
+
+
} /* namespace hailort */
#endif /* _HAILO_CONTEXT_SWITCH_ACTIONS_HPP_ */
**/
#include "core_op_metadata.hpp"
+#include "hef_internal.hpp"
#include <numeric>
namespace hailort
ContextMetadata &&preliminary_context,
std::vector<ContextMetadata> &&dynamic_contexts,
std::vector<ConfigChannelInfo> &&config_channels_info,
- std::vector<std::string> &&sorted_output_names,
SupportedFeatures &supported_features,
- const std::vector<std::string> &sorted_network_names)
+ std::vector<std::string> sorted_network_names)
: m_preliminary_context(std::move(preliminary_context)),
m_dynamic_contexts(std::move(dynamic_contexts)),
m_config_channels_info(std::move(config_channels_info)),
- m_core_op_name(core_op_name), m_sorted_output_names(std::move(sorted_output_names)),
- m_supported_features(supported_features), m_sorted_network_names(sorted_network_names) {}
-
-Expected<LayerInfo> CoreOpMetadata::get_layer_info_by_stream_name(const std::string &stream_name) const
-{
- for (auto layer_info : get_all_layer_infos()) {
- if (layer_info.name == stream_name) {
- return layer_info;
- }
- }
- LOGGER__ERROR("Failed to find layer with name {}", stream_name);
- return make_unexpected(HAILO_NOT_FOUND);
-}
+ m_core_op_name(core_op_name), m_supported_features(supported_features),
+ m_sorted_network_names(sorted_network_names) {}
std::vector<LayerInfo> CoreOpMetadata::get_input_layer_infos() const
{
Expected<std::vector<hailo_stream_info_t>> CoreOpMetadata::get_input_stream_infos(const std::string &network_name) const
{
- auto input_layer_infos = get_input_layer_infos(network_name);
- CHECK_EXPECTED(input_layer_infos);
-
- return convert_layer_infos_to_stream_infos(input_layer_infos.value());
+ std::vector<hailo_stream_info_t> res;
+ auto input_layers = get_input_layer_infos(network_name);
+ CHECK_EXPECTED(input_layers);
+ for (auto &layer_info : input_layers.value()) {
+ res.push_back(LayerInfoUtils::get_stream_info_from_layer_info(layer_info));
+ }
+ return res;
}
Expected<std::vector<hailo_stream_info_t>> CoreOpMetadata::get_output_stream_infos(const std::string &network_name) const
{
- auto output_layer_infos = get_output_layer_infos(network_name);
- CHECK_EXPECTED(output_layer_infos);
-
- return convert_layer_infos_to_stream_infos(output_layer_infos.value());
+ std::vector<hailo_stream_info_t> res;
+ auto output_layers = get_output_layer_infos(network_name);
+ CHECK_EXPECTED(output_layers);
+ for (auto &layer_info : output_layers.value()) {
+ res.push_back(LayerInfoUtils::get_stream_info_from_layer_info(layer_info));
+ }
+ return res;
}
Expected<std::vector<hailo_stream_info_t>> CoreOpMetadata::get_all_stream_infos(const std::string &network_name) const
return res;
}
-Expected<std::vector<hailo_vstream_info_t>> CoreOpMetadata::get_input_vstream_infos(const std::string &network_name) const
+
+size_t CoreOpMetadata::get_contexts_count()
{
- auto input_layer_infos = get_input_layer_infos(network_name);
- CHECK_EXPECTED(input_layer_infos);
+ return (m_dynamic_contexts.size() + CONTROL_PROTOCOL__CONTEXT_SWITCH_NUMBER_OF_NON_DYNAMIC_CONTEXTS);
+}
- return convert_layer_infos_to_vstream_infos(input_layer_infos.value());
+Expected<size_t> CoreOpMetadata::get_total_transfer_size()
+{
+ size_t total_transfer_size = 0;
+ for (const auto &dynamic_context : m_dynamic_contexts) {
+ auto context_size = dynamic_context.get_context_transfer_size();
+ CHECK_EXPECTED(context_size);
+ total_transfer_size += context_size.release();
+ }
+ return total_transfer_size;
}
-Expected<std::vector<hailo_vstream_info_t>> CoreOpMetadata::get_output_vstream_infos(const std::string &network_name) const
+Expected<CoreOpMetadataPtr> CoreOpMetadataPerArch::get_metadata(uint32_t partial_clusters_layout_bitmap) const
{
- std::vector<hailo_vstream_info_t> res;
- if (m_supported_features.hailo_net_flow) {
- res = m_output_vstreams_infos;
- return res;
+ if (PARTIAL_CLUSTERS_LAYOUT_IGNORE == partial_clusters_layout_bitmap) {
+        // Passing PARTIAL_CLUSTERS_LAYOUT_IGNORE is a sentinel meaning "return any one of the stored metadata entries"
+ assert(0 != m_metadata_per_arch.size());
+ auto result = m_metadata_per_arch.begin()->second;
+ return result;
}
- auto expected_output_layer_infos = get_output_layer_infos(network_name);
- CHECK_EXPECTED(expected_output_layer_infos);
- auto output_layer_infos = expected_output_layer_infos.release();
+ if (contains(m_metadata_per_arch, partial_clusters_layout_bitmap)) {
+ auto result = m_metadata_per_arch.at(partial_clusters_layout_bitmap);
+ return result;
+ }
+ LOGGER__ERROR("CoreOpPerArch does not contain metadata for partial_clusters_layout_bitmap {}", partial_clusters_layout_bitmap);
+ return make_unexpected(HAILO_INTERNAL_FAILURE);
+}
- res = convert_layer_infos_to_vstream_infos(output_layer_infos);
+void CoreOpMetadataPerArch::add_metadata(const CoreOpMetadataPtr &metadata, uint32_t partial_clusters_layout_bitmap)
+{
+ m_metadata_per_arch[partial_clusters_layout_bitmap] = metadata;
+}
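+
+// Round-trip sketch (illustrative bitmap value):
+//   CoreOpMetadataPerArch per_arch;
+//   per_arch.add_metadata(metadata_ptr, 0x3);
+//   auto exact = per_arch.get_metadata(0x3);                            // exact bitmap match
+//   auto any   = per_arch.get_metadata(PARTIAL_CLUSTERS_LAYOUT_IGNORE); // any stored entry
+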
+Expected<NetworkGroupMetadata> NetworkGroupMetadata::create(const std::string &network_group_name,
+ std::map<std::string, CoreOpMetadataPerArch> &&core_ops_metadata_per_arch, std::vector<std::string> &sorted_output_names,
+ SupportedFeatures &supported_features, const std::vector<std::string> &sorted_network_names,
+ std::vector<std::shared_ptr<NetFlowElement>> &net_flow_ops)
+{
+ auto all_layers_infos = get_all_layer_infos(core_ops_metadata_per_arch);
+ CHECK_EXPECTED(all_layers_infos);
+
+ std::vector<hailo_vstream_info_t> input_vstream_infos;
+ std::vector<hailo_vstream_info_t> output_vstream_infos;
+ for (auto &layer_info : all_layers_infos.value()) {
+ if (std::any_of(net_flow_ops.begin(), net_flow_ops.end(),
+ [&layer_info](auto &op) { return contains(op->input_streams, layer_info.name); })) {
+            continue; // all output_vstream_infos that relate to the op come from the op itself rather than from layer_infos
+ }
+ auto vstreams_info = LayerInfoUtils::get_vstream_infos_from_layer_info(layer_info);
+ if (HAILO_D2H_STREAM == layer_info.direction) {
+ // In case of fused nms layers, several LayerInfos will contain data about the same fused layer
+ for (auto &vstream_info : vstreams_info) {
+ if (!LayerInfoUtils::vstream_info_already_in_vector(output_vstream_infos, vstream_info.name)) {
+ output_vstream_infos.push_back(vstream_info);
+ }
+ }
+ } else {
+ input_vstream_infos.insert(input_vstream_infos.end(),
+ std::make_move_iterator(vstreams_info.begin()), std::make_move_iterator(vstreams_info.end()));
+ }
+ }
+ for (auto &op : net_flow_ops) {
+ output_vstream_infos.push_back(op->output_vstream_info);
+ }
+
+ // Sort vstream infos by sorted_output_names
hailo_status status = HAILO_SUCCESS;
- std::sort(res.begin(), res.end(),
- [this, &status](const auto &info1, const auto &info2)
+ std::sort(output_vstream_infos.begin(), output_vstream_infos.end(),
+ [&sorted_output_names, &status](const auto &info1, const auto &info2)
{
- const auto index1 = std::find(m_sorted_output_names.begin(), m_sorted_output_names.end(), std::string(info1.name));
- const auto index2 = std::find(m_sorted_output_names.begin(), m_sorted_output_names.end(), std::string(info2.name));
+ const auto index1 = std::find(sorted_output_names.begin(), sorted_output_names.end(), std::string(info1.name));
+ const auto index2 = std::find(sorted_output_names.begin(), sorted_output_names.end(), std::string(info2.name));
- if (m_sorted_output_names.end() == index1) {
- LOGGER__ERROR("Stream {} not found in sorted output names", info1.name);
+ if (sorted_output_names.end() == index1) {
+ LOGGER__ERROR("VStream {} not found in sorted output names", info1.name);
status = HAILO_INTERNAL_FAILURE;
return false;
}
- if (m_sorted_output_names.end() == index2) {
- LOGGER__ERROR("Stream {} not found in sorted output names", info2.name);
+ if (sorted_output_names.end() == index2) {
+ LOGGER__ERROR("VStream {} not found in sorted output names", info2.name);
status = HAILO_INTERNAL_FAILURE;
return false;
}
});
CHECK_SUCCESS_AS_EXPECTED(status);
+ return NetworkGroupMetadata(network_group_name, std::move(core_ops_metadata_per_arch), sorted_output_names, supported_features, sorted_network_names,
+ input_vstream_infos, output_vstream_infos, net_flow_ops);
+}
+
+Expected<std::vector<hailo_vstream_info_t>> NetworkGroupMetadata::get_input_vstream_infos(const std::string &network_name) const
+{
+ std::vector<hailo_vstream_info_t> res;
+ for (auto &vstream_info : m_input_vstreams_infos) {
+ if ((network_name == std::string(vstream_info.network_name)) || (network_name.empty()) || (network_name == default_network_name())) {
+ res.push_back(vstream_info);
+ }
+ }
+    CHECK_AS_EXPECTED(0 != res.size(), HAILO_NOT_FOUND, "No VStreams were found for network {}", network_name);
+
return res;
}
-Expected<std::vector<hailo_vstream_info_t>> CoreOpMetadata::get_all_vstream_infos(const std::string &network_name) const
+Expected<std::vector<hailo_vstream_info_t>> NetworkGroupMetadata::get_output_vstream_infos(const std::string &network_name) const
+{
+ std::vector<hailo_vstream_info_t> res;
+ for (auto &vstream_info : m_output_vstreams_infos) {
+ if ((network_name == std::string(vstream_info.network_name)) || (network_name.empty()) || (network_name == default_network_name())) {
+ res.push_back(vstream_info);
+ }
+ }
+    CHECK_AS_EXPECTED(0 != res.size(), HAILO_NOT_FOUND, "No VStreams were found for network {}", network_name);
+
+ return res;
+}
+
+Expected<std::vector<hailo_vstream_info_t>> NetworkGroupMetadata::get_all_vstream_infos(const std::string &network_name) const
{
auto input_vstream_infos = get_input_vstream_infos(network_name);
CHECK_EXPECTED(input_vstream_infos);
return res;
}
-Expected<std::vector<std::string>> CoreOpMetadata::get_vstream_names_from_stream_name(const std::string &stream_name) const
+Expected<std::vector<std::string>> NetworkGroupMetadata::get_vstream_names_from_stream_name(const std::string &stream_name)
{
std::vector<std::string> results;
- for (auto &layer_info : get_all_layer_infos()) {
+ for (auto &pp : m_net_flow_ops) {
+ if (contains(pp->input_streams, stream_name)) {
+ for (auto &output_metadata : pp->op->outputs_metadata()) {
+ results.push_back(output_metadata.first);
+ }
+ return results;
+ }
+ }
+
+ auto all_layers_infos = get_all_layer_infos(m_core_ops_metadata_per_arch);
+ CHECK_EXPECTED(all_layers_infos);
+ for (auto &layer_info : all_layers_infos.release()) {
if (stream_name == layer_info.name) {
if (layer_info.is_defused_nms) {
return std::vector<std::string> (1, layer_info.fused_nms_layer[0].name);
return make_unexpected(HAILO_NOT_FOUND);
}
-Expected<std::vector<std::string>> CoreOpMetadata::get_stream_names_from_vstream_name(const std::string &vstream_name) const
+Expected<std::vector<std::string>> NetworkGroupMetadata::get_stream_names_from_vstream_name(const std::string &vstream_name)
{
std::vector<std::string> results;
- for (auto &layer_info : get_all_layer_infos()) {
+ for (auto &pp : m_net_flow_ops) {
+ if (contains(pp->op->outputs_metadata(), vstream_name)) {
+ for (auto &input_name : pp->input_streams) {
+ results.push_back(input_name);
+ }
+ return results;
+ }
+ }
+
+ auto all_layers_infos = get_all_layer_infos(m_core_ops_metadata_per_arch);
+ CHECK_EXPECTED(all_layers_infos);
+ for (auto &layer_info : all_layers_infos.release()) {
if (layer_info.is_mux) {
if (is_edge_under_mux(layer_info, vstream_name)) {
// vstream_name is a demux of the layer info
return results;
}
-std::vector<hailo_stream_info_t> CoreOpMetadata::convert_layer_infos_to_stream_infos(const std::vector<LayerInfo> &layer_infos) const
-{
- std::vector<hailo_stream_info_t> res;
- for (auto &layer_info : layer_infos) {
- res.push_back(LayerInfoUtils::get_stream_info_from_layer_info(layer_info));
- }
- return res;
-}
-
-std::vector<hailo_vstream_info_t> CoreOpMetadata::convert_layer_infos_to_vstream_infos(const std::vector<LayerInfo> &layer_infos) const
-{
- std::vector<hailo_vstream_info_t> res;
- for (auto &layer_info : layer_infos) {
- auto vstream_infos = LayerInfoUtils::get_vstream_infos_from_layer_info(layer_info);
- for (const auto &vstream_info : vstream_infos) {
- // In case of fused nms layers, several LayerInfos will contain data about the same fused layer
- if (!LayerInfoUtils::vstream_info_already_in_vector(res, vstream_info.name)) {
- res.push_back(vstream_info);
- }
- }
- }
- return res;
-}
-
-Expected<std::vector<hailo_network_info_t>> CoreOpMetadata::get_network_infos() const
+Expected<std::vector<hailo_network_info_t>> NetworkGroupMetadata::get_network_infos() const
{
std::vector<hailo_network_info_t> network_infos;
network_infos.reserve(m_sorted_network_names.size());
return network_infos;
}
-size_t CoreOpMetadata::get_contexts_count()
-{
- return (m_dynamic_contexts.size() + CONTROL_PROTOCOL__CONTEXT_SWITCH_NUMBER_OF_NON_DYNAMIC_CONTEXTS);
-}
-
-Expected<size_t> CoreOpMetadata::get_total_transfer_size()
-{
- size_t total_transfer_size = 0;
- for (const auto &dynamic_context : m_dynamic_contexts) {
- auto context_size = dynamic_context.get_context_transfer_size();
- CHECK_EXPECTED(context_size);
- total_transfer_size += context_size.release();
- }
- return total_transfer_size;
-}
-
-Expected<CoreOpMetadata> CoreOpMetadataPerArch::get_metadata(uint32_t partial_clusters_layout_bitmap)
-{
- if (PARTIAL_CLUSTERS_LAYOUT_IGNORE == partial_clusters_layout_bitmap) {
- // Passing PARTIAL_CLUSTERS_LAYOUT_IGNORE is magic for getting one of the metadata
- assert(0 != m_metadata_per_arch.size());
- auto result = m_metadata_per_arch.begin()->second;
- return result;
- }
- if (contains(m_metadata_per_arch, partial_clusters_layout_bitmap)) {
- auto result = m_metadata_per_arch[partial_clusters_layout_bitmap];
- return result;
- }
- LOGGER__ERROR("CoreOpPerArch does not contain metadata for partial_clusters_layout_bitmap {}", partial_clusters_layout_bitmap);
- return make_unexpected(HAILO_INTERNAL_FAILURE);
-}
-
-void CoreOpMetadataPerArch::add_metadata(const CoreOpMetadata &metadata, uint32_t partial_clusters_layout_bitmap)
-{
- m_metadata_per_arch[partial_clusters_layout_bitmap] = metadata;
-}
-
} /* namespace hailort */
bool multi_context = false;
bool preliminary_run_asap = false;
bool hailo_net_flow = false;
+ bool dual_direction_stream_index = false;
+ bool nms_burst_mode = false;
+ bool output_scale_by_feature = false;
+ bool periph_calculation_in_hailort = false;
};
// For each config_stream_index we store a vector of all ccw write lengths. The vector is used to build the config buffer.
class ContextMetadata final {
public:
- ContextMetadata() = default; // TODO HRT-8478: remove
ContextMetadata(std::vector<ContextSwitchConfigActionPtr> &&actions,
ConfigBufferInfoMap&& config_buffers_info);
class CoreOpMetadata final {
public:
- CoreOpMetadata() = default; // TODO HRT-8478: remove
CoreOpMetadata(const std::string &core_op_name,
ContextMetadata &&preliminary_context,
std::vector<ContextMetadata> &&dynamic_contexts,
std::vector<ConfigChannelInfo> &&config_channels_info,
- std::vector<std::string> &&sorted_output_names,
SupportedFeatures &supported_features,
- const std::vector<std::string> &sorted_network_names);
+ std::vector<std::string> sorted_network_names);
std::vector<LayerInfo> get_input_layer_infos() const;
std::vector<LayerInfo> get_output_layer_infos() const;
Expected<std::vector<LayerInfo>> get_input_layer_infos(const std::string &network_name) const;
Expected<std::vector<LayerInfo>> get_output_layer_infos(const std::string &network_name) const;
Expected<std::vector<LayerInfo>> get_all_layer_infos(const std::string &network_name) const;
- Expected<LayerInfo> get_layer_info_by_stream_name(const std::string &stream_name) const;
const ContextMetadata &preliminary_context() const;
const std::vector<ContextMetadata> &dynamic_contexts() const;
const std::vector<ConfigChannelInfo> &config_channels_info() const;
+ // TODO: Move stream infos into NetworkGroupMetadata
Expected<std::vector<hailo_stream_info_t>> get_input_stream_infos(const std::string &network_name = "") const;
Expected<std::vector<hailo_stream_info_t>> get_output_stream_infos(const std::string &network_name = "") const;
Expected<std::vector<hailo_stream_info_t>> get_all_stream_infos(const std::string &network_name = "") const;
- // TODO: HRT-9546 - Remove, should only be in CNG
+ size_t get_contexts_count();
+
+ const std::string &core_op_name() const
+ {
+ return m_core_op_name;
+ }
+
+ const SupportedFeatures &supported_features() const
+ {
+ return m_supported_features;
+ }
+
+ Expected<size_t> get_total_transfer_size();
+
+ // TODO: Remove
+ const std::vector<std::string> &get_network_names() const
+ {
+ return m_sorted_network_names;
+ }
+
+private:
+ // TODO: Remove
+ const std::string default_network_name() const
+ {
+ return HailoRTDefaults::get_network_name(m_core_op_name);
+ }
+
+ ContextMetadata m_preliminary_context;
+ std::vector<ContextMetadata> m_dynamic_contexts;
+ std::vector<ConfigChannelInfo> m_config_channels_info;
+ std::string m_core_op_name;
+ SupportedFeatures m_supported_features;
+ std::vector<std::string> m_sorted_network_names;
+};
+
+using CoreOpMetadataPtr = std::shared_ptr<CoreOpMetadata>;
+
+class CoreOpMetadataPerArch final
+{
+public:
+ CoreOpMetadataPerArch() = default;
+
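+    // Passing PARTIAL_CLUSTERS_LAYOUT_IGNORE to get_metadata returns an arbitrary layout's metadata
+    // (the core-op name and layer infos are the same across layouts)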
+ Expected<CoreOpMetadataPtr> get_metadata(uint32_t partial_clusters_layout_bitmap) const;
+ void add_metadata(const CoreOpMetadataPtr &metadata, uint32_t partial_clusters_layout_bitmap);
+
+private:
+ std::map<uint32_t, CoreOpMetadataPtr> m_metadata_per_arch;
+};
+
+struct NetFlowElement;
+
+class NetworkGroupMetadata final {
+public:
+ static Expected<NetworkGroupMetadata> create(const std::string &network_group_name,
+ std::map<std::string, CoreOpMetadataPerArch> &&core_ops_metadata_per_arch,
+ std::vector<std::string> &sorted_output_names,
+ SupportedFeatures &supported_features,
+ const std::vector<std::string> &sorted_network_names,
+ std::vector<std::shared_ptr<hailort::NetFlowElement>> &net_flow_ops);
+
+ NetworkGroupMetadata(const std::string &network_group_name,
+ std::map<std::string, CoreOpMetadataPerArch> &&core_ops_metadata_per_arch,
+ std::vector<std::string> &sorted_output_names,
+ SupportedFeatures &supported_features,
+ const std::vector<std::string> &sorted_network_names,
+ std::vector<hailo_vstream_info_t> &input_vstreams_infos,
+ std::vector<hailo_vstream_info_t> &output_vstreams_infos,
+ std::vector<std::shared_ptr<hailort::NetFlowElement>> &net_flow_ops) :
+ m_network_group_name(network_group_name),
+ m_sorted_output_names(sorted_output_names),
+ m_supported_features(supported_features),
+ m_sorted_network_names(sorted_network_names),
+ m_input_vstreams_infos(input_vstreams_infos),
+ m_output_vstreams_infos(output_vstreams_infos),
+ m_core_ops_metadata_per_arch(std::move(core_ops_metadata_per_arch)),
+ m_net_flow_ops(net_flow_ops)
+ {};
+
Expected<std::vector<hailo_vstream_info_t>> get_input_vstream_infos(const std::string &network_name = "") const;
Expected<std::vector<hailo_vstream_info_t>> get_output_vstream_infos(const std::string &network_name = "") const;
Expected<std::vector<hailo_vstream_info_t>> get_all_vstream_infos(const std::string &network_name = "") const;
- // TODO: HRT-9546 - Remove, should only be in CNG - need to decide if relevant only for one CoreOp case.
- Expected<std::vector<std::string>> get_vstream_names_from_stream_name(const std::string &stream_name) const;
- Expected<std::vector<std::string>> get_stream_names_from_vstream_name(const std::string &vstream_name) const;
+ Expected<std::vector<std::string>> get_vstream_names_from_stream_name(const std::string &stream_name);
+ Expected<std::vector<std::string>> get_stream_names_from_vstream_name(const std::string &vstream_name);
Expected<std::vector<hailo_network_info_t>> get_network_infos() const;
- size_t get_contexts_count();
-
- const std::string &core_op_name() const
+ const std::string &name() const
{
- return m_core_op_name;
+ return m_network_group_name;
}
const std::string default_network_name() const
{
- return HailoRTDefaults::get_network_name(m_core_op_name);
+ return HailoRTDefaults::get_network_name(m_network_group_name);
}
const std::vector<std::string> get_sorted_output_names() const
return m_sorted_output_names;
}
- // duplicated for each CoreOp
const SupportedFeatures &supported_features() const
{
return m_supported_features;
return m_sorted_network_names;
}
- // TODO: HRT-9546 - Move to CNG
- void add_output_vstream_info(const hailo_vstream_info_t &output_vstream_info) {
- m_output_vstreams_infos.push_back(output_vstream_info);
- }
-
- Expected<size_t> get_total_transfer_size();
-
private:
- std::vector<hailo_stream_info_t> convert_layer_infos_to_stream_infos(const std::vector<LayerInfo> &layer_infos) const;
- std::vector<hailo_vstream_info_t> convert_layer_infos_to_vstream_infos(const std::vector<LayerInfo> &layer_infos) const;
+    /* This function is used by the name getters (such as get_vstream_names_from_stream_name),
+       so its result should be the same across all cluster layouts */
+    static Expected<std::vector<LayerInfo>> get_all_layer_infos(std::map<std::string, CoreOpMetadataPerArch> &core_ops_metadata_per_arch)
+    {
+ CHECK_AS_EXPECTED(1 == core_ops_metadata_per_arch.size(), HAILO_INTERNAL_FAILURE);
+ auto core_op_metadata = core_ops_metadata_per_arch.begin()->second.get_metadata(PARTIAL_CLUSTERS_LAYOUT_IGNORE);
+ CHECK_EXPECTED(core_op_metadata);
- ContextMetadata m_preliminary_context;
- std::vector<ContextMetadata> m_dynamic_contexts;
- std::vector<ConfigChannelInfo> m_config_channels_info;
- std::string m_core_op_name;
+ return core_op_metadata.value()->get_all_layer_infos();
+ }
+
+ std::string m_network_group_name;
std::vector<std::string> m_sorted_output_names;
SupportedFeatures m_supported_features;
std::vector<std::string> m_sorted_network_names;
- // TODO: remove this from here! NetworkGroupMetadata should be CoreOpMetadata and contain no net_flow information! (HRT-9546)
- // To add insult to injury, this is being constructed lazyly by add_output_layer_info
- std::vector<hailo_vstream_info_t> m_output_vstreams_infos; // Valid only in case of post process
-};
+ std::vector<hailo_vstream_info_t> m_input_vstreams_infos;
+ std::vector<hailo_vstream_info_t> m_output_vstreams_infos;
+ std::map<std::string, CoreOpMetadataPerArch> m_core_ops_metadata_per_arch; // Key is core_op_name
+ std::vector<std::shared_ptr<NetFlowElement>> m_net_flow_ops;
-class CoreOpMetadataPerArch final
-{
-public:
- CoreOpMetadataPerArch() = default;
-
- Expected<CoreOpMetadata> get_metadata(uint32_t partial_clusters_layout_bitmap);
- void add_metadata(const CoreOpMetadata &metadata, uint32_t partial_clusters_layout_bitmap);
-
-private:
- std::map<uint32_t, CoreOpMetadata> m_metadata_per_arch;
+ friend class Hef;
+ friend class ConfiguredNetworkGroupBase;
};
} /* namespace hailort */
#include "net_flow/ops/nms_post_process.hpp"
#include "net_flow/ops/yolo_post_process.hpp"
+#include "net_flow/ops/yolox_post_process.hpp"
#include "net_flow/ops/ssd_post_process.hpp"
+#include "net_flow/ops/argmax_post_process.hpp"
+#include "net_flow/ops/softmax_post_process.hpp"
#include "hef/hef_internal.hpp"
#include "vdma/pcie/pcie_device.hpp"
#include "vdma/vdma_config_manager.hpp"
#include "eth/hcp_config_core_op.hpp"
#include "hef/layer_info.hpp"
#include "device_common/control.hpp"
+#include "stream_common/nms_stream_reader.hpp"
#include "byte_order.h"
#include "context_switch_defs.h"
#define HEF__MD5_BUFFER_SIZE (1024)
#define DEFAULT_BATCH_SIZE (1)
#define SKIP_SPACE_COMMA_CHARACTERS (2)
+#define ALIGNED_TO_4_BYTES (4)
+#define DEFAULT_NMS_NO_BURST_SIZE (1)
static const uint8_t ENABLE_LCU_CONTROL_WORD[4] = {1, 0, 0, 0};
{
fill_extensions_bitset();
- CoreOpMetadataPerArch metadata;
+ CoreOpMetadataPerArch core_op_metadata;
uint32_t partial_clusters_layout_bitmap = 0;
for (auto &network_group : m_groups) {
+ // Prepare core_op_metadata
auto network_group_name = HefUtils::get_network_group_name(*network_group, m_supported_features);
// TODO: keep metadata per core_op (HRT-9551)
const auto &core_ops = m_core_ops_per_group[network_group_name];
assert(core_ops.size() == 1);
const auto &core_op = core_ops[0];
+
+ // TODO: Clean this code after hef.proto refactor
+ std::vector<std::string> sorted_network_names;
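+        // Depending on the HEF version, network names are stored either directly on the network group or under its partial network groups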
+ if (m_supported_features.multi_network_support) {
+ if (0 != network_group->networks_names_size()) {
+ sorted_network_names.reserve(core_op.networks_names.size());
+ for (auto &partial_network_name : core_op.networks_names) {
+ auto network_name = HefUtils::get_network_name(network_group_name, partial_network_name);
+ sorted_network_names.push_back(network_name);
+ }
+ } else if (0 != network_group->partial_network_groups_size()) {
+ sorted_network_names.reserve(network_group->partial_network_groups().begin()->network_group().networks_names_size());
+ for (auto &partial_network_name : network_group->partial_network_groups().begin()->network_group().networks_names()) {
+ auto network_name = HefUtils::get_network_name(network_group_name, partial_network_name);
+ sorted_network_names.push_back(network_name);
+ }
+ }
+ }
+ if (sorted_network_names.empty()) {
+ sorted_network_names.push_back(HailoRTDefaults::get_network_name(network_group_name));
+ }
+
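+        // Only HAILO8L HEFs hold metadata per partial clusters layout; other archs use the full layout (PARTIAL_CLUSTERS_LAYOUT_IGNORE)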
if (ProtoHEFHwArch::PROTO__HW_ARCH__HAILO8L == get_device_arch()) {
if (m_supported_features.hailo_net_flow) {
for (auto &partial_core_op : core_op.partial_core_ops) {
partial_clusters_layout_bitmap = partial_core_op->layout.partial_clusters_layout_bitmap();
- auto metadata_per_arch = create_metadata_per_arch(*(partial_core_op->core_op));
- CHECK_EXPECTED_AS_STATUS(metadata_per_arch);
- auto &&arch_metadata = metadata_per_arch.release();
- auto expected_net_flow_ops = create_net_flow_ops(*network_group, arch_metadata);
+ auto metadata_per_arch_exp = create_metadata_per_arch(*(partial_core_op->core_op), sorted_network_names);
+ CHECK_EXPECTED_AS_STATUS(metadata_per_arch_exp);
+ auto metadata_per_arch = metadata_per_arch_exp.release();
+
+ auto expected_net_flow_ops = create_net_flow_ops(*network_group, *metadata_per_arch, get_device_arch());
CHECK_EXPECTED_AS_STATUS(expected_net_flow_ops);
- m_post_process_ops_per_group.insert({arch_metadata.core_op_name(), expected_net_flow_ops.value()});
- metadata.add_metadata(arch_metadata, partial_clusters_layout_bitmap);
+ m_post_process_ops_per_group.insert({metadata_per_arch->core_op_name(), expected_net_flow_ops.value()});
+ core_op_metadata.add_metadata(metadata_per_arch, partial_clusters_layout_bitmap);
}
} else {
for (auto &partial_network_group : network_group->partial_network_groups()) {
partial_network_group.network_group().networks_names(),
{}
};
- auto metadata_per_arch = create_metadata_per_arch(partial_core_op);
- CHECK_EXPECTED_AS_STATUS(metadata_per_arch);
- auto &&arch_metadata = metadata_per_arch.release();
+
+ auto metadata_per_arch_exp = create_metadata_per_arch(partial_core_op, sorted_network_names);
+ CHECK_EXPECTED_AS_STATUS(metadata_per_arch_exp);
+ auto metadata_per_arch = metadata_per_arch_exp.release();
+
std::vector<std::shared_ptr<NetFlowElement>> empty_ops;
- m_post_process_ops_per_group.insert({arch_metadata.core_op_name(), empty_ops});
- metadata.add_metadata(arch_metadata, partial_clusters_layout_bitmap);
+ m_post_process_ops_per_group.insert({metadata_per_arch->core_op_name(), empty_ops});
+ core_op_metadata.add_metadata(metadata_per_arch, partial_clusters_layout_bitmap);
}
}
} else {
partial_clusters_layout_bitmap = PARTIAL_CLUSTERS_LAYOUT_IGNORE;
- auto metadata_per_arch = create_metadata_per_arch(core_op);
- CHECK_EXPECTED_AS_STATUS(metadata_per_arch);
- auto &&arch_metadata = metadata_per_arch.release();
- auto expected_net_flow_ops = create_net_flow_ops(*network_group, arch_metadata);
+ auto metadata_per_arch_exp = create_metadata_per_arch(core_op, sorted_network_names);
+ CHECK_EXPECTED_AS_STATUS(metadata_per_arch_exp);
+ auto metadata_per_arch = metadata_per_arch_exp.release();
+
+ auto expected_net_flow_ops = create_net_flow_ops(*network_group, *metadata_per_arch, get_device_arch());
CHECK_EXPECTED_AS_STATUS(expected_net_flow_ops);
- m_post_process_ops_per_group.insert({arch_metadata.core_op_name(), expected_net_flow_ops.value()});
- metadata.add_metadata(arch_metadata, partial_clusters_layout_bitmap);
+ m_post_process_ops_per_group.insert({metadata_per_arch->core_op_name(), expected_net_flow_ops.value()});
+ core_op_metadata.add_metadata(metadata_per_arch, partial_clusters_layout_bitmap);
}
- CHECK(!contains(m_core_op_per_arch, network_group_name),
+
+        // Take the full layout's name (the name is the same across all layouts)
+ auto metadata_exp = core_op_metadata.get_metadata(PARTIAL_CLUSTERS_LAYOUT_IGNORE);
+ CHECK_EXPECTED_AS_STATUS(metadata_exp);
+ auto core_op_name = metadata_exp.value()->core_op_name();
+ std::map<std::string, CoreOpMetadataPerArch> core_op_metadata_map;
+ core_op_metadata_map[core_op_name] = core_op_metadata;
+ // Prepare network_group_metadata
+ CHECK(!contains(m_network_group_metadata, network_group_name),
HAILO_INVALID_OPERATION, "Network group with the name {} is already configured on the device", network_group_name);
- m_core_op_per_arch.emplace(network_group_name, metadata);
+
+ // TODO: Clean this code after hef.proto refactor
+ std::vector<std::string> sorted_output_names;
+ if (core_op.fused_layers_metadata.network_has_fused_layers()) {
+            // If the model has fused layers, the updated sorted_output_names are under the fused-layer metadata
+ for (auto &name : core_op.fused_layers_metadata.updated_sorted_output_names()) {
+ sorted_output_names.push_back(name);
+ }
+        } else if (!m_supported_features.hailo_net_flow && (0 != network_group->partial_network_groups_size()) &&
+            (network_group->partial_network_groups().begin()->network_group().sorted_outputs_order_size())) {
+            // If the model doesn't support net_flow, the sorted output names may be under the partial_network_groups metadata
+ for (auto &name : network_group->partial_network_groups().begin()->network_group().sorted_outputs_order()) {
+ sorted_output_names.push_back(name);
+ }
+ } else if (0 != network_group->sorted_outputs_order_size()) {
+ // Most cases should fall here - either net_flow is supported, or network_group->sorted_outputs_order() has values
+ for (auto &name : network_group->sorted_outputs_order()) {
+ sorted_output_names.push_back(name);
+ }
+ } else {
+ // For very old HEFs, sorted_output_names might be in the last context's metadata
+ uint32_t number_of_contexts = core_op.contexts.size();
+ const auto& context_metadata = core_op.contexts[number_of_contexts - 1].metadata();
+ CHECK(0 < context_metadata.sorted_outputs_order_size(), HAILO_INVALID_HEF,
+ "Sorted output names is not set up in the HEF.");
+ for (auto &name : context_metadata.sorted_outputs_order()) {
+ sorted_output_names.push_back(name);
+ }
+ }
+
+ auto network_group_metadata = NetworkGroupMetadata::create(network_group_name, std::move(core_op_metadata_map),
+ sorted_output_names, m_supported_features, sorted_network_names, m_post_process_ops_per_group.at(network_group_name));
+
+ CHECK_EXPECTED_AS_STATUS(network_group_metadata);
+ m_network_group_metadata.emplace(network_group_name, network_group_metadata.release());
}
return HAILO_SUCCESS;
}
return config_channels_info;
}
-Expected<CoreOpMetadata> Hef::Impl::create_metadata_per_arch(const ProtoHEFCoreOpMock &core_op)
+Expected<CoreOpMetadataPtr> Hef::Impl::create_metadata_per_arch(const ProtoHEFCoreOpMock &core_op, const std::vector<std::string> &sorted_network_names)
{
auto preliminary_context = HefUtils::parse_preliminary_context(core_op.preliminary_config, m_supported_features);
CHECK_EXPECTED(preliminary_context);
- auto dynamic_contexts = HefUtils::parse_dynamic_contexts(core_op, m_supported_features);
+ auto dynamic_contexts = HefUtils::parse_dynamic_contexts(core_op, m_supported_features, get_device_arch());
CHECK_EXPECTED(dynamic_contexts);
auto config_channels_info = parse_config_channels_info(core_op);
CHECK_EXPECTED(config_channels_info);
- auto sorted_output_names = HefUtils::get_sorted_output_names(core_op);
- CHECK_EXPECTED(sorted_output_names);
-
- std::vector<std::string> sorted_network_names;
- if (m_supported_features.multi_network_support) {
- sorted_network_names.reserve(core_op.networks_names.size());
- for (auto &partial_network_name : core_op.networks_names) {
- auto network_name = HefUtils::get_network_name(core_op, partial_network_name);
- sorted_network_names.push_back(network_name);
- }
- } else {
- sorted_network_names.push_back(HailoRTDefaults::get_network_name(core_op.network_group_metadata.network_group_name()));
- }
-
    // Currently, the CoreOp name is the same as the network_group_name, which is why we initialize it with that.
// TODO: HRT-9551 - Change it when supporting multi core ops.
- CoreOpMetadata metadata_per_arch(core_op.network_group_metadata.network_group_name(),
- preliminary_context.release(), dynamic_contexts.release(), config_channels_info.release(),
- sorted_output_names.release(), m_supported_features, sorted_network_names);
+ auto metadata_per_arch = make_shared_nothrow<CoreOpMetadata>(core_op.network_group_metadata.network_group_name(),
+ preliminary_context.release(), dynamic_contexts.release(), config_channels_info.release(), m_supported_features, sorted_network_names);
+ CHECK_NOT_NULL_AS_EXPECTED(metadata_per_arch, HAILO_OUT_OF_HOST_MEMORY);
return metadata_per_arch;
}
header, hef_extensions, included_features);
supported_features.hailo_net_flow = check_hef_extension(ProtoHEFExtensionType::HAILO_NET_FLOW,
header, hef_extensions, included_features);
+ supported_features.dual_direction_stream_index = check_hef_extension(ProtoHEFExtensionType::DUAL_DIRECTION_STREAM_INDEX,
+ header, hef_extensions, included_features);
+ supported_features.nms_burst_mode = check_hef_extension(ProtoHEFExtensionType::NMS_OUTPUT_BURST,
+ header, hef_extensions, included_features);
+ supported_features.output_scale_by_feature = check_hef_extension(ProtoHEFExtensionType::OUTPUT_SCALE_PER_FEATURE,
+ header, hef_extensions, included_features);
+ supported_features.periph_calculation_in_hailort = check_hef_extension(ProtoHEFExtensionType::PERIPH_CALCULATION_IN_HAILORT,
+ header, hef_extensions, included_features);
return supported_features;
}
nms_config.nms_score_th = (float32_t)op_proto.nms_op().nms_score_th();
nms_config.nms_iou_th = (float32_t)op_proto.nms_op().nms_iou_th();
nms_config.max_proposals_per_class = op_proto.nms_op().max_proposals_per_class();
- nms_config.classes = op_proto.nms_op().classes();
+ nms_config.number_of_classes = op_proto.nms_op().classes();
nms_config.background_removal = op_proto.nms_op().background_removal();
nms_config.background_removal_index = op_proto.nms_op().background_removal_index();
const std::map<size_t, LayerInfo> &pad_index_to_streams_info, const std::map<size_t, size_t> &input_to_output_pads)
{
auto nms_config = create_nms_config(op_proto);
- net_flow::YoloPostProcessConfig yolo_config{};
- yolo_config.image_height = (float32_t)op_proto.nms_op().yolo_nms_op().image_height();
- yolo_config.image_width = (float32_t)op_proto.nms_op().yolo_nms_op().image_width();
+ net_flow::YoloxPostProcessConfig yolox_config{};
+ yolox_config.image_height = (float32_t)op_proto.nms_op().yolox_nms_op().image_height();
+ yolox_config.image_width = (float32_t)op_proto.nms_op().yolox_nms_op().image_width();
std::map<std::string, net_flow::BufferMetaData> inputs_metadata;
std::map<std::string, net_flow::BufferMetaData> outputs_metadata;
net_flow::BufferMetaData output_metadata{};
output_metadata.format = output_format;
outputs_metadata.insert({op_proto.output_pads()[0].name(), output_metadata});
+
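+    // Each YOLOX bbox decoder references three core output pads: regression (reg), objectness (obj) and class scores (cls)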
+ for (auto &bbox_proto : op_proto.nms_op().yolox_nms_op().bbox_decoders()) {
+ assert(contains(pad_index_to_streams_info, static_cast<size_t>(bbox_proto.reg_pad_index())));
+ auto reg_name = pad_index_to_streams_info.at(bbox_proto.reg_pad_index()).name;
+ assert(contains(pad_index_to_streams_info, static_cast<size_t>(bbox_proto.cls_pad_index())));
+ auto cls_name = pad_index_to_streams_info.at(bbox_proto.cls_pad_index()).name;
+ assert(contains(pad_index_to_streams_info, static_cast<size_t>(bbox_proto.obj_pad_index())));
+ auto obj_name = pad_index_to_streams_info.at(bbox_proto.obj_pad_index()).name;
+ yolox_config.input_names.emplace_back(net_flow::MatchingLayersNames{reg_name, obj_name, cls_name});
+ }
for (auto &input_pad : op_proto.input_pads()) {
CHECK_AS_EXPECTED(contains(input_to_output_pads, static_cast<size_t>(input_pad.index())), HAILO_INVALID_HEF,
input_metadata.padded_shape = op_input_stream.hw_shape;
inputs_metadata.insert({op_input_stream.name, input_metadata});
}
- return net_flow::YOLOXPostProcessOp::create(inputs_metadata, outputs_metadata, nms_config, yolo_config);
+ return net_flow::YOLOXPostProcessOp::create(inputs_metadata, outputs_metadata, nms_config, yolox_config);
}
Expected<std::shared_ptr<net_flow::Op>> create_ssd_op(const ProtoHEFOp &op_proto, hailo_format_t output_format,
return net_flow::SSDPostProcessOp::create(inputs_metadata, outputs_metadata, nms_config, ssd_config);
}
+Expected<std::shared_ptr<net_flow::Op>> create_argmax_op(const ProtoHEFPad &input_pad, const ProtoHEFPad &output_pad,
+ const std::string &input_name, const std::string &output_name, const bool &is_hw_padding_supported)
+{
+ // create input meta
+ std::map<std::string, hailort::net_flow::BufferMetaData> inputs_metadata;
+ hailort::net_flow::BufferMetaData input_metadata{};
+ input_metadata.shape = {input_pad.tensor_shape().height(), input_pad.tensor_shape().width(), input_pad.tensor_shape().features()};
+    // If padding is done in HW, the padded shape is the same as the shape (TODO: Remove once HRT supports hw_padding from DFC)
+ if (is_hw_padding_supported) {
+ input_metadata.padded_shape = input_metadata.shape;
+ } else {
+ input_metadata.padded_shape = {input_pad.tensor_shape().padded_height(), input_pad.tensor_shape().padded_width(),
+ input_pad.tensor_shape().padded_features()};
+ }
+
+ input_metadata.format.type = static_cast<hailo_format_type_t>(input_pad.format_type());
+ input_metadata.format.order = static_cast<hailo_format_order_t>(input_pad.format_order());
+ input_metadata.format.flags = HAILO_FORMAT_FLAGS_NONE;
+ input_metadata.quant_info.qp_zp = input_pad.numeric_info().qp_zp();
+ input_metadata.quant_info.qp_scale = input_pad.numeric_info().qp_scale();
+ input_metadata.quant_info.limvals_min = input_pad.numeric_info().limvals_min();
+ input_metadata.quant_info.limvals_max = input_pad.numeric_info().limvals_max();
+ inputs_metadata.insert({input_name, input_metadata});
+
+ // create output meta
+ std::map<std::string, hailort::net_flow::BufferMetaData> outputs_metadata;
+ hailort::net_flow::BufferMetaData output_metadata{};
+ output_metadata.shape = {input_pad.tensor_shape().height(), input_pad.tensor_shape().width(), hailort::net_flow::ARGMAX_OUTPUT_FEATURES_SIZE};
+    output_metadata.padded_shape = output_metadata.shape; // padded_shape is the same as the output shape in the argmax op
+ output_metadata.format.order = static_cast<hailo_format_order_t>(output_pad.format_order());
+ output_metadata.format.type = static_cast<hailo_format_type_t>(output_pad.format_type());
+ output_metadata.quant_info.qp_zp = output_pad.numeric_info().qp_zp();
+ output_metadata.quant_info.qp_scale = output_pad.numeric_info().qp_scale();
+ output_metadata.quant_info.limvals_min = output_pad.numeric_info().limvals_min();
+ output_metadata.quant_info.limvals_max = output_pad.numeric_info().limvals_max();
+ output_metadata.format.flags = HAILO_FORMAT_FLAGS_NONE;
+ outputs_metadata.insert({output_name, output_metadata});
+ return net_flow::ArgmaxPostProcessOp::create(inputs_metadata, outputs_metadata);
+}
+
+Expected<std::shared_ptr<net_flow::Op>> create_softmax_op(const ProtoHEFPad &input_pad, const ProtoHEFPad &output_pad,
+ const std::string &input_name, const std::string &output_name)
+{
+ // create input meta
+ std::map<std::string, hailort::net_flow::BufferMetaData> inputs_metadata;
+ hailort::net_flow::BufferMetaData input_metadata{};
+ input_metadata.shape = {input_pad.tensor_shape().height(), input_pad.tensor_shape().width(), input_pad.tensor_shape().features()};
+    input_metadata.padded_shape = input_metadata.shape; // Since softmax is connected to the transform context, shape and padded shape are the same
+
+ input_metadata.format.type = static_cast<hailo_format_type_t>(input_pad.format_type());
+ input_metadata.format.order = static_cast<hailo_format_order_t>(input_pad.format_order());
+ input_metadata.format.flags = HAILO_FORMAT_FLAGS_NONE;
+ input_metadata.quant_info.qp_zp = input_pad.numeric_info().qp_zp();
+ input_metadata.quant_info.qp_scale = input_pad.numeric_info().qp_scale();
+ input_metadata.quant_info.limvals_min = input_pad.numeric_info().limvals_min();
+ input_metadata.quant_info.limvals_max = input_pad.numeric_info().limvals_max();
+ inputs_metadata.insert({input_name, input_metadata});
+
+ // create output meta
+ std::map<std::string, hailort::net_flow::BufferMetaData> outputs_metadata;
+ hailort::net_flow::BufferMetaData output_metadata{};
+ output_metadata.shape = {input_pad.tensor_shape().height(), input_pad.tensor_shape().width(), input_pad.tensor_shape().features()};
+    output_metadata.padded_shape = output_metadata.shape; // padded_shape is the same as the output shape in the softmax op
+ output_metadata.format.order = static_cast<hailo_format_order_t>(output_pad.format_order());
+ output_metadata.format.type = static_cast<hailo_format_type_t>(output_pad.format_type());
+ output_metadata.quant_info.qp_zp = output_pad.numeric_info().qp_zp();
+ output_metadata.quant_info.qp_scale = output_pad.numeric_info().qp_scale();
+ output_metadata.quant_info.limvals_min = output_pad.numeric_info().limvals_min();
+ output_metadata.quant_info.limvals_max = output_pad.numeric_info().limvals_max();
+ output_metadata.format.flags = HAILO_FORMAT_FLAGS_NONE;
+ outputs_metadata.insert({output_name, output_metadata});
+ return net_flow::SoftmaxPostProcessOp::create(inputs_metadata, outputs_metadata);
+}
+
+Expected<std::shared_ptr<net_flow::Op>> create_logits_op(const ProtoHEFOp &op_proto, const std::map<size_t, size_t> &input_to_output_pads,
+ const std::map<size_t, ProtoHEFPad> &pad_index_to_pad_data, NetFlowElement &net_flow_element,
+ const std::map<size_t, LayerInfo> &pad_index_to_streams_info, const ProtoHEFHwArch &hef_arch)
+{
+ // connect input_streams to net_flow element
+ CHECK_AS_EXPECTED(op_proto.input_pads().size() == 1, HAILO_INVALID_HEF, "Logits op must have 1 input only");
+ CHECK_AS_EXPECTED(op_proto.output_pads().size() == 1, HAILO_INVALID_HEF, "Logits op must have 1 output only");
+ auto input_pad = op_proto.input_pads()[0];
+ auto output_pad = op_proto.output_pads()[0];
+ CHECK_AS_EXPECTED(contains(input_to_output_pads, static_cast<size_t>(input_pad.index())), HAILO_INVALID_HEF,
+ "Logits op is not connected to core-op");
+ auto output_pad_index = input_to_output_pads.at(input_pad.index());
+ CHECK_AS_EXPECTED(contains(pad_index_to_streams_info, output_pad_index), HAILO_INVALID_HEF,
+ "Pad {} of post-process {} is not connected to any core output stream", input_pad.index(), op_proto.name());
+
+ // Data of the input_pad is taken from the output_pad of the core op
+ const auto &connected_output_pad = pad_index_to_pad_data.at(output_pad_index);
+ net_flow_element.input_streams.insert(connected_output_pad.name());
+ // TODO: HRT-10603
+ const auto &op_input_stream = pad_index_to_streams_info.at(output_pad_index);
+ auto max_periph_bytes_from_hef = HefConfigurator::max_periph_bytes_value(DeviceBase::hef_arch_to_device_arch(hef_arch));
+ CHECK_EXPECTED(max_periph_bytes_from_hef);
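+    // Clamp the arch's max periph bytes by the stream's max_shmifo_size when that is set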
+    const auto max_periph_bytes = (0 == op_input_stream.max_shmifo_size) ? max_periph_bytes_from_hef.value() :
+        MIN(max_periph_bytes_from_hef.value(), op_input_stream.max_shmifo_size);
+ const auto is_hw_padding_supported = HefConfigurator::is_hw_padding_supported(op_input_stream, max_periph_bytes);
+ net_flow_element.name = op_proto.name();
+
+ switch (op_proto.logits_op().logits_type()) {
+ case ProtoHEFLogitsType::PROTO_HEF_ARGMAX_TYPE: {
+ net_flow_element.op_type = HAILO_NET_FLOW_OP_TYPE_ARGMAX;
+ return create_argmax_op(connected_output_pad, output_pad, input_pad.name(), output_pad.name(), is_hw_padding_supported);
+ }
+ case ProtoHEFLogitsType::PROTO_HEF_SOFTMAX_TYPE: {
+ net_flow_element.op_type = HAILO_NET_FLOW_OP_TYPE_SOFTMAX;
+ return create_softmax_op(connected_output_pad, output_pad, input_pad.name(), output_pad.name());
+ }
+ default: {
+ LOGGER__ERROR("Invalid Net-Flow Logits-Op {}", ProtoHEFLogitsType_Name(op_proto.logits_op().logits_type()));
+ return make_unexpected(HAILO_INTERNAL_FAILURE);
+ }
+ }
+}
Expected<std::vector<std::shared_ptr<NetFlowElement>>> Hef::Impl::create_net_flow_ops(const ProtoHEFNetworkGroup &network_group_proto,
- CoreOpMetadata &core_op_metadata) const
+ CoreOpMetadata &core_op_metadata, const ProtoHEFHwArch &hef_arch) const
{
std::vector<std::shared_ptr<NetFlowElement>> result;
if (!m_supported_features.hailo_net_flow) {
for (auto &pad_edge : network_group_proto.pad_edges()) {
input_to_output_pads.insert({pad_edge.dst(), pad_edge.src()});
}
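+    // Map every pad index to its pad data, so ops (e.g. logits ops) can look up the core output pad connected to their input pad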
+ std::map<size_t, ProtoHEFPad> pad_index_to_pad_data;
+ for (auto &op_proto : network_group_proto.ops()) {
+ for (auto &output_pad : op_proto.output_pads()) {
+ pad_index_to_pad_data.insert({output_pad.index(), output_pad});
+ }
+ for (auto &input_pad : op_proto.input_pads()) {
+ pad_index_to_pad_data.insert({input_pad.index(), input_pad});
+ }
+ }
+
for (auto &op_proto : network_group_proto.ops()) {
switch (op_proto.op_case()) {
case ProtoHEFOp::kCoreOp: {
}
case ProtoHEFOp::kNmsOp: {
hailo_format_t output_format{};
- output_format.type = HAILO_FORMAT_TYPE_FLOAT32;
- output_format.order = HAILO_FORMAT_ORDER_HAILO_NMS;
- output_format.flags = HAILO_FORMAT_FLAGS_QUANTIZED;
+                output_format.order = HAILO_FORMAT_ORDER_HAILO_NMS; // TODO: Remove - HRT-9737
+
NetFlowElement net_flow_element{};
+ net_flow_element.op_type = HAILO_NET_FLOW_OP_TYPE_NMS;
// TODO: HRT-9902 - Move nms_info to be an op member instead of NetFlowElement
net_flow_element.nms_info = {
sizeof(hailo_bbox_float32_t),
1, // input_division_factor
false,
- hailo_nms_defuse_info_t()
+ hailo_nms_defuse_info_t(),
+ DEFAULT_NMS_NO_BURST_SIZE,
+ HAILO_BURST_TYPE_NO_BURST
};
for (auto &input_pad : op_proto.input_pads()) {
CHECK_AS_EXPECTED(contains(input_to_output_pads, static_cast<size_t>(input_pad.index())), HAILO_INVALID_HEF,
}
}
net_flow_element.op = post_process_op;
-
// Fill meta-data output vstream info
auto net_group_name = HefUtils::get_network_group_name(network_group_proto, m_supported_features);
auto network_name = HailoRTDefaults::get_network_name(net_group_name);
net_flow_output_vstream_info.nms_shape.number_of_classes--;
net_flow_element.nms_info.number_of_classes--;
}
+ net_flow_element.output_vstream_info = net_flow_output_vstream_info;
- result.push_back(std::make_shared<NetFlowElement>(net_flow_element));
+ auto net_flow_element_ptr = make_shared_nothrow<NetFlowElement>(net_flow_element);
+ CHECK_NOT_NULL_AS_EXPECTED(net_flow_element_ptr, HAILO_OUT_OF_HOST_MEMORY);
+ result.push_back(net_flow_element_ptr);
+ break;
+ }
+ case ProtoHEFOp::kLogitsOp: {
+ NetFlowElement net_flow_element{};
+ auto expected_logits_op = create_logits_op(op_proto, input_to_output_pads, pad_index_to_pad_data, net_flow_element,
+ pad_index_to_streams_info, hef_arch);
+ CHECK_EXPECTED(expected_logits_op);
+ net_flow_element.op = expected_logits_op.release();
+
+ hailo_vstream_info_t net_flow_output_vstream_info{};
+ auto proto_output_pad = op_proto.output_pads()[0];
+ auto net_group_name = HefUtils::get_network_group_name(network_group_proto, m_supported_features);
+ auto network_name = HailoRTDefaults::get_network_name(net_group_name);
+ strncpy(net_flow_output_vstream_info.name, proto_output_pad.name().c_str(), proto_output_pad.name().length() + 1);
+ strncpy(net_flow_output_vstream_info.network_name, network_name.c_str(), network_name.length() + 1);
+ net_flow_output_vstream_info.direction = HAILO_D2H_STREAM;
+ net_flow_output_vstream_info.format = net_flow_element.op.get()->outputs_metadata().begin()->second.format;
+ net_flow_output_vstream_info.shape = net_flow_element.op.get()->outputs_metadata().begin()->second.shape;
+ net_flow_element.output_vstream_info = net_flow_output_vstream_info;
- // TODO: HRT-9546 - Move vstreams out of core op
- core_op_metadata.add_output_vstream_info(net_flow_output_vstream_info);
+ auto net_flow_element_ptr = make_shared_nothrow<NetFlowElement>(net_flow_element);
+ CHECK_NOT_NULL_AS_EXPECTED(net_flow_element_ptr, HAILO_OUT_OF_HOST_MEMORY);
+ result.push_back(net_flow_element_ptr);
break;
}
default: {
return result;
}
-Expected<CoreOpMetadata> Hef::Impl::get_core_op_metadata(const std::string &network_group_name, uint32_t partial_clusters_layout_bitmap)
+Expected<CoreOpMetadataPtr> Hef::Impl::get_core_op_metadata(const std::string &network_group_name, uint32_t partial_clusters_layout_bitmap)
{
- CHECK_AS_EXPECTED(contains(m_core_op_per_arch, network_group_name), HAILO_NOT_FOUND,
+ CHECK_AS_EXPECTED(contains(m_network_group_metadata, network_group_name), HAILO_NOT_FOUND,
"Network group with name {} wasn't found", network_group_name);
- auto metadata_per_arch = m_core_op_per_arch.at(network_group_name);
+ auto &ng_metadata = m_network_group_metadata.at(network_group_name);
+ CHECK_AS_EXPECTED(contains(ng_metadata.m_core_ops_metadata_per_arch, network_group_name), HAILO_NOT_FOUND,
+ "Core-op with name {} wasn't found", network_group_name);
+ auto metadata_per_arch = ng_metadata.m_core_ops_metadata_per_arch.at(network_group_name);
auto metadata = metadata_per_arch.get_metadata(partial_clusters_layout_bitmap);
return metadata;
}
}
Expected<CONTROL_PROTOCOL__nn_stream_config_t> HefConfigurator::parse_nn_stream_config(hailo_format_order_t format_order, uint32_t width, uint32_t features,
- uint32_t hw_data_bytes, uint16_t core_buffers_per_frame, uint16_t core_bytes_per_buffer, bool hw_padding_supported, bool is_ddr)
+ uint32_t hw_data_bytes, uint16_t core_buffers_per_frame, uint16_t core_bytes_per_buffer, bool hw_padding_supported, bool is_ddr,
+ uint16_t periph_buffers_per_frame, uint16_t periph_bytes_per_buffer)
{
CONTROL_PROTOCOL__nn_stream_config_t stream_config = {};
stream_config.core_buffers_per_frame = core_buffers_per_frame;
stream_config.core_bytes_per_buffer = core_bytes_per_buffer;
- stream_config.periph_buffers_per_frame = core_buffers_per_frame; // periph buffers per frame is the same (even if
- // for hw padding each buffer is smaller).
+ stream_config.periph_buffers_per_frame = periph_buffers_per_frame;
+ stream_config.periph_bytes_per_buffer = periph_bytes_per_buffer;
/* For DDR buffering - core buffers is depended on the amount of buffers per PCIe interrupt. No HW padding required */
if (is_ddr) {
stream_config.core_buffers_per_frame = 1;
stream_config.feature_padding_payload = 0;
- stream_config.periph_bytes_per_buffer = stream_config.core_bytes_per_buffer;
} else {
if (hw_padding_supported) {
auto status = get_hw_padding_params(format_order, width, features, hw_data_bytes,
stream_config.feature_padding_payload, stream_config.periph_bytes_per_buffer);
CHECK_SUCCESS_AS_EXPECTED(status);
+ stream_config.periph_buffers_per_frame = core_buffers_per_frame;
} else {
stream_config.feature_padding_payload = 0;
- stream_config.periph_bytes_per_buffer = stream_config.core_bytes_per_buffer;
}
/* For now, no support for buffer padding */
stream_config.buffer_padding_payload = 0;
auto format_order = format_order_exp.release();
auto is_ddr = ProtoHEFEdgeConnectionType::PROTO__EDGE_CONNECTION_TYPE__DDR == edge_connection_type;
+ CHECK_AS_EXPECTED(IS_FIT_IN_UINT32(edge_layer.padded_width() * edge_layer.padded_features() *
+ edge_layer.padded_height() * edge_layer.data_bytes()), HAILO_INVALID_HEF, "padded shape too big");
+
+    // TODO HRT-10993: Remove these parameters from the parse_nn_stream_config function call.
+    // These values get overridden in update_layer_info in resource_manager_builder, except in the case of a
+    // MIPI stream with hw padding supported (HRT-11030).
+    // TODO HRT-11030: In MIPI with hw padding supported, the layer thinks hw padding is supported so it won't
+    // recalculate the periph values, but when creating the InputStreamBase it will not use hw padding and will
+    // therefore take the initial values. This behavior should be fixed.
+ const uint16_t INITIAL_PERIPH_BYTES_PER_BUFFER = static_cast<uint16_t>(edge_layer.core_bytes_per_buffer());
+ const uint16_t INITIAL_PERIPH_BUFFERS_PER_FRAME = static_cast<uint16_t>(edge_layer.core_buffers_per_frame());
+
// Width and features only used in case hw_padding is supported. In that case, they represent the HW shape (without padding)
return parse_nn_stream_config(format_order, edge_layer.width(), edge_layer.features(),
edge_layer.data_bytes(), static_cast<uint16_t>(edge_layer.core_buffers_per_frame()),
- static_cast<uint16_t>(edge_layer.core_bytes_per_buffer()), hw_padding_supported, is_ddr);
+ static_cast<uint16_t>(edge_layer.core_bytes_per_buffer()), hw_padding_supported, is_ddr,
+ INITIAL_PERIPH_BUFFERS_PER_FRAME, INITIAL_PERIPH_BYTES_PER_BUFFER);
}
Expected<CONTROL_PROTOCOL__nn_stream_config_t> HefConfigurator::parse_nn_stream_config(const LayerInfo &edge_layer, bool hw_padding_supported)
{
// TODO HRT-7177 - pass interface to layer info instead of re-calculated Layer info from stream_internal.hpp
// After passing stream interface, there is no need for this function. Just use CONTROL_PROTOCOL__nn_stream_config_t from layer info.
- auto is_ddr = false; // This function is called only on boundary layers, so no DDR
+ assert(LayerType::BOUNDARY == edge_layer.type);
+ const auto is_ddr = false; // This function is called only on boundary layers, so no DDR
+
return parse_nn_stream_config(edge_layer.format.order, edge_layer.hw_shape.width, edge_layer.hw_shape.features,
edge_layer.hw_data_bytes, edge_layer.nn_stream_config.core_buffers_per_frame,
- edge_layer.nn_stream_config.core_bytes_per_buffer, hw_padding_supported, is_ddr);
+ edge_layer.nn_stream_config.core_bytes_per_buffer, hw_padding_supported, is_ddr, edge_layer.nn_stream_config.periph_buffers_per_frame,
+ edge_layer.nn_stream_config.periph_bytes_per_buffer);
+}
+
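+// Max periph bytes-per-buffer the device supports, per architecture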
+Expected<uint32_t> HefConfigurator::max_periph_bytes_value(const hailo_device_architecture_t hw_arch)
+{
+ switch (hw_arch) {
+ case HAILO_ARCH_HAILO8_A0:
+ case HAILO_ARCH_HAILO8:
+ case HAILO_ARCH_HAILO8L:
+ return HAILO8_INBOUND_DATA_STREAM_SIZE;
+ case HAILO_ARCH_HAILO15:
+ return HAILO15_PERIPH_BYTES_PER_BUFFER_MAX_SIZE;
+ default:
+ LOGGER__ERROR("Unknown device architecture!");
+ return make_unexpected(HAILO_INVALID_ARGUMENT);
+ }
+}
+
+// TODO HRT-11006: remove this function when hw padding is removed from InputStreamBase / OutputStreamBase constructor
+Expected<uint32_t> HefConfigurator::max_periph_bytes_value(const hailo_stream_interface_t interface)
+{
+ switch (interface) {
+ case HAILO_STREAM_INTERFACE_ETH:
+ case HAILO_STREAM_INTERFACE_MIPI:
+ case HAILO_STREAM_INTERFACE_PCIE:
+ return HAILO8_INBOUND_DATA_STREAM_SIZE;
+ case HAILO_STREAM_INTERFACE_INTEGRATED:
+ return HAILO15_PERIPH_BYTES_PER_BUFFER_MAX_SIZE;
+ default:
+ LOGGER__ERROR("Unknown stream interface!");
+ return make_unexpected(HAILO_INVALID_ARGUMENT);
+ }
}
bool HefConfigurator::is_hw_padding_supported(bool is_boundary, bool is_mux, hailo_format_order_t format_order,
- uint16_t core_buffers_per_frame, uint32_t height, uint32_t width, uint32_t features, uint32_t hw_data_bytes)
+ uint16_t core_buffers_per_frame, uint32_t height, uint32_t width, uint32_t features, uint32_t hw_data_bytes,
+ const uint32_t max_periph_bytes_value)
{
if (!is_boundary || is_mux) {
return false;
return false;
}
- if ((width * features * hw_data_bytes) >
- (HAILO8_INBOUND_DATA_STREAM_SIZE - 1)) {
+ if ((width * features * hw_data_bytes) > (max_periph_bytes_value - 1)) {
// TODO: HRT-4177
- LOGGER__DEBUG("HW padding is supported only on layers with features * width * data size > stream size");
+ LOGGER__DEBUG("HW padding is supported only on layers with shape size < stream size");
return false;
}
return true;
}
-bool HefConfigurator::is_hw_padding_supported(const LayerInfo &layer_info)
+bool HefConfigurator::is_hw_padding_supported(const LayerInfo &layer_info, const uint32_t max_periph_bytes_value)
{
/* If the network is transposed, the width and height are swapped in LayerInfo c'tor, so need to swap it again for calculations */
auto height = layer_info.shape.height;
std::swap(height, width);
}
- auto is_boundary = true; // This function is called only on boundary layers
+ auto is_boundary = (LayerType::BOUNDARY == layer_info.type);
return is_hw_padding_supported(is_boundary, layer_info.is_mux, layer_info.format.order,
layer_info.nn_stream_config.core_buffers_per_frame, height, width,
- layer_info.shape.features, layer_info.hw_data_bytes);
+ layer_info.shape.features, layer_info.hw_data_bytes, max_periph_bytes_value);
}
-bool HefConfigurator::is_hw_padding_supported(const ProtoHEFEdgeLayer &edge_layer)
+bool HefConfigurator::is_hw_padding_supported(const ProtoHEFEdgeLayer &edge_layer, const uint32_t max_periph_bytes_value)
{
auto is_boundary = (ProtoHEFEdgeConnectionType::PROTO__EDGE_CONNECTION_TYPE__BOUNDARY == edge_layer.context_switch_info().edge_connection_type());
auto is_mux = (ProtoHEFEdgeLayerType::PROTO__EDGE_LAYER_TYPE__MUX == edge_layer.edge_layer_type());
auto format_order = format_order_exp.release();
return is_hw_padding_supported(is_boundary, is_mux, format_order, static_cast<uint16_t>(edge_layer_base.core_buffers_per_frame()),
- edge_layer_base.height(), edge_layer_base.width(), edge_layer_base.features(), edge_layer_base.data_bytes());
+ edge_layer_base.height(), edge_layer_base.width(), edge_layer_base.features(), edge_layer_base.data_bytes(),
+ max_periph_bytes_value);
}
Expected<std::vector<hailo_stream_info_t>> Hef::Impl::get_input_stream_infos(const std::string &net_group_name,
const std::string &network_name)
{
- auto network_group_metadata = get_core_op_metadata(net_group_name);
- CHECK_EXPECTED(network_group_metadata);
- return network_group_metadata->get_input_stream_infos(network_name);
+ auto core_op_metadata = get_core_op_metadata(net_group_name);
+ CHECK_EXPECTED(core_op_metadata);
+
+ return core_op_metadata.value()->get_input_stream_infos(network_name);
}
Expected<std::vector<hailo_stream_info_t>> Hef::Impl::get_output_stream_infos(const std::string &net_group_name,
const std::string &network_name)
{
- auto network_group_metadata = get_core_op_metadata(net_group_name);
- CHECK_EXPECTED(network_group_metadata);
- return network_group_metadata->get_output_stream_infos(network_name);
+ auto core_op_metadata = get_core_op_metadata(net_group_name);
+ CHECK_EXPECTED(core_op_metadata);
+
+ return core_op_metadata.value()->get_output_stream_infos(network_name);
}
Expected<std::vector<hailo_stream_info_t>> Hef::Impl::get_all_stream_infos(const std::string &net_group_name,
const std::string &network_name)
{
- auto network_group_metadata = get_core_op_metadata(net_group_name);
- CHECK_EXPECTED(network_group_metadata);
- return network_group_metadata->get_all_stream_infos(network_name);
+ auto core_op_metadata = get_core_op_metadata(net_group_name);
+ CHECK_EXPECTED(core_op_metadata);
+
+ return core_op_metadata.value()->get_all_stream_infos(network_name);
}
Expected<std::vector<hailo_network_info_t>> Hef::Impl::get_network_infos(const std::string &net_group_name)
{
- auto network_group_metadata = get_core_op_metadata(net_group_name);
- CHECK_EXPECTED(network_group_metadata);
- return network_group_metadata->get_network_infos();
+ CHECK_AS_EXPECTED(contains(m_network_group_metadata, net_group_name), HAILO_NOT_FOUND);
+ return m_network_group_metadata.at(net_group_name).get_network_infos();
}
Expected<hailo_stream_info_t> Hef::Impl::get_stream_info_by_name(const std::string &stream_name,
hailo_stream_direction_t stream_direction, const std::string &net_group_name)
{
- auto network_group_metadata = get_core_op_metadata(net_group_name);
- CHECK_EXPECTED(network_group_metadata);
+ auto core_op_metadata = get_core_op_metadata(net_group_name);
+ CHECK_EXPECTED(core_op_metadata);
if (HAILO_H2D_STREAM == stream_direction) {
- auto stream_infos = network_group_metadata->get_input_stream_infos();
+ auto stream_infos = core_op_metadata.value()->get_input_stream_infos();
CHECK_EXPECTED(stream_infos);
for (auto &stream_info : stream_infos.value()) {
if (stream_name == stream_info.name) {
}
}
} else {
- auto stream_infos = network_group_metadata->get_output_stream_infos();
+ auto stream_infos = core_op_metadata.value()->get_output_stream_infos();
CHECK_EXPECTED(stream_infos);
for (auto &stream_info : stream_infos.value()) {
if (stream_name == stream_info.name) {
Expected<std::vector<hailo_vstream_info_t>> Hef::Impl::get_input_vstream_infos(const std::string &net_group_name,
const std::string &network_name)
{
- auto network_group_metadata = get_core_op_metadata(net_group_name);
- CHECK_EXPECTED(network_group_metadata);
- return network_group_metadata->get_input_vstream_infos(network_name);
+ CHECK_AS_EXPECTED(contains(m_network_group_metadata, net_group_name), HAILO_NOT_FOUND);
+ return m_network_group_metadata.at(net_group_name).get_input_vstream_infos(network_name);
}
Expected<std::vector<hailo_vstream_info_t>> Hef::Impl::get_output_vstream_infos(const std::string &net_group_name,
const std::string &network_name)
{
- auto network_group_metadata = get_core_op_metadata(net_group_name);
- CHECK_EXPECTED(network_group_metadata);
- return network_group_metadata->get_output_vstream_infos(network_name);
+ CHECK_AS_EXPECTED(contains(m_network_group_metadata, net_group_name), HAILO_NOT_FOUND);
+ return m_network_group_metadata.at(net_group_name).get_output_vstream_infos(network_name);
}
Expected<std::vector<hailo_vstream_info_t>> Hef::Impl::get_all_vstream_infos(const std::string &net_group_name,
const std::string &network_name)
{
- auto network_group_metadata = get_core_op_metadata(net_group_name);
- CHECK_EXPECTED(network_group_metadata);
- return network_group_metadata->get_all_vstream_infos(network_name);
+ CHECK_AS_EXPECTED(contains(m_network_group_metadata, net_group_name), HAILO_NOT_FOUND);
+ return m_network_group_metadata.at(net_group_name).get_all_vstream_infos(network_name);
}
const std::vector<ProtoHEFNetworkGroupPtr>& Hef::Impl::network_groups() const
return m_core_ops_per_group.at(net_group_name);
};
-const std::vector<std::shared_ptr<NetFlowElement>> Hef::Impl::post_process_ops(const std::string &net_group_name) const
+const NetworkGroupMetadata Hef::Impl::network_group_metadata(const std::string &net_group_name) const
{
- assert(contains(m_post_process_ops_per_group, net_group_name));
- return m_post_process_ops_per_group.at(net_group_name);
+ assert(contains(m_network_group_metadata, net_group_name));
+ auto metadata = m_network_group_metadata.at(net_group_name);
+ return metadata;
}
bool Hef::Impl::check_hef_extension(const ProtoHEFExtensionType &extension, const ProtoHEFHeader &header,
auto core_op_metadata = get_core_op_metadata(net_group_name);
CHECK_EXPECTED(core_op_metadata);
- auto input_layer_infos = core_op_metadata->get_input_layer_infos();
- return input_layer_infos.size();
+ auto input_stream_infos = core_op_metadata.value()->get_input_stream_infos();
+ CHECK_EXPECTED(input_stream_infos);
+ return input_stream_infos->size();
}
Expected<size_t> Hef::Impl::get_number_of_output_streams(const std::string &net_group_name)
auto core_op_metadata = get_core_op_metadata(net_group_name);
CHECK_EXPECTED(core_op_metadata);
- auto output_layer_infos = core_op_metadata->get_output_layer_infos();
- return output_layer_infos.size();
+ auto output_stream_infos = core_op_metadata.value()->get_output_stream_infos();
+ CHECK_EXPECTED(output_stream_infos);
+ return output_stream_infos->size();
}
static Expected<LayerType> get_layer_type(const ProtoHEFEdgeConnectionType &edge_connection_type)
}
}
-hailo_status HefUtils::fill_layer_info_with_base_info(const ProtoHEFEdgeLayerBase &base_info,
- const ProtoHEFEdgeConnectionType &edge_connection_type, const ProtoHEFNetworkGroupMetadata &network_group_proto,
- bool hw_padding_supported, bool transposed, const uint8_t context_index, const uint8_t network_index,
- LayerInfo &layer_info)
-{
- auto format_order_exp = HailoRTDefaults::get_device_format_order(base_info.format());
- CHECK_EXPECTED_AS_STATUS(format_order_exp);
-
- auto format_oder = format_order_exp.release();
-
- auto layer_type = get_layer_type(edge_connection_type);
- CHECK_EXPECTED_AS_STATUS(layer_type);
- layer_info.type = layer_type.value();
-
+static void parse_layer_shape(LayerInfo &layer_info, const ProtoHEFEdgeLayerBase &base_info, const bool hw_padding_supported)
+{
if (HEF__FORMAT__NMS != base_info.format()) {
layer_info.shape.height = base_info.height();
layer_info.shape.width = base_info.width();
layer_info.hw_shape.features = base_info.padded_features();
}
layer_info.hw_data_bytes = base_info.data_bytes();
+}
+
+hailo_status HefUtils::fill_layer_info_with_base_info(const ProtoHEFEdgeLayerBase &base_info,
+ const ProtoHEFEdgeConnectionType &edge_connection_type, const ProtoHEFNetworkGroupMetadata &network_group_proto,
+ bool hw_padding_supported, bool transposed, const uint8_t context_index, const uint8_t network_index,
+ LayerInfo &layer_info, const SupportedFeatures &supported_features, const ProtoHEFHwArch &hef_arch)
+{
+ auto format_order_exp = HailoRTDefaults::get_device_format_order(base_info.format());
+ CHECK_EXPECTED_AS_STATUS(format_order_exp);
+
+    auto format_order = format_order_exp.release();
+
+ auto layer_type = get_layer_type(edge_connection_type);
+ CHECK_EXPECTED_AS_STATUS(layer_type);
+ layer_info.type = layer_type.value();
+
+ parse_layer_shape(layer_info, base_info, hw_padding_supported);
// TODO: remove duplications with stream info parse
    layer_info.format.order = format_order;
CHECK_EXPECTED_AS_STATUS(type);
layer_info.format.type = type.value();
- auto nn_stream_config = HefConfigurator::parse_nn_stream_config(base_info, hw_padding_supported,
+ auto nn_stream_config = HefConfigurator::parse_nn_stream_config(base_info, hw_padding_supported,
edge_connection_type);
CHECK_EXPECTED_AS_STATUS(nn_stream_config, "Failed parse nn stream config");
layer_info.nn_stream_config = nn_stream_config.release();
layer_info.dma_engine_index = static_cast<uint8_t>(base_info.engine_id());
if (HAILO_FORMAT_ORDER_HAILO_NMS == layer_info.format.order) {
- auto expected_nms_info = parse_proto_nms_info(base_info.additional_info().nms_info());
+ auto expected_nms_info = parse_proto_nms_info(base_info.additional_info().nms_info(), supported_features.nms_burst_mode,
+ hef_arch);
CHECK_EXPECTED_AS_STATUS(expected_nms_info);
layer_info.nms_info = expected_nms_info.release();
}
const ProtoHEFEdgeConnectionType &edge_connection_type,
const ProtoHEFCoreOpMock &core_op, hailo_stream_direction_t direction,
bool hw_padding_supported, const uint8_t context_index, const std::string &partial_network_name,
- uint8_t network_index, LayerInfo &layer_info)
+ uint8_t network_index, LayerInfo &layer_info, const SupportedFeatures &supported_features, const ProtoHEFHwArch &hef_arch)
{
auto status = fill_layer_info_with_base_info(info.edge_layer_base(), edge_connection_type, core_op.network_group_metadata,
- hw_padding_supported, info.transposed(), context_index, network_index, layer_info);
+ hw_padding_supported, info.transposed(), context_index, network_index, layer_info, supported_features, hef_arch);
CHECK_SUCCESS(status);
if (HAILO_MAX_STREAM_NAME_SIZE < (info.name().length() + 1)) {
layer_info.quant_info.limvals_min = info.numeric_info().limvals_min();
layer_info.quant_info.qp_scale = info.numeric_info().qp_scale();
layer_info.quant_info.qp_zp = info.numeric_info().qp_zp();
+
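+    // Build the per-feature quantization infos; without the output_scale_by_feature extension, the single layer-wide qp is replicated per feature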
+ for (uint32_t i = 0; i < layer_info.shape.features; i++) {
+ hailo_quant_info_t quant_info = {};
+ if (supported_features.output_scale_by_feature) {
+ quant_info.qp_zp = static_cast<float32_t>(info.numeric_info().qp_zps()[i]);
+ quant_info.qp_scale = static_cast<float32_t>(info.numeric_info().qp_scales()[i]);
+ } else {
+ quant_info.qp_zp = info.numeric_info().qp_zp();
+ quant_info.qp_scale = info.numeric_info().qp_scale();
+ }
+ quant_info.limvals_min = info.numeric_info().limvals_min();
+ quant_info.limvals_max = info.numeric_info().limvals_max();
+ layer_info.quant_infos.push_back(std::move(quant_info));
+ }
+
// Simulation info
assert (1 == info.edge_layer_base().buffer_indices_size());
layer_info.buffer_indices.cluster_index = info.edge_layer_base().buffer_indices(0).cluster_index();
// This creates a new LayerInfo for the fused layer *for each defused layer*, even though they all share the same fused layer.
// TODO Make it so all defused layer reference the same LayerInfo of the fused layer.
LayerInfo fused_layer_info = {};
- status = fill_fused_nms_info(fused_layer, fused_layer_info, layer_info.quant_info, layer_info.network_name);
+ status = fill_fused_nms_info(fused_layer, fused_layer_info, layer_info.quant_info, layer_info.network_name,
+ supported_features.nms_burst_mode, hef_arch);
CHECK_SUCCESS(status);
layer_info.fused_nms_layer.push_back(fused_layer_info);
break;
}
hailo_status HefUtils::fill_fused_nms_info(const ProtoHEFEdgeLayerFused &info, LayerInfo &layer_info,
- hailo_quant_info_t &defuse_quant_info, const std::string &network_name)
+ hailo_quant_info_t &defuse_quant_info, const std::string &network_name, const bool burst_mode_enabled,
+ const ProtoHEFHwArch &hef_arch)
{
auto base_info = info.layer_info().edge_layer_base();
auto format_order_exp = HailoRTDefaults::get_device_format_order(base_info.format());
CHECK_EXPECTED_AS_STATUS(type);
layer_info.format.type = type.value();
- auto expected_nms_info = parse_proto_nms_info(info.nms_info());
+ auto expected_nms_info = parse_proto_nms_info(info.nms_info(), burst_mode_enabled, hef_arch);
CHECK_EXPECTED_AS_STATUS(expected_nms_info);
layer_info.nms_info = expected_nms_info.release();
const ProtoHEFEdgeConnectionType &edge_connection_type,
const ProtoHEFCoreOpMock &core_op, hailo_stream_direction_t direction,
bool hw_padding_supported, const uint8_t context_index, const std::string &partial_network_name,
- uint8_t network_index, LayerInfo &layer_info)
+ uint8_t network_index, LayerInfo &layer_info, const SupportedFeatures &supported_features, const ProtoHEFHwArch &hef_arch)
{
const bool transposed = false;
auto status = fill_layer_info_with_base_info(info.edge_layer_base(), edge_connection_type, core_op.network_group_metadata,
- hw_padding_supported, transposed, context_index, network_index, layer_info);
+ hw_padding_supported, transposed, context_index, network_index, layer_info, supported_features, hef_arch);
CHECK_SUCCESS(status);
if (HAILO_MAX_STREAM_NAME_SIZE < (info.name().length() + 1)) {
switch (info.predecessors(i).edge_case()) {
case ProtoHefEdge::kLayerInfo:
status = fill_layer_info(info.predecessors(i).layer_info(), edge_connection_type, core_op,
- direction, hw_padding_supported, context_index, partial_network_name, network_index, temp_layer);
+ direction, hw_padding_supported, context_index, partial_network_name, network_index, temp_layer,
+ supported_features, hef_arch);
if (HAILO_SUCCESS != status) {
return status;
}
break;
case ProtoHefEdge::kLayerMux:
status = fill_mux_info(info.predecessors(i).layer_mux(), edge_connection_type, core_op,
- direction, hw_padding_supported, context_index, partial_network_name, network_index, temp_layer);
+ direction, hw_padding_supported, context_index, partial_network_name, network_index, temp_layer,
+ supported_features, hef_arch);
if (HAILO_SUCCESS != status) {
return status;
}
const uint8_t context_index,
const ProtoHEFEdgeLayer &layer,
const SupportedFeatures &supported_features,
- ContextMetadata &context_metadata)
+ ContextMetadata &context_metadata,
+ const ProtoHEFHwArch &hef_arch)
{
- auto layer_info = get_boundary_layer_info(core_op, context_index, layer, supported_features);
+ auto layer_info = get_boundary_layer_info(core_op, context_index, layer, supported_features, hef_arch);
CHECK_EXPECTED_AS_STATUS(layer_info);
context_metadata.add_boundary_layer(layer_info.release());
const uint8_t context_index,
const ProtoHEFEdgeLayer &layer,
const SupportedFeatures &supported_features,
- ContextMetadata &context_metadata)
+ ContextMetadata &context_metadata, const ProtoHEFHwArch &hef_arch)
{
- auto layer_info = get_inter_context_layer_info(core_op, context_index, layer, supported_features);
+ auto layer_info = get_inter_context_layer_info(core_op, context_index, layer, supported_features, hef_arch);
CHECK_EXPECTED_AS_STATUS(layer_info);
context_metadata.add_inter_context_layer(layer_info.release());
const uint8_t context_index,
const ProtoHEFEdgeLayer &layer,
const SupportedFeatures &supported_features,
- ContextMetadata &context_metadata)
+ ContextMetadata &context_metadata, const ProtoHEFHwArch &hef_arch)
{
- auto layer_info = get_ddr_layer_info(core_op, context_index, layer, supported_features);
+ auto layer_info = get_ddr_layer_info(core_op, context_index, layer, supported_features, hef_arch);
CHECK_EXPECTED_AS_STATUS(layer_info);
context_metadata.add_ddr_layer(layer_info.release());
CHECK_AS_EXPECTED(IS_FIT_IN_UINT8(proto_action.enable_nms().network_index()), HAILO_INVALID_HEF,
"Failed to parse HEF. Invalid network_index: {}.", proto_action.enable_nms().network_index());
+ uint16_t number_of_classes = 0;
+ uint16_t burst_size = 0;
+ // TODO: HRT-10750 - change this to an error and fail in case of an old enable nms action
+ if (0 == proto_action.enable_nms().number_of_classes() || 0 == proto_action.enable_nms().burst_size()) {
+ LOGGER__WARNING("Enable NMS Action must have number of classes and burst size, Please update Hef to SDK version newer than 3.24");
+ number_of_classes = 1;
+ burst_size = 1;
+ } else {
+ number_of_classes = static_cast<uint16_t>(proto_action.enable_nms().number_of_classes());
+ burst_size = static_cast<uint16_t>(proto_action.enable_nms().burst_size());
+ }
+
auto support_multi_networks = supported_features.multi_network_support;
auto network_index = static_cast<uint8_t>((support_multi_networks) ? proto_action.enable_nms().network_index() : 0);
const auto nms_unit_index = static_cast<uint8_t>(proto_action.enable_nms().nms_unit_index());
- return EnableNmsAction::create(nms_unit_index, network_index);
+ return EnableNmsAction::create(nms_unit_index, network_index, number_of_classes, burst_size);
}
+ case ProtoHEFAction::kWriteDataByType:
+ {
+ CHECK_AS_EXPECTED(IS_FIT_IN_UINT32(proto_action.write_data_by_type().address()), HAILO_INVALID_HEF,
+ "Failed to parse HEF. Invalid write_data_by_type address: {} (should fit uint32_t).",
+ proto_action.write_data_by_type().address());
+ CHECK_AS_EXPECTED((0 == (proto_action.write_data_by_type().address() % ALIGNED_TO_4_BYTES)), HAILO_INVALID_HEF,
+ "Failed to parse HEF. Invalid write_data_by_type address. Address should be aligned to 4 bytes: {}.",
+ proto_action.write_data_by_type().address());
+ CHECK_AS_EXPECTED(proto_action.write_data_by_type().data_type() == ProtoHEFWriteDataType::DATA_FROM_ACTION ||
+ proto_action.write_data_by_type().data_type() == ProtoHEFWriteDataType::BATCH_SIZE, HAILO_INVALID_HEF,
+ "Failed to parse HEF. Invalid write_data_by_type data_type: {} ", proto_action.write_data_by_type().data_type());
+ CHECK_AS_EXPECTED(proto_action.write_data_by_type().data().length() <= CONTEXT_SWITCH_DEFS__WRITE_ACTION_BY_TYPE_MAX_SIZE, HAILO_INVALID_HEF,
+ "Failed to parse HEF. Invalid write_data_by_type data size: {} ", proto_action.write_data_by_type().data().length());
+ CHECK_AS_EXPECTED(IS_FIT_IN_UINT8(proto_action.write_data_by_type().shift()), HAILO_INVALID_HEF,
+ "Failed to parse HEF. Invalid write_data_by_type shift: {} (should fit uint8_t).",
+ proto_action.write_data_by_type().shift());
+
+ uint32_t data = 0x0;
+ memcpy(&data, proto_action.write_data_by_type().data().data(),
+ /* Limit the data to one register */
+ MIN(CONTEXT_SWITCH_DEFS__WRITE_ACTION_BY_TYPE_MAX_SIZE, proto_action.write_data_by_type().data().length()));
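+ // e.g. (illustrative, little-endian host): a 2-byte payload {0x34, 0x12} yields
+ // data == 0x00001234; longer payloads were already rejected above, so the MIN() is defensive.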
+
+ const auto address = static_cast<uint32_t>(proto_action.write_data_by_type().address());
+ const auto data_type = static_cast<uint8_t>(proto_action.write_data_by_type().data_type());
+ const auto mask = proto_action.write_data_by_type().mask();
+ auto support_multi_networks = supported_features.multi_network_support;
+ const auto network_index = static_cast<uint8_t>((support_multi_networks) ? proto_action.write_data_by_type().network_index() : 0);
+ const auto shift = static_cast<uint8_t>(proto_action.write_data_by_type().shift());
+
+ return WriteDataByTypeAction::create(address, data_type, data, shift, mask, network_index);
+ }
default:
LOGGER__ERROR("Action {} not implemented", proto_action.action_case());
break;
}
Expected<ContextMetadata> HefUtils::parse_single_dynamic_context(const ProtoHEFCoreOpMock &core_op,
- const ProtoHEFContext &context_proto, uint8_t context_index, const SupportedFeatures &supported_features)
+ const ProtoHEFContext &context_proto, uint8_t context_index, const SupportedFeatures &supported_features,
+ const ProtoHEFHwArch &hef_arch)
{
auto context_metadata_exp = parse_operations(context_proto.operations(), supported_features);
CHECK_EXPECTED(context_metadata_exp);
if (ProtoHEFEdgeConnectionType::PROTO__EDGE_CONNECTION_TYPE__BOUNDARY ==
edge_layer.context_switch_info().edge_connection_type()) {
auto status = fill_boundary_layers_info(core_op, context_index, edge_layer,
- supported_features, context_metadata);
+ supported_features, context_metadata, hef_arch);
CHECK_SUCCESS_AS_EXPECTED(status);
} else if (ProtoHEFEdgeConnectionType::PROTO__EDGE_CONNECTION_TYPE__INTERMEDIATE ==
edge_layer.context_switch_info().edge_connection_type()) {
auto status = fill_inter_context_layers_info(core_op, context_index, edge_layer,
- supported_features, context_metadata);
+ supported_features, context_metadata, hef_arch);
CHECK_SUCCESS_AS_EXPECTED(status);
} else if (ProtoHEFEdgeConnectionType::PROTO__EDGE_CONNECTION_TYPE__DDR ==
edge_layer.context_switch_info().edge_connection_type()) {
auto status = fill_ddr_layers_info(core_op, context_index, edge_layer,
- supported_features, context_metadata);
+ supported_features, context_metadata, hef_arch);
CHECK_SUCCESS_AS_EXPECTED(status);
}
}
return HAILO_SUCCESS;
}
-Expected<std::vector<ContextMetadata>> HefUtils::parse_dynamic_contexts(const ProtoHEFCoreOpMock &core_op, const SupportedFeatures &supported_features)
+Expected<std::vector<ContextMetadata>> HefUtils::parse_dynamic_contexts(const ProtoHEFCoreOpMock &core_op, const SupportedFeatures &supported_features,
+ const ProtoHEFHwArch &hef_arch)
{
std::vector<ContextMetadata> contexts_metadata;
for (uint8_t context_index = 0; context_index < core_op.contexts.size(); context_index++) {
auto &context_proto = core_op.contexts[context_index];
- auto context_metadata = parse_single_dynamic_context(core_op, context_proto, context_index, supported_features);
+ auto context_metadata = parse_single_dynamic_context(core_op, context_proto, context_index, supported_features, hef_arch);
CHECK_EXPECTED(context_metadata);
contexts_metadata.emplace_back(context_metadata.release());
}
return contexts_metadata;
}
-Expected<hailo_nms_info_t> HefUtils::parse_proto_nms_info(const ProtoHEFNmsInfo &proto_nms_info)
+Expected<hailo_nms_info_t> HefUtils::parse_proto_nms_info(const ProtoHEFNmsInfo &proto_nms_info, const bool burst_mode_enabled,
+ const ProtoHEFHwArch &hef_arch)
{
hailo_nms_info_t nms_info = {};
nms_info.number_of_classes = static_cast<uint32_t>(proto_nms_info.number_of_classes());
nms_info.bbox_size = static_cast<uint32_t>(proto_nms_info.bbox_size());
nms_info.max_bboxes_per_class = static_cast<uint32_t>(proto_nms_info.max_output_size());
nms_info.chunks_per_frame = static_cast<uint32_t>(proto_nms_info.input_division_factor());
+
+ if (burst_mode_enabled) {
+ nms_info.burst_size = static_cast<uint32_t>(proto_nms_info.burst_size());
+ nms_info.burst_type = static_cast<hailo_nms_burst_type_t>(proto_nms_info.burst_type());
+
+ CHECK_AS_EXPECTED(nms_info.burst_type != HAILO_BURST_TYPE_NO_BURST, HAILO_INVALID_HEF,
+ "Invalid HEF, nms burst type is no burst but burst extension is enabled");
+
+ CHECK_AS_EXPECTED((nms_info.burst_size * nms_info.bbox_size) <= MAX_NMS_BURST_SIZE,
+ HAILO_INVALID_HEF, "Invalid HEF, nms burst size {} larger than maximum burst size {}",
+ (nms_info.burst_size * nms_info.bbox_size), MAX_NMS_BURST_SIZE);
+
+ // Validate that burst type matches architecture
+ const auto dev_arch = DeviceBase::hef_arch_to_device_arch(hef_arch);
+ CHECK_AS_EXPECTED(LayerInfoUtils::validate_nms_burst_type(nms_info.burst_type, dev_arch), HAILO_INVALID_HEF,
+ "Invalid HEF, nms burst type {} on device architecture {}", nms_info.burst_type, dev_arch);
+ } else {
+ CHECK_AS_EXPECTED(HAILO_BURST_TYPE_NO_BURST == static_cast<hailo_nms_burst_type_t>(proto_nms_info.burst_type()),
+ HAILO_INVALID_HEF, "Invalid HEF, nms burst extension is disabled yet burst type is {}", nms_info.burst_type);
+
+ // In case of HAILO_BURST_TYPE_NO_BURST make burst size DEFAULT_NMS_NO_BURST_SIZE
+ nms_info.burst_size = DEFAULT_NMS_NO_BURST_SIZE;
+ nms_info.burst_type = static_cast<hailo_nms_burst_type_t>(proto_nms_info.burst_type());
+ }
+
if (nms_info.chunks_per_frame == 0) {
// Old hef, use default value 1
nms_info.chunks_per_frame = 1;
}
Expected<LayerInfo> HefUtils::get_boundary_layer_info(const ProtoHEFCoreOpMock &core_op,
- const uint8_t context_index, const ProtoHEFEdgeLayer &layer, const SupportedFeatures &supported_features)
+ const uint8_t context_index, const ProtoHEFEdgeLayer &layer, const SupportedFeatures &supported_features,
+ const ProtoHEFHwArch &hef_arch)
{
// We parse only boundary layers for user usage
CHECK_AS_EXPECTED(
auto network_index = static_cast<uint8_t>((support_multi_networks) ? layer.network_index() : 0);
auto partial_network_name = HefUtils::get_partial_network_name_by_index(core_op, network_index, supported_features);
CHECK_EXPECTED(partial_network_name);
- const bool hw_padding_supported = HefConfigurator::is_hw_padding_supported(layer);
+ auto max_periph_bytes_from_hef = HefConfigurator::max_periph_bytes_value(DeviceBase::hef_arch_to_device_arch(hef_arch));
+ CHECK_EXPECTED(max_periph_bytes_from_hef);
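+ // A max_shmifo_size of 0 is treated as unset: fall back to the architecture default,
+ // otherwise take the stricter of the two limits.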
+ const auto max_periph_bytes = (0 == layer.layer_info().edge_layer_base().max_shmifo_size()) ? max_periph_bytes_from_hef.value() :
+ MIN(max_periph_bytes_from_hef.value(), layer.layer_info().edge_layer_base().max_shmifo_size());
+ const bool hw_padding_supported = HefConfigurator::is_hw_padding_supported(layer, max_periph_bytes);
if (ProtoHEFEdgeLayerType::PROTO__EDGE_LAYER_TYPE__INFO == layer.edge_layer_type()) {
// TODO: return LayerInfo
auto status = fill_layer_info(layer.layer_info(), layer.context_switch_info().edge_connection_type(),
- core_op, direction, hw_padding_supported, context_index, partial_network_name.value(), network_index, result);
+ core_op, direction, hw_padding_supported, context_index, partial_network_name.value(), network_index, result,
+ supported_features, hef_arch);
CHECK_SUCCESS_AS_EXPECTED(status);
} else if (ProtoHEFEdgeLayerType::PROTO__EDGE_LAYER_TYPE__MUX == layer.edge_layer_type()) {
// TODO: return LayerInfo
auto status = fill_mux_info(layer.layer_mux(), layer.context_switch_info().edge_connection_type(),
- core_op, direction, hw_padding_supported, context_index, partial_network_name.value(), network_index, result);
+ core_op, direction, hw_padding_supported, context_index, partial_network_name.value(), network_index, result,
+ supported_features, hef_arch);
CHECK_SUCCESS_AS_EXPECTED(status);
} else {
LOGGER__ERROR("Invalid layer type");
}
Expected<LayerInfo> HefUtils::get_inter_context_layer_info(const ProtoHEFCoreOpMock &core_op,
- const uint8_t context_index, const ProtoHEFEdgeLayer &layer, const SupportedFeatures &supported_features)
+ const uint8_t context_index, const ProtoHEFEdgeLayer &layer, const SupportedFeatures &supported_features,
+ const ProtoHEFHwArch &hef_arch)
{
LayerInfo result = {};
CHECK_AS_EXPECTED(PROTO__EDGE_LAYER_TYPE__INFO == layer.edge_layer_type(), HAILO_INVALID_HEF, "Inter-context layer can't be mux.");
CHECK_EXPECTED(partial_network_name);
result.network_name = HefUtils::get_network_name(core_op, partial_network_name.release());
result.context_index = context_index;
- const bool hw_padding_supported = HefConfigurator::is_hw_padding_supported(layer);
+ auto max_periph_bytes_from_hef = HefConfigurator::max_periph_bytes_value(DeviceBase::hef_arch_to_device_arch(hef_arch));
+ CHECK_EXPECTED(max_periph_bytes_from_hef);
+ const auto max_periph_bytes = (0 == layer.layer_info().edge_layer_base().max_shmifo_size()) ? max_periph_bytes_from_hef.value() :
+ MIN(max_periph_bytes_from_hef.value(), layer.layer_info().edge_layer_base().max_shmifo_size());
+ const bool hw_padding_supported = HefConfigurator::is_hw_padding_supported(layer, max_periph_bytes);
result.name = layer.layer_info().name();
- auto nn_stream_config_exp = HefConfigurator::parse_nn_stream_config(layer.layer_info().edge_layer_base(),
+
+ auto nn_stream_config_exp = HefConfigurator::parse_nn_stream_config(layer.layer_info().edge_layer_base(),
hw_padding_supported, layer.context_switch_info().edge_connection_type());
CHECK_EXPECTED(nn_stream_config_exp);
result.nn_stream_config = nn_stream_config_exp.release();
result.max_shmifo_size = layer.layer_info().edge_layer_base().max_shmifo_size();
+ parse_layer_shape(result, layer.layer_info().edge_layer_base(), hw_padding_supported);
+
result.direction = (ProtoHEFEdgeLayerDirection::PROTO__EDGE_LAYER_DIRECTION__DEVICE_TO_HOST ==
layer.direction()) ? HAILO_D2H_STREAM : HAILO_H2D_STREAM;
}
Expected<LayerInfo> HefUtils::get_ddr_layer_info(const ProtoHEFCoreOpMock &core_op,
- const uint8_t context_index, const ProtoHEFEdgeLayer &layer, const SupportedFeatures &supported_features)
+ const uint8_t context_index, const ProtoHEFEdgeLayer &layer, const SupportedFeatures &supported_features,
+ const ProtoHEFHwArch &hef_arch)
{
LayerInfo result = {};
CHECK_AS_EXPECTED(PROTO__EDGE_LAYER_TYPE__INFO == layer.edge_layer_type(), HAILO_INVALID_HEF, "DDR layer can't be mux.");
CHECK_EXPECTED(partial_network_name);
result.network_name = HefUtils::get_network_name(core_op, partial_network_name.release());
result.context_index = context_index;
- const bool hw_padding_supported = HefConfigurator::is_hw_padding_supported(layer);
+ auto max_periph_bytes_from_hef = HefConfigurator::max_periph_bytes_value(DeviceBase::hef_arch_to_device_arch(hef_arch));
+ CHECK_EXPECTED(max_periph_bytes_from_hef);
+ const auto max_periph_bytes = (0 == layer.layer_info().edge_layer_base().max_shmifo_size()) ? max_periph_bytes_from_hef.value() :
+ MIN(max_periph_bytes_from_hef.value(), layer.layer_info().edge_layer_base().max_shmifo_size());
+ const bool hw_padding_supported = HefConfigurator::is_hw_padding_supported(layer, max_periph_bytes);
result.name = layer.layer_info().name();
- auto nn_stream_config_exp = HefConfigurator::parse_nn_stream_config(layer.layer_info().edge_layer_base(),
+ auto nn_stream_config_exp = HefConfigurator::parse_nn_stream_config(layer.layer_info().edge_layer_base(),
hw_padding_supported, layer.context_switch_info().edge_connection_type());
CHECK_EXPECTED(nn_stream_config_exp);
result.nn_stream_config = nn_stream_config_exp.release();
result.direction = (ProtoHEFEdgeLayerDirection::PROTO__EDGE_LAYER_DIRECTION__DEVICE_TO_HOST ==
layer.direction()) ? HAILO_D2H_STREAM : HAILO_H2D_STREAM;
+ parse_layer_shape(result, layer.layer_info().edge_layer_base(), hw_padding_supported);
+
CHECK_AS_EXPECTED(IS_FIT_IN_UINT16(layer.layer_info().edge_layer_base().core_buffers_per_frame()), HAILO_INVALID_HEF,
"Failed to parse HEF. Invalid core_buffers_per_frame: {}.", layer.layer_info().edge_layer_base().core_buffers_per_frame());
result.ddr_info.total_buffers_per_frame = static_cast<uint16_t>(layer.layer_info().edge_layer_base().core_buffers_per_frame());
return result;
}
-Expected<std::vector<std::string>> HefUtils::get_sorted_output_names(const ProtoHEFCoreOpMock &core_op)
-{
- if (core_op.fused_layers_metadata.network_has_fused_layers()) {
- return std::vector<std::string>(std::begin(core_op.fused_layers_metadata.updated_sorted_output_names()),
- std::end(core_op.fused_layers_metadata.updated_sorted_output_names()));
- } else if (0 != core_op.sorted_outputs_order.size()) {
- // For backwards compatibility before we've added updated_sorted_output_names
- return std::vector<std::string>(std::begin(core_op.sorted_outputs_order),
- std::end(core_op.sorted_outputs_order));
- } else {
- // For backwards compatibility before we've added this field
- uint32_t number_of_contexts = core_op.contexts.size();
- const auto& context_metadata = core_op.contexts[number_of_contexts - 1].metadata();
-
- CHECK_AS_EXPECTED(0 < context_metadata.sorted_outputs_order_size(), HAILO_INVALID_HEF,
- "Sorted output names is not set up in the HEF.");
-
- return std::vector<std::string>(std::begin(context_metadata.sorted_outputs_order()),
- std::end(context_metadata.sorted_outputs_order()));
- }
-}
-
Expected<std::string> HefUtils::get_partial_network_name_by_index(const ProtoHEFCoreOpMock &core_op, uint8_t network_index,
const SupportedFeatures &supported_features)
{
Expected<std::vector<std::string>> Hef::Impl::get_sorted_output_names(const std::string &net_group_name)
{
- if (m_supported_features.hailo_net_flow) {
- std::vector<std::string> res;
- for (const auto &net_group : m_groups) {
- auto curr_name = HefUtils::get_network_group_name(*net_group, m_supported_features);
- if (curr_name == net_group_name) {
- res.reserve(net_group->sorted_outputs_order().size());
- for (auto &name : net_group->sorted_outputs_order()) {
- res.push_back(name);
- }
- return res;
- }
- }
- LOGGER__ERROR("Did not find network group of name {}", net_group_name);
- return make_unexpected(HAILO_INVALID_HEF);
- }
- auto core_op_metadata = get_core_op_metadata(net_group_name);
- CHECK_EXPECTED(core_op_metadata);
-
- auto res = core_op_metadata->get_sorted_output_names();
+ CHECK_AS_EXPECTED(contains(m_network_group_metadata, net_group_name), HAILO_NOT_FOUND);
+ auto res = m_network_group_metadata.at(net_group_name).get_sorted_output_names();
return res;
}
Expected<std::vector<std::string>> Hef::Impl::get_stream_names_from_vstream_name(const std::string &vstream_name,
const std::string &net_group_name)
{
- auto core_op_metadata = get_core_op_metadata(net_group_name);
- CHECK_EXPECTED(core_op_metadata);
-
- return core_op_metadata->get_stream_names_from_vstream_name(vstream_name);
+ CHECK_AS_EXPECTED(contains(m_network_group_metadata, net_group_name), HAILO_NOT_FOUND);
+ return m_network_group_metadata.at(net_group_name).get_stream_names_from_vstream_name(vstream_name);
}
Expected<std::vector<std::string>> Hef::Impl::get_vstream_names_from_stream_name(const std::string &stream_name,
const std::string &net_group_name)
{
- auto core_op_metadata = get_core_op_metadata(net_group_name);
- CHECK_EXPECTED(core_op_metadata);
-
- return core_op_metadata->get_vstream_names_from_stream_name(stream_name);
+ CHECK_AS_EXPECTED(contains(m_network_group_metadata, net_group_name), HAILO_NOT_FOUND);
+ return m_network_group_metadata.at(net_group_name).get_vstream_names_from_stream_name(stream_name);
}
Expected<std::string> Hef::Impl::get_vstream_name_from_original_name_mux(const std::string &original_name, const ProtoHefEdge &layer)
std::vector<std::string> infos_strings;
std::string infos_string;
- auto post_process = post_process_ops(network_group_name);
+ CHECK_AS_EXPECTED(contains(m_network_group_metadata, network_group_name), HAILO_INTERNAL_FAILURE);
+
+ auto post_process = m_network_group_metadata.at(network_group_name).m_net_flow_ops;
for (const auto &post_process_info : post_process) {
infos_string = post_process_info->op->get_op_description();
- infos_string += ", Bbox size: " + std::to_string(post_process_info->nms_info.bbox_size) +
- ", Max bboxes per class: " + std::to_string(post_process_info->nms_info.max_bboxes_per_class);
+ if (HAILO_NET_FLOW_OP_TYPE_NMS == post_process_info->op_type) {
+ infos_string += ", Bbox size: " + std::to_string(post_process_info->nms_info.bbox_size) +
+ ", Max bboxes per class: " + std::to_string(post_process_info->nms_info.max_bboxes_per_class);
+ }
}
/* If the string is empty there is no need to continue. */
if (infos_string.empty()) {
return infos_strings;
}
-Expected<std::string> Hef::get_hef_description(bool stream_infos, bool vstream_infos)
+Expected<std::string> Hef::get_description(bool stream_infos, bool vstream_infos)
{
auto arch = get_hef_device_arch();
CHECK_EXPECTED(arch);
- return pimpl->get_hef_description(stream_infos, vstream_infos, arch.value());
+ return pimpl->get_description(stream_infos, vstream_infos, arch.value());
}
-Expected<std::string> Hef::Impl::get_hef_description(bool stream_infos, bool vstream_infos, hailo_device_architecture_t device_arch)
+Expected<std::string> Hef::Impl::get_description(bool stream_infos, bool vstream_infos, hailo_device_architecture_t device_arch)
{
std::string hef_infos;
auto hef_arch_str = HailoRTCommon::get_device_arch_str(device_arch);
auto network_group_infos = get_network_groups_infos();
CHECK_EXPECTED(network_group_infos);
for (const auto &network_group_info : network_group_infos.release()) {
- auto core_op_meta_data = get_core_op_metadata(network_group_info.name);
- CHECK_EXPECTED(core_op_meta_data);
- auto number_of_contexts = core_op_meta_data->get_contexts_count();
+ auto core_op_metadata = get_core_op_metadata(network_group_info.name);
+ CHECK_EXPECTED(core_op_metadata);
+ auto number_of_contexts = core_op_metadata.value()->get_contexts_count();
auto contexts_str = (network_group_info.is_multi_context ? "Multi Context - Number of contexts: " + std::to_string(number_of_contexts) : "Single Context");
hef_infos += "Network group name: " + std::string(network_group_info.name) + ", " + contexts_str + "\n";
const std::string &network_name, std::map<std::string, hailo_vstream_params_t> &input_vstreams_params,
bool quantized, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size)
{
- auto core_op_metadata = get_core_op_metadata(net_group_name);
- CHECK_EXPECTED_AS_STATUS(core_op_metadata);
- auto input_vstream_infos = core_op_metadata->get_input_vstream_infos(network_name);
+ CHECK(contains(m_network_group_metadata, net_group_name), HAILO_NOT_FOUND);
+ auto input_vstream_infos = m_network_group_metadata.at(net_group_name).get_input_vstream_infos(network_name);
CHECK_EXPECTED_AS_STATUS(input_vstream_infos);
return fill_missing_vstream_params_with_default(input_vstreams_params, input_vstream_infos.value(),
const std::string &network_name, std::map<std::string, hailo_vstream_params_t> &output_vstream_params,
bool quantized, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size)
{
- auto core_op_metadata = get_core_op_metadata(net_group_name);
- CHECK_EXPECTED_AS_STATUS(core_op_metadata);
- auto output_vstream_infos = core_op_metadata->get_output_vstream_infos(network_name);
+ CHECK(contains(m_network_group_metadata, net_group_name), HAILO_NOT_FOUND);
+ auto output_vstream_infos = m_network_group_metadata.at(net_group_name).get_output_vstream_infos(network_name);
CHECK_EXPECTED_AS_STATUS(output_vstream_infos);
return fill_missing_vstream_params_with_default(output_vstream_params, output_vstream_infos.value(),
CHECK_EXPECTED(core_op_metadata);
std::map<std::string, hailo_stream_parameters_t> results;
- for (auto &input_layer : core_op_metadata->get_input_layer_infos()) {
+ auto input_stream_infos = core_op_metadata.value()->get_input_stream_infos();
+ CHECK_EXPECTED(input_stream_infos);
+ for (auto &input_layer : input_stream_infos.value()) {
auto params = HailoRTDefaults::get_stream_parameters(stream_interface, HAILO_H2D_STREAM);
CHECK_EXPECTED(params);
results.emplace(std::make_pair(input_layer.name, params.release()));
}
- for (auto &output_layer : core_op_metadata->get_output_layer_infos()) {
+ auto output_stream_infos = core_op_metadata.value()->get_output_stream_infos();
+ CHECK_EXPECTED(output_stream_infos);
+ for (auto &output_layer : output_stream_infos.value()) {
auto params = HailoRTDefaults::get_stream_parameters(stream_interface, HAILO_D2H_STREAM);
CHECK_EXPECTED(params);
results.emplace(std::make_pair(output_layer.name, params.release()));
std::map<std::string, hailo_network_parameters_t> results;
- if (core_op_metadata->supported_features().multi_network_support) {
+ if (core_op_metadata.value()->supported_features().multi_network_support) {
CHECK_AS_EXPECTED((core_op.value()->networks_names.size() != 0), HAILO_INTERNAL_FAILURE,
"Hef support multiple networks, but no networks found in the proto");
for (const auto &partial_network_name : core_op.value()->networks_names) {
CHECK_EXPECTED(core_op_metadata);
std::map<std::string, hailo_stream_parameters_t> results;
- for (auto &input_layer : core_op_metadata->get_input_layer_infos()) {
+ auto input_stream_infos = core_op_metadata.value()->get_input_stream_infos();
+ CHECK_EXPECTED(input_stream_infos);
+ for (auto &input_layer : input_stream_infos.value()) {
hailo_stream_parameters_t params = {};
params.direction = HAILO_H2D_STREAM;
params.stream_interface = HAILO_STREAM_INTERFACE_MIPI;
params.mipi_input_params = mipi_params;
results.emplace(std::make_pair(input_layer.name, params));
}
- for (auto &output_layer : core_op_metadata->get_output_layer_infos()) {
+ auto output_stream_infos = core_op_metadata.value()->get_output_stream_infos();
+ CHECK_EXPECTED(output_stream_infos);
+ for (auto &output_layer : output_stream_infos.value()) {
auto params = HailoRTDefaults::get_stream_parameters(output_interface, HAILO_D2H_STREAM);
CHECK_EXPECTED(params);
results.emplace(std::make_pair(output_layer.name, params.release()));
HEF__FORMAT__F8CR,
} HEF__net_io_formatter_type_t;
+typedef enum {
+ HAILO_NET_FLOW_OP_TYPE_NMS = 0,
+ HAILO_NET_FLOW_OP_TYPE_ARGMAX = 1,
+ HAILO_NET_FLOW_OP_TYPE_SOFTMAX = 2,
+
+ /** Max enum value to maintain ABI integrity */
+ HAILO_NET_FLOW_OP_TYPE_MAX_ENUM = HAILO_MAX_ENUM
+} hailo_net_flow_op_type_t;
+
struct NetFlowElement
{
std::string name;
std::shared_ptr<net_flow::Op> op;
std::set<std::string> input_streams;
hailo_nms_info_t nms_info;
+ hailo_net_flow_op_type_t op_type;
+ hailo_vstream_info_t output_vstream_info; // Should be vector?
};
const static uint32_t SUPPORTED_EXTENSIONS_BITSET_SIZE = 1000;
OFFLOAD_ARGMAX,
KO_RUN_ASAP,
HAILO_NET_FLOW,
- HAILO_NET_FLOW_YOLO_NMS // Extention added in platform 4.12 release
+ HAILO_NET_FLOW_YOLO_NMS, // Extension added in platform 4.12 release
+ HAILO_NET_FLOW_SSD_NMS, // Extension added in platform 4.14 release
+ WRITE_DATA_BY_TYPE, // Extension added in platform 4.14 release
+ NMS_OUTPUT_BURST, // Extension added in platform 4.14 release
+ DUAL_DIRECTION_STREAM_INDEX, // Extension added in platform 4.14 release
+ HAILO_NET_FLOW_ARGMAX, // Extension added in platform 4.14 release
+ HAILO_NET_FLOW_SOFTMAX, // Extension added in platform 4.14 release
+ ALIGNED_FORMAT_TYPE, // Extension added in platform 4.14 release
+ HAILO_NET_FLOW_YOLOX_NMS, // Extension added in platform 4.14 release
+ OUTPUT_SCALE_PER_FEATURE, // Extension added in platform 4.14 release
+ PERIPH_CALCULATION_IN_HAILORT, // Extension added in platform 4.14 release
};
static inline bool is_h2d_boundary_info_layer(const ProtoHEFEdgeLayer& layer)
const std::vector<ProtoHEFNetworkGroupPtr>& network_groups() const;
const std::vector<ProtoHEFCoreOpMock>& core_ops(const std::string &net_group_name) const;
- const std::vector<std::shared_ptr<NetFlowElement>> post_process_ops(const std::string &net_group_name) const;
+ const NetworkGroupMetadata network_group_metadata(const std::string &net_group_name) const;
Expected<std::pair<std::string, std::string>> get_network_group_and_network_name(const std::string &name);
// Also adds information to CoreOpMetadata
// TODO: When supporting multiple core ops in same netflow - Change metadata param to a map of core_ops_metadata.
Expected<std::vector<std::shared_ptr<NetFlowElement>>> create_net_flow_ops(const ProtoHEFNetworkGroup &network_group_proto,
- CoreOpMetadata &core_op_metadata) const;
+ CoreOpMetadata &core_op_metadata, const ProtoHEFHwArch &hef_arch) const;
// TODO: Should return map of NG's core_ops metadata?
- Expected<CoreOpMetadata> get_core_op_metadata(const std::string &network_group_name, uint32_t partial_clusters_layout_bitmap = PARTIAL_CLUSTERS_LAYOUT_IGNORE);
+ Expected<CoreOpMetadataPtr> get_core_op_metadata(const std::string &network_group_name, uint32_t partial_clusters_layout_bitmap = PARTIAL_CLUSTERS_LAYOUT_IGNORE);
- Expected<std::string> get_hef_description(bool stream_infos, bool vstream_infos, hailo_device_architecture_t device_arch);
+ Expected<std::string> get_description(bool stream_infos, bool vstream_infos, hailo_device_architecture_t device_arch);
const MD5_SUM_t &md5() const
{
static Expected<std::string> get_vstream_name_from_original_name_mux(const std::string &original_name, const ProtoHefEdge &layer);
static Expected<std::vector<std::string>> get_original_names_from_vstream_name_mux(const std::string &vstream_name, const ProtoHefEdge &layer);
- Expected<CoreOpMetadata> create_metadata_per_arch(const ProtoHEFCoreOpMock &core_op);
+ Expected<CoreOpMetadataPtr> create_metadata_per_arch(const ProtoHEFCoreOpMock &core_op, const std::vector<std::string> &sorted_network_names); // TODO: Remove sorted_network_names
Expected<std::vector<std::string>> get_stream_infos_description(const std::string &network_group_name, const std::string &network_name);
Expected<std::vector<std::string>> get_vstream_infos_description(const std::string &network_group_name, const std::string &network_name);
Expected<std::vector<std::string>> get_post_processes_infos_description(const std::string &network_group_name);
Buffer m_hef_buffer;
#endif // HAILO_SUPPORT_MULTI_PROCESS
- // CoreOps information - TODO: Should be a map of map, mapping network_groups to it's core ops (second map is mapping core op name to its metadata).
- std::map<std::string, CoreOpMetadataPerArch> m_core_op_per_arch;
+ std::map<std::string, NetworkGroupMetadata> m_network_group_metadata; // Key is NG name
};
// TODO: Make this part of a namespace? (HRT-2881)
static Expected<CONTROL_PROTOCOL__nn_stream_config_t> parse_nn_stream_config(const LayerInfo &edge_layer,
bool hw_padding_supported);
- static bool is_hw_padding_supported(const ProtoHEFEdgeLayer &edge_layer);
- static bool is_hw_padding_supported(const LayerInfo &layer_info);
+ static Expected<uint32_t> max_periph_bytes_value(const hailo_device_architecture_t hw_arch);
+ static Expected<uint32_t> max_periph_bytes_value(const hailo_stream_interface_t interface);
+
+ static bool is_hw_padding_supported(const ProtoHEFEdgeLayer &edge_layer, const uint32_t max_periph_bytes_value);
+ static bool is_hw_padding_supported(const LayerInfo &layer_info, const uint32_t max_periph_bytes_value);
private:
static Expected<CONTROL_PROTOCOL__nn_stream_config_t> parse_nn_stream_config(hailo_format_order_t format_order,
uint32_t width, uint32_t features, uint32_t hw_data_bytes, uint16_t core_buffers_per_frame,
- uint16_t core_bytes_per_buffer, bool hw_padding_supported, bool is_ddr);
+ uint16_t core_bytes_per_buffer, bool hw_padding_supported, bool is_ddr, uint16_t periph_buffers_per_frame,
+ uint16_t periph_bytes_per_buffer);
static bool is_hw_padding_supported(bool is_boundary, bool is_mux, hailo_format_order_t format_order,
- uint16_t core_buffers_per_frame, uint32_t height, uint32_t width, uint32_t features, uint32_t hw_data_bytes);
+ uint16_t core_buffers_per_frame, uint32_t height, uint32_t width, uint32_t features, uint32_t hw_data_bytes,
+ const uint32_t max_periph_bytes_value);
};
class HefUtils final
const uint8_t context_index,
const ProtoHEFEdgeLayer &layer,
const SupportedFeatures &supported_features,
- ContextMetadata &context_metadata);
+ ContextMetadata &context_metadata,
+ const ProtoHEFHwArch &hef_arch);
static Expected<LayerInfo> get_inter_context_layer_info(
const ProtoHEFCoreOpMock &core_op, const uint8_t context_index,
- const ProtoHEFEdgeLayer &layer, const SupportedFeatures &supported_features);
+ const ProtoHEFEdgeLayer &layer, const SupportedFeatures &supported_features, const ProtoHEFHwArch &hef_arch);
static hailo_status fill_inter_context_layers_info(
const ProtoHEFCoreOpMock &core_op,
const uint8_t context_index,
const ProtoHEFEdgeLayer &layer,
const SupportedFeatures &supported_features,
- ContextMetadata &context_metadata);
+ ContextMetadata &context_metadata, const ProtoHEFHwArch &hef_arch);
static Expected<LayerInfo> get_ddr_layer_info(
const ProtoHEFCoreOpMock &core_op, const uint8_t context_index,
- const ProtoHEFEdgeLayer &layer, const SupportedFeatures &supported_features);
+ const ProtoHEFEdgeLayer &layer, const SupportedFeatures &supported_features, const ProtoHEFHwArch &hef_arch);
static hailo_status fill_ddr_layers_info(
const ProtoHEFCoreOpMock &core_op,
const uint8_t context_index,
const ProtoHEFEdgeLayer &layer,
const SupportedFeatures &supported_features,
- ContextMetadata &context_metadata);
+ ContextMetadata &context_metadata, const ProtoHEFHwArch &hef_arch);
static hailo_status check_ddr_pairs_match(
const std::vector<LayerInfo> &context_ddr_input_layers,
const std::vector<LayerInfo> &context_ddr_output_layers,
static Expected<ContextMetadata> parse_preliminary_context(const ProtoHEFPreliminaryConfig &preliminary_proto,
const SupportedFeatures &supported_features);
static Expected<ContextMetadata> parse_single_dynamic_context(const ProtoHEFCoreOpMock &core_op,
- const ProtoHEFContext &context_proto, uint8_t context_index, const SupportedFeatures &supported_features);
+ const ProtoHEFContext &context_proto, uint8_t context_index, const SupportedFeatures &supported_features,
+ const ProtoHEFHwArch &hef_arch);
static Expected<std::vector<ContextMetadata>> parse_dynamic_contexts(const ProtoHEFCoreOpMock &core_op,
- const SupportedFeatures &supported_features);
- static Expected<hailo_nms_info_t> parse_proto_nms_info(const ProtoHEFNmsInfo &proto_nms_info);
+ const SupportedFeatures &supported_features, const ProtoHEFHwArch &hef_arch);
+ static Expected<hailo_nms_info_t> parse_proto_nms_info(const ProtoHEFNmsInfo &proto_nms_info,
+ const bool burst_mode_enabled, const ProtoHEFHwArch &hef_arch);
static Expected<LayerInfo> get_boundary_layer_info(const ProtoHEFCoreOpMock &core_op,
- const uint8_t context_index, const ProtoHEFEdgeLayer &layer, const SupportedFeatures &supported_features);
- static Expected<std::vector<std::string>> get_sorted_output_names(const ProtoHEFCoreOpMock &core_op);
+ const uint8_t context_index, const ProtoHEFEdgeLayer &layer, const SupportedFeatures &supported_features,
+ const ProtoHEFHwArch &hef_arch);
static Expected<std::string> get_partial_network_name_by_index(const ProtoHEFCoreOpMock &core_op, uint8_t network_index, const SupportedFeatures &supported_features);
- static Expected<std::vector<hailo_network_info_t>> get_network_infos(const ProtoHEFNetworkGroup &net_group,
- const std::string &net_group_name, const SupportedFeatures &supported_features);
-
static std::string get_network_group_name(const ProtoHEFNetworkGroup &net_group, const SupportedFeatures &supported_features);
static std::string get_network_name(const ProtoHEFCoreOpMock &core_op, const std::string &partial_network_name);
static std::string get_network_name(const std::string &net_group_name, const std::string &partial_network_name);
static hailo_status fill_layer_info_with_base_info(const ProtoHEFEdgeLayerBase &base_info,
const ProtoHEFEdgeConnectionType &edge_connection_type,
const ProtoHEFNetworkGroupMetadata &network_group_proto, bool hw_padding_supported, bool transposed,
- const uint8_t context_index, const uint8_t network_index, LayerInfo &layer_info);
+ const uint8_t context_index, const uint8_t network_index, LayerInfo &layer_info,
+ const SupportedFeatures &supported_features, const ProtoHEFHwArch &hef_arch);
static hailo_status fill_layer_info(const ProtoHEFEdgeLayerInfo &info,
const ProtoHEFEdgeConnectionType &edge_connection_type,
const ProtoHEFCoreOpMock &core_op, hailo_stream_direction_t direction,
bool hw_padding_supported, const uint8_t context_index, const std::string &partial_network_name,
- uint8_t network_index, LayerInfo &layer_info);
+ uint8_t network_index, LayerInfo &layer_info, const SupportedFeatures &supported_features,
+ const ProtoHEFHwArch &hef_arch);
static hailo_status fill_fused_nms_info(const ProtoHEFEdgeLayerFused &info,
- LayerInfo &layer_info, hailo_quant_info_t &defuse_quant_info, const std::string &network_name);
+ LayerInfo &layer_info, hailo_quant_info_t &defuse_quant_info, const std::string &network_name,
+ const bool burst_mode_enabled, const ProtoHEFHwArch &hef_arch);
static hailo_status fill_mux_info(const ProtoHEFEdgeLayerMux &info,
const ProtoHEFEdgeConnectionType &edge_connection_type,
const ProtoHEFCoreOpMock &core_op, hailo_stream_direction_t direction,
bool hw_padding_supported, const uint8_t context_index, const std::string &partial_network_name,
- uint8_t network_index, LayerInfo &layer_info);
+ uint8_t network_index, LayerInfo &layer_info, const SupportedFeatures &supported_features,
+ const ProtoHEFHwArch &hef_arch);
};
} /* namespace hailort */
{
#define INVALID_PAD_INDEX (UINT32_MAX)
+#define PERIPH_BYTES_PER_BUFFER_ALIGNMENT_SIZE (8)
+#define PERIPH_BYTES_PER_BUFFER_DDR_ALIGNMENT_SIZE (512)
enum class LayerType
{
uint16_t min_buffered_rows;
};
-
struct LayerInfo {
LayerType type = LayerType::NOT_SET;
hailo_stream_direction_t direction;
hailo_3d_image_shape_t hw_shape;
uint32_t hw_data_bytes;
hailo_format_t format;
- hailo_quant_info_t quant_info;
+ hailo_quant_info_t quant_info; // TODO: Remove, use vector
+ std::vector<hailo_quant_info_t> quant_infos;
hailo_nms_info_t nms_info;
// Mux info
DdrInfo ddr_info;
};
-// LayerIdentifier = <LayerType, layer_name, stream_index>
-using LayerIdentifier = std::tuple<LayerType, std::string, uint8_t>;
+// LayerIdentifier = <LayerType, hailo_stream_direction_t, layer_name, stream_index>
+using LayerIdentifier = std::tuple<LayerType, hailo_stream_direction_t, std::string, uint8_t>;
inline LayerIdentifier to_layer_identifier(const LayerInfo &info)
{
- return std::make_tuple(info.type, info.name, info.stream_index);
+ return std::make_tuple(info.type, info.direction, info.name, info.stream_index);
}
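+ // With the direction in the key, an input and an output boundary layer that share a
+ // name and stream index no longer collide, e.g. (illustrative values):
+ //   {BOUNDARY, HAILO_H2D_STREAM, "conv1", 0} vs. {BOUNDARY, HAILO_D2H_STREAM, "conv1", 0}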
class LayerInfoUtils {
static Expected<size_t> get_transfer_size(const LayerInfo &layer_info) {
switch (layer_info.type) {
case LayerType::BOUNDARY:
+ if (is_nms_burst_layer(layer_info)) {
+ return get_nms_layer_transfer_size(layer_info);
+ }
+ return layer_info.nn_stream_config.periph_bytes_per_buffer * layer_info.nn_stream_config.periph_buffers_per_frame;
case LayerType::INTER_CONTEXT:
return layer_info.nn_stream_config.periph_bytes_per_buffer * layer_info.nn_stream_config.periph_buffers_per_frame;
case LayerType::DDR:
}
}
+ /**
+ * Validates the NMS burst type against the device architecture.
+ *
+ * @param[in] burst_type A hailo_nms_burst_type_t burst_type.
+ * @param[in] arch A ::hailo_device_architecture_t architecture.
+ * @return true if the burst type matches the device architecture, otherwise false.
+ */
+ static bool validate_nms_burst_type(const hailo_nms_burst_type_t burst_type, const hailo_device_architecture_t arch)
+ {
+ switch (arch)
+ {
+ case HAILO_ARCH_HAILO8_A0:
+ case HAILO_ARCH_HAILO8:
+ case HAILO_ARCH_HAILO8L:
+ return (HAILO_BURST_TYPE_H8_PER_CLASS == burst_type);
+ case HAILO_ARCH_HAILO15:
+ return ((HAILO_BURST_TYPE_H15_PER_CLASS == burst_type) || (HAILO_BURST_TYPE_H15_PER_FRAME == burst_type));
+ default:
+ return false;
+ }
+ }
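+ // e.g. validate_nms_burst_type(HAILO_BURST_TYPE_H8_PER_CLASS, HAILO_ARCH_HAILO8) -> true,
+ //      validate_nms_burst_type(HAILO_BURST_TYPE_H15_PER_FRAME, HAILO_ARCH_HAILO8) -> false.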
+
+ /**
+ * Gets the stream's transfer size in bytes from the stream info and layer info params.
+ *
+ * @param[in] stream_info A ::hailo_stream_info_t object.
+ * @param[in] layer_info A ::LayerInfo object.
+ * @return The stream's transfer size in bytes.
+ */
+ static constexpr uint32_t get_stream_transfer_size(const hailo_stream_info_t &stream_info, const LayerInfo &layer_info)
+ {
+ if (HAILO_FORMAT_ORDER_HAILO_NMS == layer_info.format.order) {
+ return get_nms_layer_transfer_size(layer_info);
+ }
+ return stream_info.hw_frame_size;
+ }
+
+ /**
+ * Gets the NMS layer's transfer size in bytes.
+ *
+ * @param[in] layer_info A ::LayerInfo object.
+ * @return The layer's transfer size in bytes.
+ */
+ static constexpr uint32_t get_nms_layer_transfer_size(const LayerInfo &layer_info)
+ {
+ switch (layer_info.nms_info.burst_type) {
+ // In no-burst mode the transfer size is the size of a single bbox
+ case HAILO_BURST_TYPE_NO_BURST:
+ return layer_info.nms_info.bbox_size;
+ // In hailo8 per-class and hailo15 per-class mode - check whether an interrupt per frame is possible, and if not fall back to an interrupt per burst
+ case HAILO_BURST_TYPE_H8_PER_CLASS:
+ case HAILO_BURST_TYPE_H15_PER_CLASS:
+ {
+ // In case of hailo8 the nn-core adds one delimiter per burst; in case of hailo15 it adds a delimiter and an image delimiter per class
+ const size_t bboxes_needed_for_delimiter = (HAILO_BURST_TYPE_H8_PER_CLASS == layer_info.nms_info.burst_type) ?
+ 1 : 2;
+ // If the burst size is at least max bboxes per class + bboxes_needed_for_delimiter, we can
+ // enable one interrupt per frame, because we know the output size will be burst size * num classes
+ if (layer_info.nms_info.burst_size >= (layer_info.nms_info.max_bboxes_per_class + bboxes_needed_for_delimiter)) {
+ return layer_info.nms_info.burst_size * layer_info.nms_info.bbox_size * layer_info.nms_info.number_of_classes;
+ } else {
+ // support regular interrupt per burst
+ return layer_info.nms_info.burst_size * layer_info.nms_info.bbox_size;
+ }
+ }
+ // Currently HAILO_BURST_TYPE_H15_PER_FRAME mode isn't supported - shouldn't reach here
+ case HAILO_BURST_TYPE_H15_PER_FRAME:
+ default:
+ assert(false);
+ return 0;
+ }
+ }
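+ // Worked example (illustrative numbers): with burst_size = 64, bbox_size = 16,
+ // max_bboxes_per_class = 50 and 80 classes on hailo8 (delimiter cost of 1 bbox),
+ // 64 >= 50 + 1 holds, so a whole frame fits one interrupt: 64 * 16 * 80 bytes.
+ // With burst_size = 32 the condition fails and each transfer is one burst: 32 * 16 bytes.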
+
+ /**
+ * Returns whether the layer is an NMS burst layer.
+ *
+ * @param[in] layer_info A ::LayerInfo object.
+ * @return true if the layer is an NMS layer with burst mode, false otherwise.
+ */
+ static constexpr bool is_nms_burst_layer(const LayerInfo &layer_info)
+ {
+ return (1 < layer_info.nms_info.burst_size);
+ }
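+ // Note: layers parsed without the burst extension get burst_size == DEFAULT_NMS_NO_BURST_SIZE
+ // (presumably 1), so they fall through to the plain periph-size calculation in get_transfer_size().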
+
+ /**
+ * Gets the layer's transfer size in bytes.
+ *
+ * @param[in] layer_info A ::LayerInfo object.
+ * @return The layer's transfer size in bytes.
+ */
+ static constexpr uint32_t get_layer_transfer_size(const LayerInfo &layer_info)
+ {
+ if (HAILO_FORMAT_ORDER_HAILO_NMS == layer_info.format.order) {
+ return get_nms_layer_transfer_size(layer_info);
+ }
+ return (layer_info.hw_shape.width * layer_info.hw_shape.features * layer_info.hw_shape.height * layer_info.hw_data_bytes);
+ }
+
private:
static hailo_vstream_info_t get_vstream_info_from_layer_info_impl(const LayerInfo &layer_info)
{
/** Package constants *********************************************************/
#define HAILO8_INBOUND_DATA_STREAM_SIZE (0x00010000L)
+// Max periph bytes per buffer for hailo15: its value shifted right by 3 (per the spec) is used to
+// configure the shmifo credit size, which on hailo15 is only 10 bits wide
+#define HAILO15_PERIPH_BYTES_PER_BUFFER_MAX_SIZE (0x00002000L)
/** PCIe constants and macros ************************************************/
#define PCIE_CONFIG_BASE_ADDRESS (0x00200000L) // <hw_base_addresses_macros.h>::HW_BASE_ADDRESSES__PCIE_CONFIG(0, 0, 0)
return HAILO_SUCCESS;
}
-Expected<size_t> MipiInputStream::sync_write_raw_buffer(const MemoryView &buffer)
+hailo_status MipiInputStream::write_impl(const MemoryView &buffer)
{
(void)buffer;
- return make_unexpected(HAILO_INVALID_OPERATION);
-}
-
-hailo_status MipiInputStream::sync_write_all_raw_buffer_no_transform_impl(void *buffer, size_t offset, size_t size)
-{
- (void)buffer;
- (void)offset;
- (void)size;
return HAILO_INVALID_OPERATION;
}
CONTROL_PROTOCOL__mipi_input_config_params_t m_mipi_input_params;
protected:
- virtual Expected<size_t> sync_write_raw_buffer(const MemoryView &buffer) override;
- virtual hailo_status sync_write_all_raw_buffer_no_transform_impl(void *buffer, size_t offset, size_t size) override;
+ virtual hailo_status write_impl(const MemoryView &buffer) override;
virtual hailo_status set_timeout(std::chrono::milliseconds timeout) { (void)timeout; return HAILO_INVALID_OPERATION; };
public:
virtual std::chrono::milliseconds get_timeout() const override;
virtual hailo_status abort() override;
virtual hailo_status clear_abort() override;
-
};
} /* namespace hailort */
cmake_minimum_required(VERSION 3.0.0)
-set(HAILORT_OPS_CPP_SOURCES
+set(SRC_FILES
${CMAKE_CURRENT_SOURCE_DIR}/ops/nms_post_process.cpp
${CMAKE_CURRENT_SOURCE_DIR}/ops/yolo_post_process.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/ops/yolox_post_process.cpp
${CMAKE_CURRENT_SOURCE_DIR}/ops/ssd_post_process.cpp
-)
+ ${CMAKE_CURRENT_SOURCE_DIR}/ops/argmax_post_process.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/ops/softmax_post_process.cpp
-set(SRC_FILES
${CMAKE_CURRENT_SOURCE_DIR}/pipeline/pipeline.cpp
${CMAKE_CURRENT_SOURCE_DIR}/pipeline/inference_pipeline.cpp
${CMAKE_CURRENT_SOURCE_DIR}/pipeline/vstream.cpp
)
-set(HAILORT_CPP_SOURCES ${HAILORT_CPP_SOURCES} ${SRC_FILES} ${HAILORT_OPS_CPP_SOURCES} PARENT_SCOPE)
-set(HAILORT_OPS_CPP_SOURCES ${HAILORT_OPS_CPP_SOURCES} PARENT_SCOPE)
+set(HAILORT_CPP_SOURCES ${HAILORT_CPP_SOURCES} ${SRC_FILES} PARENT_SCOPE)
--- /dev/null
+/**
+ * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+**/
+/**
+ * @file argmax_post_process.cpp
+ * @brief: Argmax op
+ **/
+
+#include "argmax_post_process.hpp"
+#include "hailo/hailort.h"
+#include "hailo/hailort_common.hpp"
+#include "common/utils.hpp"
+
+#include <limits>
+
+
+namespace hailort
+{
+namespace net_flow
+{
+
+// Source https://stackoverflow.com/questions/3793838/which-is-the-first-integer-that-an-ieee-754-float-is-incapable-of-representing-e
+#define FLOAT_LAST_CONSECUTIVE_REPRESENTABLE_INT (1 << std::numeric_limits<float32_t>::digits)
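+// For IEEE-754 float32, digits == 24, so this is 1 << 24 == 16,777,216: every integer
+// in [0, 2^24] is exactly representable, but 2^24 + 1 is not.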
+
+hailo_status ArgmaxPostProcessOp::execute_not_supported(const BufferMetaData &input_metadata, const BufferMetaData &output_metadata,
+ const std::map<std::string, MemoryView> &inputs, std::map<std::string, MemoryView> &outputs)
+ {
+ (void)inputs;
+ (void)outputs;
+ LOGGER__ERROR("Argmax post-process not supported with params: input_order {}, input_type {}, output_type {}",
+ HailoRTCommon::get_format_order_str(input_metadata.format.order),
+ HailoRTCommon::get_format_type_str(input_metadata.format.type),
+ HailoRTCommon::get_format_type_str(output_metadata.format.type));
+ return HAILO_INVALID_ARGUMENT;
+ }
+
+ArgmaxFunction ArgmaxPostProcessOp::m_argmax_function_array[ARGMAX_NUM_OF_POSSIBLE_FORMAT_ORDERS][ARGMAX_NUM_OF_POSSIBLE_FORMAT_TYPES][ARGMAX_NUM_OF_POSSIBLE_FORMAT_TYPES]
+{
+ {
+ {
+ // NHCW x AUTO
+ // We don't support input_format_type to be auto
+ ArgmaxPostProcessOp::execute_not_supported,
+ ArgmaxPostProcessOp::execute_not_supported,
+ ArgmaxPostProcessOp::execute_not_supported,
+ ArgmaxPostProcessOp::execute_not_supported
+ },
+ {
+ // NHCW x UINT8
+ ArgmaxPostProcessOp::execute_not_supported, // We don't support output_format_type to be auto
+ ArgmaxPostProcessOp::NHCW_to_NHW_feature_axis<uint8_t, uint8_t>,
+ ArgmaxPostProcessOp::NHCW_to_NHW_feature_axis<uint8_t, uint16_t>,
+ ArgmaxPostProcessOp::NHCW_to_NHW_feature_axis<uint8_t, float32_t>
+ },
+ {
+ // NHCW x UINT16
+ ArgmaxPostProcessOp::execute_not_supported, // We don't support output_format_type to be auto
+ ArgmaxPostProcessOp::NHCW_to_NHW_feature_axis<uint16_t, uint8_t>,
+ ArgmaxPostProcessOp::NHCW_to_NHW_feature_axis<uint16_t, uint16_t>,
+ ArgmaxPostProcessOp::NHCW_to_NHW_feature_axis<uint16_t, float32_t>
+ },
+ {
+ // NHCW x FLOAT32
+ // We don't support input_format_type to be float32
+ ArgmaxPostProcessOp::execute_not_supported,
+ ArgmaxPostProcessOp::execute_not_supported,
+ ArgmaxPostProcessOp::execute_not_supported,
+ ArgmaxPostProcessOp::execute_not_supported
+ }
+ },
+ {
+ {
+ // NHWC x AUTO
+ // We don't support input_format_type to be auto
+ ArgmaxPostProcessOp::execute_not_supported,
+ ArgmaxPostProcessOp::execute_not_supported,
+ ArgmaxPostProcessOp::execute_not_supported,
+ ArgmaxPostProcessOp::execute_not_supported
+ },
+ {
+ // NHWC x UINT8
+ ArgmaxPostProcessOp::execute_not_supported, // We don't support output_format_type to be auto
+ ArgmaxPostProcessOp::NHWC_to_NHW_feature_axis<uint8_t, uint8_t>,
+ ArgmaxPostProcessOp::NHWC_to_NHW_feature_axis<uint8_t, uint16_t>,
+ ArgmaxPostProcessOp::NHWC_to_NHW_feature_axis<uint8_t, float32_t>
+ },
+ {
+ // NHWC x UINT16
+ ArgmaxPostProcessOp::execute_not_supported, // We don't support output_format_type to be auto
+ ArgmaxPostProcessOp::NHWC_to_NHW_feature_axis<uint16_t, uint8_t>,
+ ArgmaxPostProcessOp::NHWC_to_NHW_feature_axis<uint16_t, uint16_t>,
+ ArgmaxPostProcessOp::NHWC_to_NHW_feature_axis<uint16_t, float32_t>,
+ },
+ {
+ // NHWC x FLOAT32
+ ArgmaxPostProcessOp::execute_not_supported,
+ ArgmaxPostProcessOp::execute_not_supported,
+ ArgmaxPostProcessOp::execute_not_supported,
+ ArgmaxPostProcessOp::execute_not_supported
+ }
+ },
+ {
+ {
+ // NC x AUTO
+ // We don't support input_format_type to be auto
+ ArgmaxPostProcessOp::execute_not_supported,
+ ArgmaxPostProcessOp::execute_not_supported,
+ ArgmaxPostProcessOp::execute_not_supported,
+ ArgmaxPostProcessOp::execute_not_supported
+ },
+ {
+ // NC x UINT8
+ ArgmaxPostProcessOp::execute_not_supported, // We don't support output_format_type to be auto
+ ArgmaxPostProcessOp::NC_to_N<uint8_t, uint8_t>,
+ ArgmaxPostProcessOp::NC_to_N<uint8_t, uint16_t>,
+ ArgmaxPostProcessOp::NC_to_N<uint8_t, float32_t>,
+ },
+ {
+ // NC x UINT16
+ ArgmaxPostProcessOp::execute_not_supported, // We don't support output_format_type to be auto
+ ArgmaxPostProcessOp::NC_to_N<uint16_t, uint8_t>,
+ ArgmaxPostProcessOp::NC_to_N<uint16_t, uint16_t>,
+ ArgmaxPostProcessOp::NC_to_N<uint16_t, float32_t>,
+ },
+ {
+ // NC x FLOAT32
+ // We don't support input_format_type to be float32
+ ArgmaxPostProcessOp::execute_not_supported,
+ ArgmaxPostProcessOp::execute_not_supported,
+ ArgmaxPostProcessOp::execute_not_supported,
+ ArgmaxPostProcessOp::execute_not_supported
+ }
+ }
+};
+
+hailo_status ArgmaxPostProcessOp::execute(const std::map<std::string, MemoryView> &inputs,
+ std::map<std::string, MemoryView> &outputs)
+{
+ auto &input_name = inputs.begin()->first;
+ auto &output_name = outputs.begin()->first;
+ auto &input_metadata = m_inputs_metadata[input_name];
+ auto &output_metadata = m_outputs_metadata[output_name];
+
+ uint8_t format_index = UINT8_MAX;
+ switch (input_metadata.format.order) {
+ case HAILO_FORMAT_ORDER_NHCW:
+ format_index = 0;
+ break;
+ case HAILO_FORMAT_ORDER_NHWC:
+ format_index = 1;
+ break;
+ case HAILO_FORMAT_ORDER_NC:
+ format_index = 2;
+ break;
+ default:
+ LOGGER__ERROR("Argmax post-process received invalid input order {}",
+ HailoRTCommon::get_format_order_str(input_metadata.format.order));
+ return HAILO_INVALID_ARGUMENT;
+ }
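+ // Dispatch keys are [order][input type][output type]; format.type values are used
+ // directly as indices, relying on the hailo_format_type_t ordering the table above
+ // assumes (AUTO, UINT8, UINT16, FLOAT32).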
+ return ArgmaxPostProcessOp::m_argmax_function_array[format_index][input_metadata.format.type][output_metadata.format.type](input_metadata, output_metadata, inputs, outputs);
+}
+
+std::string ArgmaxPostProcessOp::get_op_description()
+{
+ auto config_info = fmt::format("ArgmaxPostProcess Op, Name: {}", m_name);
+ return config_info;
+}
+
+hailo_status ArgmaxPostProcessOp::validate_metadata()
+{
+ assert(m_inputs_metadata.size() == hailort::net_flow::ARGMAX_NUMBER_OF_SRCS);
+ assert(m_outputs_metadata.size() == hailort::net_flow::ARGMAX_NUMBER_OF_DSTS);
+
+ auto &input_metadata = m_inputs_metadata.begin()->second;
+ auto &output_metadata = m_outputs_metadata.begin()->second;
+
+ CHECK((
+ ((output_metadata.format.type == HAILO_FORMAT_TYPE_UINT8) && (input_metadata.shape.features <= std::numeric_limits<uint8_t>::max())) ||
+ ((output_metadata.format.type == HAILO_FORMAT_TYPE_UINT16) && (input_metadata.shape.features <= std::numeric_limits<uint16_t>::max())) ||
+ ((output_metadata.format.type == HAILO_FORMAT_TYPE_FLOAT32) && (input_metadata.shape.features <= FLOAT_LAST_CONSECUTIVE_REPRESENTABLE_INT))),
+ HAILO_INVALID_OPERATION, "Dst format type {} can't represent possible range {} for Argmax op",
+ HailoRTCommon::get_format_type_str(output_metadata.format.type), input_metadata.shape.features);
+ CHECK(
+ ((input_metadata.format.order == HAILO_FORMAT_ORDER_NHCW) && (output_metadata.format.order == HAILO_FORMAT_ORDER_NHW)) ||
+ ((input_metadata.format.order == HAILO_FORMAT_ORDER_NHWC) && (output_metadata.format.order == HAILO_FORMAT_ORDER_NHW)) ||
+ ((input_metadata.format.order == HAILO_FORMAT_ORDER_NC) && (output_metadata.format.order == HAILO_FORMAT_ORDER_NC)),
+ HAILO_INVALID_OPERATION, "Argmax op is not supported for src format order ({}) and dst format order ({})",
+ HailoRTCommon::get_format_order_str(input_metadata.format.order),
+ HailoRTCommon::get_format_order_str(output_metadata.format.order));
+
+ CHECK(output_metadata.shape.features == hailort::net_flow::ARGMAX_OUTPUT_FEATURES_SIZE, HAILO_INVALID_OPERATION,
+ "Dst features ({}) must be 1 on Argmax op", output_metadata.shape.features);
+ CHECK(input_metadata.shape.height == output_metadata.shape.height, HAILO_INVALID_OPERATION,
+ "Argmax op is supported only when src height ({}) is equal to dst height ({})",
+ input_metadata.shape.height, output_metadata.shape.height);
+ CHECK(input_metadata.shape.width == output_metadata.shape.width, HAILO_INVALID_OPERATION,
+ "Argmax op is supported only when src width ({}) is equal to dst width ({})",
+ input_metadata.shape.width, output_metadata.shape.width);
+ CHECK((
+ (input_metadata.format.type == HAILO_FORMAT_TYPE_UINT8) || (input_metadata.format.type == HAILO_FORMAT_TYPE_UINT16)),
+ HAILO_INVALID_OPERATION, "Src format type {} is not valid. Must be either {} or {}",
+ HailoRTCommon::get_format_type_str(input_metadata.format.type), HailoRTCommon::get_format_type_str(HAILO_FORMAT_TYPE_UINT8),
+ HailoRTCommon::get_format_type_str(HAILO_FORMAT_TYPE_UINT16));
+
+ return HAILO_SUCCESS;
+}
+
+Expected<std::shared_ptr<Op>> ArgmaxPostProcessOp::create(const std::map<std::string, BufferMetaData> &inputs_metadata,
+ std::map<std::string, BufferMetaData> &outputs_metadata)
+{
+ auto op = std::shared_ptr<ArgmaxPostProcessOp>(new (std::nothrow) ArgmaxPostProcessOp(inputs_metadata, outputs_metadata));
+ CHECK_AS_EXPECTED(op != nullptr, HAILO_OUT_OF_HOST_MEMORY);
+
+ return std::shared_ptr<Op>(std::move(op));
+}
+
+} /* namespace net_flow */
+} /* namespace hailort */
--- /dev/null
+/**
+ * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+**/
+/**
+ * @file argmax_post_process.hpp
+ * @brief: Argmax op - performs an argmax op as described: https://www.tensorflow.org/api_docs/python/tf/math/argmax
+ * A few notes:
+ * - Supported only along the features axis
+ * - Supported only for NHWC, NHCW and NC input data orders
+ * - In case of two equal maximal values, the lower index wins.
+ **/
+
+#ifndef _HAILO_ARGMAX_POST_PROCESS_HPP_
+#define _HAILO_ARGMAX_POST_PROCESS_HPP_
+
+
+#include "hailo/hailort.h"
+#include "net_flow/ops/op.hpp"
+#include "common/utils.hpp"
+
+#include <iostream>
+
+namespace hailort
+{
+namespace net_flow
+{
+
+#define ARGMAX_NUM_OF_POSSIBLE_FORMAT_ORDERS (3)
+#define ARGMAX_NUM_OF_POSSIBLE_FORMAT_TYPES (4)
+
+constexpr std::size_t ARGMAX_OUTPUT_FEATURES_SIZE {1};
+constexpr std::size_t ARGMAX_NUMBER_OF_SRCS {1};
+constexpr std::size_t ARGMAX_NUMBER_OF_DSTS {1};
+
+typedef hailo_status (*ArgmaxFunction)(const BufferMetaData &input_metadata, const BufferMetaData &output_metadata,
+ const std::map<std::string, MemoryView> &inputs, std::map<std::string, MemoryView> &outputs);
+
+class ArgmaxPostProcessOp : public Op
+{
+
+private:
+ ArgmaxPostProcessOp(const std::map<std::string, BufferMetaData> &inputs_metadata,
+ const std::map<std::string, BufferMetaData> &outputs_metadata)
+ : Op(inputs_metadata, outputs_metadata, "Argmax-Post-Process")
+ {}
+
+ template<typename DeviceType, typename HostType>
+ static hailo_status NHCW_to_NHW_feature_axis(const BufferMetaData &input_metadata, const BufferMetaData &output_metadata,
+ const std::map<std::string, MemoryView> &inputs, std::map<std::string, MemoryView> &outputs)
+ {
+ auto src_ptr = (DeviceType*)inputs.begin()->second.data();
+ auto dst_ptr = (HostType*)outputs.begin()->second.data();
+ const auto src_row_size = input_metadata.padded_shape.width * input_metadata.padded_shape.features;
+ const auto dst_row_size = output_metadata.shape.width;
+
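+ // NHCW layout: element (row r, channel c, column w) sits at
+ // r * src_row_size + c * padded_shape.width + w, so advancing one channel steps
+ // the pointer by padded_shape.width.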
+ for (uint32_t r = 0; r < input_metadata.shape.height; r++) {
+ const DeviceType *src_row = src_ptr + (r * src_row_size);
+ HostType *dst_row = dst_ptr + (r * dst_row_size);
+ for (uint32_t w = 0; w < input_metadata.shape.width; w++) {
+ const DeviceType *offset_in_row = src_row + w;
+ HostType max_index = 0;
+ auto max_value = *offset_in_row;
+ for (uint32_t c = 1; c < input_metadata.shape.features; c++) {
+ offset_in_row += input_metadata.padded_shape.width;
+                    const auto &current_value = *offset_in_row;
+ if (current_value > max_value) {
+ max_index = static_cast<HostType>(c);
+ max_value = current_value;
+ }
+ }
+ dst_row[w] = max_index;
+ }
+ }
+ return HAILO_SUCCESS;
+ }
+
+ template<typename DeviceType, typename HostType>
+ static hailo_status NHWC_to_NHW_feature_axis(const BufferMetaData &input_metadata, const BufferMetaData &output_metadata,
+ const std::map<std::string, MemoryView> &inputs, std::map<std::string, MemoryView> &outputs)
+ {
+ auto src_ptr = (DeviceType*)inputs.begin()->second.data();
+ auto dst_ptr = (HostType*)outputs.begin()->second.data();
+ const auto src_row_size = input_metadata.padded_shape.width * input_metadata.padded_shape.features;
+ const auto dst_row_size = output_metadata.shape.width;
+
+ for (uint32_t r = 0; r < input_metadata.shape.height; r++) {
+ const DeviceType *src_row = src_ptr + (r * src_row_size);
+ HostType *dst_row = dst_ptr + (r * dst_row_size);
+ for (uint32_t w = 0; w < input_metadata.shape.width; w++) {
+ const DeviceType *offset_in_row = src_row + (w * input_metadata.padded_shape.features);
+ HostType max_index = 0;
+ auto max_value = *offset_in_row;
+ for (uint32_t c = 1; c < input_metadata.shape.features; c++) {
+                    const auto &current_value = *(offset_in_row + c);
+ if (current_value > max_value) {
+ max_index = static_cast<HostType>(c);
+ max_value = current_value;
+ }
+ }
+ dst_row[w] = max_index;
+ }
+ }
+ return HAILO_SUCCESS;
+ }
+
+ template<typename DeviceType, typename HostType>
+ static hailo_status NC_to_N(const BufferMetaData &input_metadata, const BufferMetaData &output_metadata,
+ const std::map<std::string, MemoryView> &inputs, std::map<std::string, MemoryView> &outputs)
+ {
+        (void)output_metadata; // Unused; kept so the signature matches ArgmaxFunction for the dispatch array
+ auto src_ptr = (DeviceType*)inputs.begin()->second.data();
+ auto dst_ptr = (HostType*)outputs.begin()->second.data();
+ HostType max_index = 0;
+ DeviceType max_value = 0;
+
+ for (uint32_t c = 0; c < input_metadata.shape.features; c++) {
+            const auto &current_value = *(src_ptr + c);
+ if (current_value > max_value) {
+ max_index = static_cast<HostType>(c);
+ max_value = current_value;
+ }
+ }
+ *dst_ptr = max_index;
+ return HAILO_SUCCESS;
+ }
+
+ static hailo_status execute_not_supported(const BufferMetaData &input_metadata, const BufferMetaData &output_metadata,
+ const std::map<std::string, MemoryView> &inputs, std::map<std::string, MemoryView> &outputs);
+
+public:
+ static Expected<std::shared_ptr<Op>> create(const std::map<std::string, BufferMetaData> &inputs_metadata,
+ std::map<std::string, BufferMetaData> &outputs_metadata);
+ virtual hailo_status execute(const std::map<std::string, MemoryView> &inputs,
+ std::map<std::string, MemoryView> &outputs) override;
+ virtual std::string get_op_description() override;
+ hailo_status validate_metadata() override;
+
+ // A 3D array of argmax functions to call:
+    // 1st dim represents the data format order
+    // 2nd dim represents the input data type (only uint8 or uint16 are valid)
+    // 3rd dim represents the output data type
+    // Note: assumes the ordering of the enum hailo_format_type_t doesn't change
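+    // For example, a UINT8 -> UINT8 argmax for a given order index would dispatch through
+    // m_argmax_function_array[order_index][HAILO_FORMAT_TYPE_UINT8][HAILO_FORMAT_TYPE_UINT8].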
+ static ArgmaxFunction m_argmax_function_array[ARGMAX_NUM_OF_POSSIBLE_FORMAT_ORDERS][ARGMAX_NUM_OF_POSSIBLE_FORMAT_TYPES][ARGMAX_NUM_OF_POSSIBLE_FORMAT_TYPES];
+
+};
+
+} /* namespace net_flow */
+} /* namespace hailort */
+
+#endif /* _HAILO_ARGMAX_POST_PROCESS_HPP_ */
{
namespace net_flow
{
+
+ hailo_status NmsPostProcessOp::validate_metadata()
+ {
+ for (const auto& output_metadata : m_outputs_metadata) {
+ CHECK(HAILO_FORMAT_ORDER_HAILO_NMS == output_metadata.second.format.order, HAILO_INVALID_ARGUMENT, "The given output format order {} is not supported, "
+ "should be HAILO_FORMAT_ORDER_HAILO_NMS", HailoRTCommon::get_format_order_str(output_metadata.second.format.order));
+
+ CHECK(HAILO_FORMAT_TYPE_FLOAT32 == output_metadata.second.format.type, HAILO_INVALID_ARGUMENT, "The given output format type {} is not supported, "
+ "should be HAILO_FORMAT_TYPE_FLOAT32", HailoRTCommon::get_format_type_str(output_metadata.second.format.type));
+
+ CHECK(!(HAILO_FORMAT_FLAGS_TRANSPOSED & output_metadata.second.format.flags), HAILO_INVALID_ARGUMENT, "Output {} is marked as transposed, which is not supported for this model.",
+ output_metadata.first);
+ CHECK(!(HAILO_FORMAT_FLAGS_HOST_ARGMAX & output_metadata.second.format.flags), HAILO_INVALID_ARGUMENT, "Output {} is marked as argmax, which is not supported for this model.",
+ output_metadata.first);
+ CHECK(!(HAILO_FORMAT_FLAGS_QUANTIZED & output_metadata.second.format.flags), HAILO_INVALID_ARGUMENT, "Output {} is marked as quantized, which is not supported for this model.",
+ output_metadata.first);
+ }
+
+ assert(1 <= m_inputs_metadata.size());
+ const hailo_format_type_t& first_input_type = m_inputs_metadata.begin()->second.format.type;
+ for (const auto& input_metadata : m_inputs_metadata) {
+ CHECK(HAILO_FORMAT_ORDER_NHCW == input_metadata.second.format.order, HAILO_INVALID_ARGUMENT, "The given input format order {} is not supported, "
+ "should be HAILO_FORMAT_ORDER_NHCW", HailoRTCommon::get_format_order_str(input_metadata.second.format.order));
+
+ CHECK((HAILO_FORMAT_TYPE_UINT8 == input_metadata.second.format.type) ||
+ (HAILO_FORMAT_TYPE_UINT16 == input_metadata.second.format.type),
+ HAILO_INVALID_ARGUMENT, "The given input format type {} is not supported, should be HAILO_FORMAT_TYPE_UINT8 or HAILO_FORMAT_TYPE_UINT16",
+ HailoRTCommon::get_format_type_str(input_metadata.second.format.type));
+
+        CHECK(input_metadata.second.format.type == first_input_type, HAILO_INVALID_ARGUMENT, "All input format types should be the same");
+
+        CHECK(HAILO_FORMAT_FLAGS_QUANTIZED == input_metadata.second.format.flags, HAILO_INVALID_ARGUMENT, "The given input format flag is not supported, "
+            "should be HAILO_FORMAT_FLAGS_QUANTIZED");
+ }
+
+ return HAILO_SUCCESS;
+ }
+
float NmsPostProcessOp::compute_iou(const hailo_bbox_float32_t &box_1, const hailo_bbox_float32_t &box_2)
{
const float overlap_area_width = std::min(box_1.x_max, box_2.x_max) - std::max(box_1.x_min, box_2.x_min);
std::vector<uint32_t> &classes_detections_count)
{
    // Calculate the number of detections before each class, to help us later calculate the buffer_offset for its detections.
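    // For example (illustrative): with max_proposals_per_class = 3 and per-class counts {5, 2},
    // two detections of class 0 are ignored and the counts are clamped to {3, 2}.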
- std::vector<uint32_t> num_of_detections_before;
- num_of_detections_before.reserve(m_nms_config.classes);
+ std::vector<uint32_t> num_of_detections_before(m_nms_config.number_of_classes, 0);
uint32_t ignored_detections_count = 0;
- for (size_t class_idx = 0; class_idx < m_nms_config.classes; class_idx++) {
+ for (size_t class_idx = 0; class_idx < m_nms_config.number_of_classes; class_idx++) {
if (classes_detections_count[class_idx] > m_nms_config.max_proposals_per_class) {
ignored_detections_count += (classes_detections_count[class_idx] - m_nms_config.max_proposals_per_class);
classes_detections_count[class_idx] = m_nms_config.max_proposals_per_class;
std::string NmsPostProcessOp::get_nms_config_description()
{
auto config_info = fmt::format("Score threshold: {:.3f}, Iou threshold: {:.2f}, Classes: {}, Cross classes: {}",
- m_nms_config.nms_score_th, m_nms_config.nms_iou_th, m_nms_config.classes, m_nms_config.cross_classes);
+ m_nms_config.nms_score_th, m_nms_config.nms_iou_th, m_nms_config.number_of_classes, m_nms_config.cross_classes);
if (m_nms_config.background_removal) {
config_info += fmt::format(", Background removal index: {}", m_nms_config.background_removal_index);
}
uint32_t max_proposals_per_class = 0;
    // The model's number of classes. (This depends on the dataset that the model was trained on.)
- uint32_t classes = 0;
+ uint32_t number_of_classes = 0;
// Toggle background class removal from results
bool background_removal = false;
float32_t objectness, hailo_quant_info_t quant_info, uint32_t width)
{
std::pair<uint32_t, float32_t> max_id_score_pair;
- for (uint32_t class_index = 0; class_index < m_nms_config.classes; class_index++) {
+ for (uint32_t class_index = 0; class_index < m_nms_config.number_of_classes; class_index++) {
auto class_id = class_index;
if (m_nms_config.background_removal) {
if (m_nms_config.background_removal_index == class_index) {
std::string get_nms_config_description();
+ hailo_status validate_metadata() override;
+
};
}
*/
virtual hailo_status execute(const std::map<std::string, MemoryView> &inputs, std::map<std::string, MemoryView> &outputs) = 0;
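+    // Validates that the op's input/output metadata (shapes, formats, flags) form a supported combination.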
+ virtual hailo_status validate_metadata() = 0;
+
const std::map<std::string, BufferMetaData> &inputs_metadata() const
{
return m_inputs_metadata;
return m_outputs_metadata;
}
+ void set_outputs_metadata(std::map<std::string, BufferMetaData> &outputs_metadata)
+ {
+ m_outputs_metadata = outputs_metadata;
+ }
+
+ void set_inputs_metadata(std::map<std::string, BufferMetaData> &inputs_metadata)
+ {
+ m_inputs_metadata = inputs_metadata;
+ }
+
std::string get_name() {
return m_name;
}
--- /dev/null
+/**
+ * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+**/
+/**
+ * @file softmax_post_process.cpp
+ * @brief: Softmax op
+ **/
+
+#include "softmax_post_process.hpp"
+#include "hailo/hailort.h"
+#include "hailo/hailort_common.hpp"
+#include "common/utils.hpp"
+
+#include <limits>
+
+namespace hailort
+{
+namespace net_flow
+{
+
+// Called when attempting to perform the softmax op on an unsupported format combination
+hailo_status SoftmaxPostProcessOp::execute_not_supported(const BufferMetaData &input_metadata, const BufferMetaData &output_metadata,
+ const std::map<std::string, MemoryView> &inputs, std::map<std::string, MemoryView> &outputs)
+ {
+ (void)inputs;
+ (void)outputs;
+ LOGGER__ERROR("Softmax post-process not supported with params: input_order {}, input_type {}, output_type {}",
+ HailoRTCommon::get_format_order_str(input_metadata.format.order),
+ HailoRTCommon::get_format_type_str(input_metadata.format.type),
+ HailoRTCommon::get_format_type_str(output_metadata.format.type));
+ return HAILO_INVALID_ARGUMENT;
+ }
+
+SoftmaxFunction SoftmaxPostProcessOp::m_softmax_function_array[SOFTMAX_NUM_OF_POSSIBLE_FORMAT_ORDERS][SOFTMAX_NUM_OF_POSSIBLE_FORMAT_TYPES][SOFTMAX_NUM_OF_POSSIBLE_FORMAT_TYPES]
+{
+ // Currently supported on:
+ // NC, float_32 to NC, float_32
+ // NHWC, float_32 to NHWC, float_32
+ {
+ {
+ // NHWC x AUTO
+ // We don't support input_format_type to be auto
+ SoftmaxPostProcessOp::execute_not_supported,
+ SoftmaxPostProcessOp::execute_not_supported,
+ SoftmaxPostProcessOp::execute_not_supported,
+ SoftmaxPostProcessOp::execute_not_supported
+ },
+ {
+ // NHWC x UINT8
+ // We don't support input_format_type to be UINT8
+ SoftmaxPostProcessOp::execute_not_supported,
+ SoftmaxPostProcessOp::execute_not_supported,
+ SoftmaxPostProcessOp::execute_not_supported,
+ SoftmaxPostProcessOp::execute_not_supported
+ },
+ {
+ // NHWC x UINT16
+ // We don't support input_format_type to be UINT16
+ SoftmaxPostProcessOp::execute_not_supported,
+ SoftmaxPostProcessOp::execute_not_supported,
+ SoftmaxPostProcessOp::execute_not_supported,
+ SoftmaxPostProcessOp::execute_not_supported
+ },
+ {
+ // NHWC x FLOAT32
+ SoftmaxPostProcessOp::execute_not_supported, // We don't support output_format_type format of AUTO
+ SoftmaxPostProcessOp::execute_not_supported, // We don't support output_format_type format of UINT8
+ SoftmaxPostProcessOp::execute_not_supported, // We don't support output_format_type format of UINT16
+ SoftmaxPostProcessOp::NHWC_to_NHWC_feature_axis<float32_t, float32_t>
+ }
+ },
+ {
+ {
+ // NC x AUTO
+ // We don't support input_format_type to be auto
+ SoftmaxPostProcessOp::execute_not_supported,
+ SoftmaxPostProcessOp::execute_not_supported,
+ SoftmaxPostProcessOp::execute_not_supported,
+ SoftmaxPostProcessOp::execute_not_supported
+ },
+ {
+ // NC x UINT8
+ // We don't support input_format_type to be UINT8
+ SoftmaxPostProcessOp::execute_not_supported,
+ SoftmaxPostProcessOp::execute_not_supported,
+ SoftmaxPostProcessOp::execute_not_supported,
+ SoftmaxPostProcessOp::execute_not_supported,
+ },
+ {
+ // NC x UINT16
+ // We don't support input_format_type to be UINT16
+ SoftmaxPostProcessOp::execute_not_supported,
+ SoftmaxPostProcessOp::execute_not_supported,
+ SoftmaxPostProcessOp::execute_not_supported,
+ SoftmaxPostProcessOp::execute_not_supported,
+ },
+ {
+ // NC x FLOAT32
+ SoftmaxPostProcessOp::execute_not_supported, // We don't support output_format_type format of AUTO
+ SoftmaxPostProcessOp::execute_not_supported, // We don't support output_format_type format of UINT8
+ SoftmaxPostProcessOp::execute_not_supported, // We don't support output_format_type format of UINT16
+ SoftmaxPostProcessOp::NC_to_NC<float32_t, float32_t>,
+ }
+ }
+};
+
+hailo_status SoftmaxPostProcessOp::execute(const std::map<std::string, MemoryView> &inputs,
+ std::map<std::string, MemoryView> &outputs)
+{
+ auto &input_name = inputs.begin()->first;
+ auto &output_name = outputs.begin()->first;
+ auto &input_metadata = m_inputs_metadata[input_name];
+ auto &output_metadata = m_outputs_metadata[output_name];
+
+ uint8_t format_index = UINT8_MAX;
+ switch (input_metadata.format.order) {
+ case HAILO_FORMAT_ORDER_NHWC:
+ format_index = 0;
+ break;
+ case HAILO_FORMAT_ORDER_NC:
+ format_index = 1;
+ break;
+ default:
+ LOGGER__ERROR("Softmax post-process received invalid input order {}",
+ HailoRTCommon::get_format_order_str(input_metadata.format.order));
+ return HAILO_INVALID_ARGUMENT;
+ }
+    return SoftmaxPostProcessOp::m_softmax_function_array[format_index][input_metadata.format.type][output_metadata.format.type](
+        input_metadata, output_metadata, inputs, outputs);
+}
+
+std::string SoftmaxPostProcessOp::get_op_description()
+{
+ auto config_info = fmt::format("SoftmaxPostProcess Op, Name: {}", m_name);
+ return config_info;
+}
+
+hailo_status SoftmaxPostProcessOp::validate_metadata()
+{
+ assert(m_inputs_metadata.size() == hailort::net_flow::SOFTMAX_NUMBER_OF_SRCS);
+ assert(m_outputs_metadata.size() == hailort::net_flow::SOFTMAX_NUMBER_OF_DSTS);
+
+ auto &input_metadata = m_inputs_metadata.begin()->second;
+ auto &output_metadata = m_outputs_metadata.begin()->second;
+
+ CHECK(
+ ((input_metadata.format.flags & HAILO_FORMAT_FLAGS_QUANTIZED) == 0) && ((output_metadata.format.flags & HAILO_FORMAT_FLAGS_QUANTIZED) == 0),
+ HAILO_INVALID_OPERATION, "Softmax op is supported only on dequantized data");
+
+ CHECK(
+ ((input_metadata.format.order == HAILO_FORMAT_ORDER_NHWC) && (output_metadata.format.order == HAILO_FORMAT_ORDER_NHWC)) ||
+ ((input_metadata.format.order == HAILO_FORMAT_ORDER_NC) && (output_metadata.format.order == HAILO_FORMAT_ORDER_NC)),
+ HAILO_INVALID_OPERATION, "Softmax op is not supported for src format order ({}) and dst format order ({})",
+ HailoRTCommon::get_format_order_str(input_metadata.format.order),
+ HailoRTCommon::get_format_order_str(output_metadata.format.order));
+
+ CHECK(input_metadata.shape.features == output_metadata.shape.features, HAILO_INVALID_OPERATION,
+ "Softmax op is supported only when src num of features ({}) is equal to dst num of features ({})",
+ input_metadata.shape.features, output_metadata.shape.features);
+ CHECK(input_metadata.shape.height == output_metadata.shape.height, HAILO_INVALID_OPERATION,
+ "Softmax op is supported only when src height ({}) is equal to dst height ({})",
+ input_metadata.shape.height, output_metadata.shape.height);
+ CHECK(input_metadata.shape.width == output_metadata.shape.width, HAILO_INVALID_OPERATION,
+ "Softmax op is supported only when src width ({}) is equal to dst width ({})",
+ input_metadata.shape.width, output_metadata.shape.width);
+ CHECK(input_metadata.format.type == HAILO_FORMAT_TYPE_FLOAT32,
+ HAILO_INVALID_OPERATION, "Src format type {} is not valid. Must be {}",
+ HailoRTCommon::get_format_type_str(input_metadata.format.type),
+ HailoRTCommon::get_format_type_str(HAILO_FORMAT_TYPE_FLOAT32));
+ CHECK(output_metadata.format.type == HAILO_FORMAT_TYPE_FLOAT32,
+ HAILO_INVALID_OPERATION, "Dst format type {} is not valid. Must be {}",
+ HailoRTCommon::get_format_type_str(output_metadata.format.type),
+ HailoRTCommon::get_format_type_str(HAILO_FORMAT_TYPE_FLOAT32));
+ CHECK(!(HAILO_FORMAT_FLAGS_HOST_ARGMAX & output_metadata.format.flags), HAILO_INVALID_ARGUMENT, "Output {} is marked as argmax, which is not supported for this model.",
+ m_outputs_metadata.begin()->first);
+ CHECK(!(HAILO_FORMAT_FLAGS_QUANTIZED & output_metadata.format.flags), HAILO_INVALID_ARGUMENT, "Output {} is marked as quantized, which is not supported for this model.",
+ m_outputs_metadata.begin()->first);
+
+ return HAILO_SUCCESS;
+}
+
+Expected<std::shared_ptr<Op>> SoftmaxPostProcessOp::create(const std::map<std::string, BufferMetaData> &inputs_metadata,
+ std::map<std::string, BufferMetaData> &outputs_metadata)
+{
+ auto op = std::shared_ptr<SoftmaxPostProcessOp>(new (std::nothrow) SoftmaxPostProcessOp(inputs_metadata, outputs_metadata));
+ CHECK_AS_EXPECTED(op != nullptr, HAILO_OUT_OF_HOST_MEMORY);
+
+ return std::shared_ptr<Op>(std::move(op));
+}
+
+} /* namespace net_flow */
+} /* namespace hailort */
\ No newline at end of file
--- /dev/null
+/**
+ * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+**/
+/**
+ * @file softmax_post_process.hpp
+ * @brief: Softmax op - performs softmax as described in: https://www.tensorflow.org/api_docs/python/tf/nn/softmax
+ * A few notes:
+ * - Supported only on the features axis
+ * - Supported only for NHWC and NC input data orders
+ **/
+
+#ifndef _HAILO_SOFTMAX_POST_PROCESS_HPP_
+#define _HAILO_SOFTMAX_POST_PROCESS_HPP_
+
+#include "hailo/hailort.h"
+#include "net_flow/ops/op.hpp"
+#include "common/utils.hpp"
+#include "hailo/quantization.hpp"
+
+#include <iostream>
+
+namespace hailort
+{
+namespace net_flow
+{
+
+#define SOFTMAX_NUM_OF_POSSIBLE_FORMAT_ORDERS (2) // NHWC, NC
+#define SOFTMAX_NUM_OF_POSSIBLE_FORMAT_TYPES (4) // Auto, UINT8, UINT16, FLOAT32
+
+constexpr std::size_t SOFTMAX_NUMBER_OF_SRCS {1};
+constexpr std::size_t SOFTMAX_NUMBER_OF_DSTS {1};
+
+typedef hailo_status (*SoftmaxFunction)(const BufferMetaData &input_metadata, const BufferMetaData &output_metadata,
+ const std::map<std::string, MemoryView> &inputs, std::map<std::string, MemoryView> &outputs);
+
+class SoftmaxPostProcessOp : public Op
+{
+
+private:
+ SoftmaxPostProcessOp(const std::map<std::string, BufferMetaData> &inputs_metadata,
+ const std::map<std::string, BufferMetaData> &outputs_metadata)
+ : Op(inputs_metadata, outputs_metadata, "Softmax-Post-Process")
+ {}
+
+ template<typename src_type = float32_t, typename dst_type = float32_t>
+ static hailo_status NHWC_to_NHWC_feature_axis(const BufferMetaData &input_metadata, const BufferMetaData &output_metadata,
+ const std::map<std::string, MemoryView> &inputs, std::map<std::string, MemoryView> &outputs)
+ {
+        auto src_ptr = (src_type*)inputs.begin()->second.data();
+        auto dst_ptr = (dst_type*)outputs.begin()->second.data();
+ const auto src_row_size = input_metadata.shape.width * input_metadata.shape.features;
+ const auto dst_row_size = output_metadata.shape.width * output_metadata.shape.features;
+ const auto src_width_size = input_metadata.shape.features;
+ const auto dst_width_size = output_metadata.shape.features;
+
+ for (uint32_t r = 0; r < input_metadata.shape.height; r++) { // H axis - rows
+            src_type *src_row = src_ptr + (r * src_row_size);
+            dst_type *dst_row = dst_ptr + (r * dst_row_size);
+            for (uint32_t w = 0; w < input_metadata.shape.width; w++) { // W axis - columns
+                src_type *src_col = src_row + (w * src_width_size);
+                dst_type *dst_col = dst_row + (w * dst_width_size);
+                // In order to avoid overflows, we find the maximal value for each HW
+                // and subtract it from all of the values in that HW. This preserves
+                // the original softmax values and prevents overflows.
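+                // (Softmax is shift-invariant: e^(x_i - m) / sum_j e^(x_j - m) == e^(x_i) / sum_j e^(x_j),
+                // so subtracting m = max(x) leaves the result unchanged.)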
+                src_type max_val = std::numeric_limits<src_type>::lowest(); // lowest(), not min(): min() is the smallest positive float
+ for (uint32_t c = 0; c < input_metadata.shape.features; c++) {
+                    auto &current_value = *(src_col + c);
+ if (current_value > max_val)
+ max_val = current_value;
+ }
+ dst_type sum_exp = 0; // denominator
+ for (uint32_t c = 0; c < input_metadata.shape.features; c++) { // C axis - features
+                    auto &current_value = *(src_col + c);
+                    current_value -= max_val; // Preserves the original softmax values and prevents overflows
+                    current_value = std::exp(static_cast<float32_t>(current_value)); // Set src_col[c] to e^(src_col[c]) so that we only calculate it once
+ sum_exp += current_value;
+ }
+ for (uint32_t c = 0; c < input_metadata.shape.features; c++) {
+                    const auto &current_value = *(src_col + c);
+ dst_col[c] = static_cast<dst_type>(current_value / sum_exp);
+ }
+ }
+ }
+ return HAILO_SUCCESS;
+ }
+
+ template<typename src_type = float32_t, typename dst_type = float32_t>
+ static hailo_status NC_to_NC(const BufferMetaData &input_metadata, const BufferMetaData &output_metadata,
+ const std::map<std::string, MemoryView> &inputs, std::map<std::string, MemoryView> &outputs)
+ {
+        (void)output_metadata; // Unused; kept so the signature matches SoftmaxFunction for the dispatch array
+ auto src_ptr = (src_type*)inputs.begin()->second.data();
+ auto dst_ptr = (dst_type*)outputs.begin()->second.data();
+        // In order to avoid overflows, we find the maximal value and subtract it from
+        // all of the values. This preserves the original softmax values and prevents overflows.
+        src_type max_val = std::numeric_limits<src_type>::lowest(); // lowest(), not min(): min() is the smallest positive float
+ for (uint32_t c = 0; c < input_metadata.shape.features; c++) {
+            auto &current_value = *(src_ptr + c);
+ if (current_value > max_val)
+ max_val = current_value;
+ }
+ dst_type sum_exp = 0;
+ for (uint32_t c = 0; c < input_metadata.shape.features; c++) {
+            auto &current_value = *(src_ptr + c);
+            current_value -= max_val; // Preserves the original softmax values and prevents overflows
+            current_value = std::exp(static_cast<float32_t>(current_value)); // Set src_ptr[c] to e^(src_ptr[c]) so that we only calculate it once
+ sum_exp += current_value;
+ }
+ for (uint32_t c = 0; c < input_metadata.shape.features; c++) {
+ dst_ptr[c] = static_cast<dst_type>(src_ptr[c] / sum_exp);
+ }
+ return HAILO_SUCCESS;
+ }
+
+ static hailo_status execute_not_supported(const BufferMetaData &input_metadata, const BufferMetaData &output_metadata,
+ const std::map<std::string, MemoryView> &inputs, std::map<std::string, MemoryView> &outputs);
+
+public:
+ static Expected<std::shared_ptr<Op>> create(const std::map<std::string, BufferMetaData> &inputs_metadata,
+ std::map<std::string, BufferMetaData> &outputs_metadata);
+ virtual hailo_status execute(const std::map<std::string, MemoryView> &inputs,
+ std::map<std::string, MemoryView> &outputs) override;
+ virtual std::string get_op_description() override;
+ hailo_status validate_metadata() override;
+
+ // A 3D array of softmax functions to call:
+    // 1st dim represents the data format order (NHWC and NC are supported)
+    // 2nd dim represents the input data type (only float_32 is supported)
+    // 3rd dim represents the output data type (only float_32 is supported)
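+    // For example, an NHWC float32 -> float32 softmax dispatches (via execute()) to
+    // m_softmax_function_array[0][HAILO_FORMAT_TYPE_FLOAT32][HAILO_FORMAT_TYPE_FLOAT32],
+    // i.e. NHWC_to_NHWC_feature_axis<float32_t, float32_t>.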
+ static SoftmaxFunction m_softmax_function_array[SOFTMAX_NUM_OF_POSSIBLE_FORMAT_ORDERS][SOFTMAX_NUM_OF_POSSIBLE_FORMAT_TYPES][SOFTMAX_NUM_OF_POSSIBLE_FORMAT_TYPES];
+
+};
+
+} /* namespace net_flow */
+} /* namespace hailort */
+
+#endif /* _HAILO_SOFTMAX_POST_PROCESS_HPP_ */
\ No newline at end of file
namespace net_flow
{
+hailo_status SSDPostProcessOp::validate_metadata()
+{
+ auto status = NmsPostProcessOp::validate_metadata();
+ if (HAILO_SUCCESS != status) {
+ return status;
+ }
+
+ return HAILO_SUCCESS;
+}
+
Expected<std::shared_ptr<Op>> SSDPostProcessOp::create(const std::map<std::string, BufferMetaData> &inputs_metadata,
const std::map<std::string, BufferMetaData> &outputs_metadata,
const NmsPostProcessConfig &nms_post_process_config,
const SSDPostProcessConfig &ssd_post_process_config)
{
- for (auto &name_to_inputs_metadata : inputs_metadata) {
- CHECK_AS_EXPECTED(name_to_inputs_metadata.second.format.order == HAILO_FORMAT_ORDER_NHCW, HAILO_INVALID_ARGUMENT,
- "SSDPostProcessOp: Unexpected input format {}", name_to_inputs_metadata.second.format.order);
- }
-
// Validate each anchor is mapped by reg and cls inputs
for (const auto ®_to_cls_name : ssd_post_process_config.reg_to_cls_inputs) {
CHECK_AS_EXPECTED(ssd_post_process_config.anchors.count(reg_to_cls_name.first), HAILO_INVALID_ARGUMENT,
}
auto op = std::shared_ptr<SSDPostProcessOp>(new (std::nothrow) SSDPostProcessOp(inputs_metadata, outputs_metadata, nms_post_process_config, ssd_post_process_config));
CHECK_AS_EXPECTED(op != nullptr, HAILO_OUT_OF_HOST_MEMORY);
+
return std::shared_ptr<Op>(std::move(op));
}
m_ssd_config.anchors.size(), inputs.size());
std::vector<DetectionBbox> detections;
- std::vector<uint32_t> classes_detections_count(m_nms_config.classes, 0);
- detections.reserve(m_nms_config.max_proposals_per_class * m_nms_config.classes);
+ std::vector<uint32_t> classes_detections_count(m_nms_config.number_of_classes, 0);
+ detections.reserve(m_nms_config.max_proposals_per_class * m_nms_config.number_of_classes);
for (const auto ®_to_cls : m_ssd_config.reg_to_cls_inputs) {
assert(inputs.count(reg_to_cls.first));
assert(inputs.count(reg_to_cls.second));
const auto &layer_anchors = m_ssd_config.anchors[reg_input_name];
assert(layer_anchors.size() % 2 == 0);
const size_t num_of_anchors = (layer_anchors.size() / 2);
+ // TODO: HRT-11044 support mixed data types
+ auto data_size_in_bytes = HailoRTCommon::get_data_bytes(m_inputs_metadata.begin()->second.format.type);
// Validate reg buffer size
static const uint32_t reg_entry_size = 4;
auto number_of_entries = reg_padded_shape.height * reg_padded_shape.width * num_of_anchors;
- auto buffer_size = number_of_entries * reg_entry_size;
+ auto buffer_size = number_of_entries * reg_entry_size * data_size_in_bytes;
CHECK(buffer_size == reg_buffer.size(), HAILO_INVALID_ARGUMENT,
"Failed to extract_detections, reg {} buffer_size should be {}, but is {}", reg_input_name, buffer_size, reg_buffer.size());
// Validate cls buffer size
- const uint32_t cls_entry_size = m_nms_config.classes;
+ const uint32_t cls_entry_size = m_nms_config.number_of_classes;
number_of_entries = cls_padded_shape.height * cls_padded_shape.width * num_of_anchors;
- buffer_size = number_of_entries * cls_entry_size;
+ buffer_size = number_of_entries * cls_entry_size * data_size_in_bytes;
CHECK(buffer_size == cls_buffer.size(), HAILO_INVALID_ARGUMENT,
"Failed to extract_detections, cls {} buffer_size should be {}, but is {}", cls_input_name, buffer_size, cls_buffer.size());
detections, classes_detections_count);
CHECK_SUCCESS(status);
} else if (m_inputs_metadata[reg_input_name].format.type == HAILO_FORMAT_TYPE_FLOAT32) {
- // For testing - TODO: Remove after generator tests are in, and return error.
+ // For testing - TODO: HRT-9341 - Remove after generator tests are in, and return error.
auto status = extract_bbox_detections<float32_t, float32_t>(
reg_input_name, cls_input_name,
reg_buffer, cls_buffer,
hailo_status execute(const std::map<std::string, MemoryView> &inputs, std::map<std::string, MemoryView> &outputs) override;
std::string get_op_description() override;
+ hailo_status validate_metadata() override; // TODO: HRT-10676
static const uint32_t DEFAULT_Y_OFFSET_IDX = 0;
static const uint32_t DEFAULT_X_OFFSET_IDX = 1;
classes_detections_count[max_id_score_pair.first]++;
}
} else {
- for (uint32_t class_index = 0; class_index < m_nms_config.classes; class_index++) {
+ for (uint32_t class_index = 0; class_index < m_nms_config.number_of_classes; class_index++) {
auto class_id = class_index;
if (m_nms_config.background_removal) {
if (m_nms_config.background_removal_index == class_index) {
namespace net_flow
{
+hailo_status YOLOv5PostProcessOp::validate_metadata()
+{
+ auto status = NmsPostProcessOp::validate_metadata();
+ if (HAILO_SUCCESS != status) {
+ return status;
+ }
+
+ return HAILO_SUCCESS;
+}
+
+// TODO: HRT-10858 - Move to a dedicated module, and maybe convert all yolo functions to yolov5
Expected<std::shared_ptr<Op>> YOLOv5PostProcessOp::create(const std::map<std::string, BufferMetaData> &inputs_metadata,
const std::map<std::string, BufferMetaData> &outputs_metadata,
const NmsPostProcessConfig &nms_post_process_config,
}
auto op = std::shared_ptr<YOLOv5PostProcessOp>(new (std::nothrow) YOLOv5PostProcessOp(inputs_metadata, outputs_metadata, nms_post_process_config, yolo_post_process_config));
CHECK_AS_EXPECTED(op != nullptr, HAILO_OUT_OF_HOST_MEMORY);
- return std::shared_ptr<Op>(std::move(op));
-}
-Expected<std::shared_ptr<Op>> YOLOXPostProcessOp::create(const std::map<std::string, BufferMetaData> &inputs_metadata,
- const std::map<std::string, BufferMetaData> &outputs_metadata,
- const NmsPostProcessConfig &nms_post_process_config,
- const YoloPostProcessConfig &yolo_post_process_config)
-{
- for (auto &name_to_inputs_metadata : inputs_metadata) {
- CHECK_AS_EXPECTED(name_to_inputs_metadata.second.format.order == HAILO_FORMAT_ORDER_NHCW, HAILO_INVALID_ARGUMENT,
- "YOLOv5PostProcessOp: Unexpected input format {}", name_to_inputs_metadata.second.format.order);
- }
- auto modified_yolo_post_process_config = yolo_post_process_config;
- for (auto &name_to_meta : inputs_metadata) {
- std::vector<int> anchors = {1, 1};
- modified_yolo_post_process_config.anchors.insert({name_to_meta.first, anchors});
- }
- auto op = std::shared_ptr<YOLOXPostProcessOp>(new (std::nothrow) YOLOXPostProcessOp(inputs_metadata, outputs_metadata, nms_post_process_config,
- modified_yolo_post_process_config));
- CHECK_AS_EXPECTED(op != nullptr, HAILO_OUT_OF_HOST_MEMORY);
return std::shared_ptr<Op>(std::move(op));
}
m_yolo_config.anchors.size(), inputs.size());
std::vector<DetectionBbox> detections;
- std::vector<uint32_t> classes_detections_count(m_nms_config.classes, 0);
- detections.reserve(m_nms_config.max_proposals_per_class * m_nms_config.classes);
+ std::vector<uint32_t> classes_detections_count(m_nms_config.number_of_classes, 0);
+ detections.reserve(m_nms_config.max_proposals_per_class * m_nms_config.number_of_classes);
for (const auto &name_to_input : inputs) {
hailo_status status;
auto &name = name_to_input.first;
status = extract_detections<float32_t, uint16_t>(name_to_input.second, input_metadata.quant_info, input_metadata.shape,
input_metadata.padded_shape, m_yolo_config.anchors[name], detections, classes_detections_count);
} else {
- CHECK_SUCCESS(HAILO_INVALID_ARGUMENT, "YOLOv5 post-process received invalid input type");
+ CHECK_SUCCESS(HAILO_INVALID_ARGUMENT, "YOLO post-process received invalid input type {}", input_metadata.format.type);
}
CHECK_SUCCESS(status);
}
return hailo_bbox_float32_t{y_min, x_min, (y_min+h), (x_min+w), 0};
}
-hailo_bbox_float32_t YOLOXPostProcessOp::decode(float32_t tx, float32_t ty, float32_t tw, float32_t th,
- int wa, int ha, uint32_t col, uint32_t row, uint32_t w_stride, uint32_t h_stride) const
-{
- auto w = exp(tw) * static_cast<float32_t>(wa) / m_yolo_config.image_width;
- auto h = exp(th) * static_cast<float32_t>(ha) / m_yolo_config.image_height;
- auto x_center = (tx + static_cast<float32_t>(col)) / static_cast<float32_t>(w_stride);
- auto y_center = (ty + static_cast<float32_t>(row)) / static_cast<float32_t>(h_stride);
- auto x_min = (x_center - (w / 2.0f));
- auto y_min = (y_center - (h / 2.0f));
- return hailo_bbox_float32_t{y_min, x_min, (y_min+h), (x_min+w), 0};
-}
-
} // namespace net_flow
} // namespace hailort
public:
hailo_status execute(const std::map<std::string, MemoryView> &inputs, std::map<std::string, MemoryView> &outputs) override;
std::string get_op_description() override;
+ virtual hailo_status validate_metadata() = 0; // TODO: HRT-10676
protected:
virtual hailo_bbox_float32_t decode(float32_t tx, float32_t ty, float32_t tw, float32_t th,
assert(layer_anchors.size() % 2 == 0);
const size_t num_of_anchors = (layer_anchors.size() / 2);
- uint32_t entry_size = (uint32_t)((CLASSES_START_INDEX + m_nms_config.classes) * sizeof(DeviceType));
+ uint32_t entry_size = (uint32_t)(CLASSES_START_INDEX + m_nms_config.number_of_classes);
auto number_of_entries = padded_shape.height * padded_shape.width * num_of_anchors;
// TODO: this can also be part of the Op configuration
- auto buffer_size = number_of_entries * entry_size;
+ auto buffer_size = number_of_entries * entry_size * sizeof(DeviceType);
CHECK(buffer_size == buffer.size(), HAILO_INVALID_ARGUMENT,
"Failed to extract_detections, buffer_size should be {}, but is {}", buffer_size, buffer.size());
for (uint32_t col = 0; col < shape.width; col++) {
for (uint32_t anchor = 0; anchor < num_of_anchors; anchor++) {
auto entry_idx = (row_size * row) + col + ((anchor * entry_size) * padded_shape.width);
-
auto objectness = Quantization::dequantize_output<HostType, DeviceType>(data[entry_idx + OBJECTNESS_OFFSET], quant_info);
if (objectness < m_nms_config.nms_score_th) {
continue;
}
}
else {
- for (uint32_t class_index = 0; class_index < m_nms_config.classes; class_index++) {
+ for (uint32_t class_index = 0; class_index < m_nms_config.number_of_classes; class_index++) {
auto class_entry_idx = entry_idx + ((CLASSES_START_INDEX + class_index) * padded_shape.width);
auto class_confidence = Quantization::dequantize_output<HostType, DeviceType>(
data[class_entry_idx], quant_info);
const std::map<std::string, BufferMetaData> &outputs_metadata,
const NmsPostProcessConfig &nms_post_process_config,
const YoloPostProcessConfig &yolo_post_process_config);
+ hailo_status validate_metadata() override; // TODO: HRT-10676
protected:
virtual hailo_bbox_float32_t decode(float32_t tx, float32_t ty, float32_t tw, float32_t th,
{}
};
-class YOLOXPostProcessOp : public YOLOPostProcessOp
-{
-public:
- static Expected<std::shared_ptr<Op>> create(const std::map<std::string, BufferMetaData> &inputs_metadata,
- const std::map<std::string, BufferMetaData> &outputs_metadata,
- const NmsPostProcessConfig &nms_post_process_config,
- const YoloPostProcessConfig &yolo_post_process_config);
-
-protected:
- virtual hailo_bbox_float32_t decode(float32_t tx, float32_t ty, float32_t tw, float32_t th,
- int wa, int ha, uint32_t col, uint32_t row, uint32_t w_stride, uint32_t h_stride) const override;
-
-private:
- YOLOXPostProcessOp(const std::map<std::string, BufferMetaData> &inputs_metadata,
- const std::map<std::string, BufferMetaData> &outputs_metadata,
- const NmsPostProcessConfig &nms_post_process_config,
- const YoloPostProcessConfig &yolo_post_process_config)
- : YOLOPostProcessOp(inputs_metadata, outputs_metadata, nms_post_process_config, yolo_post_process_config, "YOLOX-Post-Process")
- {}
-};
-
} // namespace net_flow
} // namespace hailort
--- /dev/null
+/**
+ * Copyright (c) 2022 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+**/
+/**
+ * @file yolox_post_process.cpp
+ * @brief YOLOX post process
+ *
+ **/
+
+#include "net_flow/ops/yolox_post_process.hpp"
+
+namespace hailort
+{
+namespace net_flow
+{
+
+Expected<std::shared_ptr<Op>> YOLOXPostProcessOp::create(const std::map<std::string, BufferMetaData> &inputs_metadata,
+ const std::map<std::string, BufferMetaData> &outputs_metadata,
+ const NmsPostProcessConfig &nms_post_process_config,
+ const YoloxPostProcessConfig &yolox_post_process_config)
+{
+ auto op = std::shared_ptr<YOLOXPostProcessOp>(new (std::nothrow) YOLOXPostProcessOp(inputs_metadata, outputs_metadata, nms_post_process_config,
+ yolox_post_process_config));
+ CHECK_AS_EXPECTED(op != nullptr, HAILO_OUT_OF_HOST_MEMORY);
+
+ return std::shared_ptr<Op>(std::move(op));
+}
+
+hailo_status YOLOXPostProcessOp::validate_metadata()
+{
+ auto status = NmsPostProcessOp::validate_metadata();
+ if (HAILO_SUCCESS != status) {
+ return status;
+ }
+
+    // Validate that matching reg, cls and obj layers have the same shape
+ for (const auto &layer_names : m_yolox_config.input_names) {
+ CHECK(contains(m_inputs_metadata, layer_names.reg), HAILO_INVALID_ARGUMENT,
+ "YOLOXPostProcessOp: inputs_metadata does not contain reg layer {}", layer_names.reg);
+ CHECK(contains(m_inputs_metadata, layer_names.cls), HAILO_INVALID_ARGUMENT,
+ "YOLOXPostProcessOp: inputs_metadata does not contain cls layer {}", layer_names.cls);
+ CHECK(contains(m_inputs_metadata, layer_names.obj), HAILO_INVALID_ARGUMENT,
+ "YOLOXPostProcessOp: inputs_metadata does not contain obj layer {}", layer_names.obj);
+
+ const auto ®_input_metadata = m_inputs_metadata.at(layer_names.reg);
+ const auto &cls_input_metadata = m_inputs_metadata.at(layer_names.cls);
+ const auto &obj_input_metadata = m_inputs_metadata.at(layer_names.obj);
+
+ // NOTE: padded shape might be different because features might be different,
+ // and padding is added when width*features % 8 != 0
+ CHECK((reg_input_metadata.shape.height == cls_input_metadata.shape.height)
+ && (reg_input_metadata.shape.width == cls_input_metadata.shape.width),
+ HAILO_INVALID_ARGUMENT, "YOLOXPostProcess: reg input {} has different shape than cls input {}",
+ layer_names.reg, layer_names.cls);
+ CHECK((obj_input_metadata.shape.height == reg_input_metadata.shape.height)
+ && (obj_input_metadata.shape.width == reg_input_metadata.shape.width),
+ HAILO_INVALID_ARGUMENT, "YOLOXPostProcess: reg input {} has different shape than obj input {}",
+ layer_names.reg, layer_names.obj);
+
+ CHECK((cls_input_metadata.format.type == reg_input_metadata.format.type)
+ && (cls_input_metadata.format.flags == reg_input_metadata.format.flags)
+ && (cls_input_metadata.format.order == reg_input_metadata.format.order),
+ HAILO_INVALID_ARGUMENT, "YOLOXPostProcess: reg input {} has different format than cls input {}",
+ layer_names.reg, layer_names.cls);
+ CHECK((obj_input_metadata.format.type == reg_input_metadata.format.type)
+ && (obj_input_metadata.format.flags == reg_input_metadata.format.flags)
+ && (obj_input_metadata.format.order == reg_input_metadata.format.order),
+ HAILO_INVALID_ARGUMENT, "YOLOXPostProcess: reg input {} has different format than obj input {}",
+ layer_names.reg, layer_names.obj);
+
+ }
+
+ return HAILO_SUCCESS;
+}
+
+hailo_status YOLOXPostProcessOp::execute(const std::map<std::string, MemoryView> &inputs, std::map<std::string, MemoryView> &outputs)
+{
+ std::vector<DetectionBbox> detections;
+ std::vector<uint32_t> classes_detections_count(m_nms_config.number_of_classes, 0);
+ detections.reserve(m_nms_config.max_proposals_per_class * m_nms_config.number_of_classes);
+ for (const auto &layers_names_triplet : m_yolox_config.input_names) {
+ hailo_status status;
+ assert(contains(inputs, layers_names_triplet.cls));
+ assert(contains(inputs, layers_names_triplet.obj));
+ assert(contains(inputs, layers_names_triplet.reg));
+
+ auto &input_metadata = m_inputs_metadata[layers_names_triplet.reg];
+ if (input_metadata.format.type == HAILO_FORMAT_TYPE_UINT8) {
+ status = extract_detections<float32_t, uint8_t>(layers_names_triplet, inputs.at(layers_names_triplet.reg), inputs.at(layers_names_triplet.cls),
+ inputs.at(layers_names_triplet.obj), detections, classes_detections_count);
+ } else if (input_metadata.format.type == HAILO_FORMAT_TYPE_UINT16) {
+ status = extract_detections<float32_t, uint16_t>(layers_names_triplet, inputs.at(layers_names_triplet.reg), inputs.at(layers_names_triplet.cls),
+ inputs.at(layers_names_triplet.obj), detections, classes_detections_count);
+ } else {
+ CHECK_SUCCESS(HAILO_INVALID_ARGUMENT, "YOLO post-process received invalid input type {}", input_metadata.format.type);
+ }
+
+ CHECK_SUCCESS(status);
+ }
+
+ return hailo_nms_format(std::move(detections), outputs.begin()->second, classes_detections_count);
+}
+
+hailo_bbox_float32_t YOLOXPostProcessOp::decode(float32_t tx, float32_t ty, float32_t tw, float32_t th,
+ uint32_t col, uint32_t row, float32_t reg_shape_width, float32_t reg_shape_height) const
+{
+ /**
+     * Note that the calculations are a bit different from the source (in order to save some run time).
+ * Each "/ reg_shape_width" is equivalent to "* w_stride / m_yolox_config.image_width".
+ * Each "/ reg_shape_height" is equivalent to "* h_stride / m_yolox_config.image_height".
+ **/
+ auto w = exp(tw) / reg_shape_width;
+ auto h = exp(th) / reg_shape_height;
+ auto x_center = (tx + static_cast<float32_t>(col)) / reg_shape_width;
+ auto y_center = (ty + static_cast<float32_t>(row)) / reg_shape_height;
+ auto x_min = (x_center - (w / 2.0f));
+ auto y_min = (y_center - (h / 2.0f));
+
+ return hailo_bbox_float32_t{y_min, x_min, (y_min+h), (x_min+w), 0};
+}
+
+std::string YOLOXPostProcessOp::get_op_description()
+{
+ auto nms_config_info = get_nms_config_description();
+ auto config_info = fmt::format("Name: {}, {}, Image height: {:.2f}, Image width: {:.2f}",
+ m_name, nms_config_info, m_yolox_config.image_height, m_yolox_config.image_width);
+ return config_info;
+}
+
+} // namespace net_flow
+} // namespace hailort
--- /dev/null
+/**
+ * Copyright (c) 2022 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+**/
+/**
+ * @file yolox_post_process.hpp
+ * @brief YOLOX post process
+ *
+ **/
+
+#ifndef _HAILO_YOLOX_POST_PROCESS_HPP_
+#define _HAILO_YOLOX_POST_PROCESS_HPP_
+
+#include "net_flow/ops/nms_post_process.hpp"
+
+namespace hailort
+{
+namespace net_flow
+{
+
+struct MatchingLayersNames
+{
+ // Regression layer
+ std::string reg;
+
+ // Objectness layer
+ std::string obj;
+
+ // Classifications layer
+ std::string cls;
+};
+
+struct YoloxPostProcessConfig
+{
+ // The image height.
+ float32_t image_height = 0;
+
+ // The image width.
+ float32_t image_width = 0;
+
+    // A vector of triplets of layer names (reg, obj, cls), representing the relations between the output names.
+ std::vector<MatchingLayersNames> input_names;
+};
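+
+// A minimal configuration sketch (the layer names below are hypothetical):
+//   YoloxPostProcessConfig config{};
+//   config.image_height = 640.0f;
+//   config.image_width = 640.0f;
+//   config.input_names.push_back({"reg_layer_0", "obj_layer_0", "cls_layer_0"});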
+
+class YOLOXPostProcessOp : public NmsPostProcessOp
+{
+public:
+ static Expected<std::shared_ptr<Op>> create(const std::map<std::string, BufferMetaData> &inputs_metadata,
+ const std::map<std::string, BufferMetaData> &outputs_metadata,
+ const NmsPostProcessConfig &nms_post_process_config,
+ const YoloxPostProcessConfig &yolo_post_process_config);
+
+ hailo_status execute(const std::map<std::string, MemoryView> &inputs, std::map<std::string, MemoryView> &outputs) override;
+ std::string get_op_description() override;
+    hailo_status validate_metadata() override; // TODO: HRT-10676
+
+private:
+ template<typename HostType = float32_t, typename DeviceType>
+ hailo_status extract_detections(const MatchingLayersNames &layers_names, const MemoryView ®_buffer, const MemoryView &cls_buffer,
+ const MemoryView &obj_buffer, std::vector<DetectionBbox> &detections, std::vector<uint32_t> &classes_detections_count)
+ {
+ const auto ®_shape = m_inputs_metadata[layers_names.reg].shape;
+ const auto ®_padded_shape = m_inputs_metadata[layers_names.reg].padded_shape;
+ const auto &cls_padded_shape = m_inputs_metadata[layers_names.cls].padded_shape;
+ const auto &obj_padded_shape = m_inputs_metadata[layers_names.obj].padded_shape;
+ const auto ®_quant_info = m_inputs_metadata[layers_names.reg].quant_info;
+ const auto &cls_quant_info = m_inputs_metadata[layers_names.cls].quant_info;
+ const auto &obj_quant_info = m_inputs_metadata[layers_names.obj].quant_info;
+
+ static const uint32_t X_INDEX = 0;
+ static const uint32_t Y_INDEX = 1;
+ static const uint32_t W_INDEX = 2;
+ static const uint32_t H_INDEX = 3;
+
+ const uint32_t X_OFFSET = X_INDEX * reg_padded_shape.width;
+ const uint32_t Y_OFFSET = Y_INDEX * reg_padded_shape.width;
+ const uint32_t W_OFFSET = W_INDEX * reg_padded_shape.width;
+ const uint32_t H_OFFSET = H_INDEX * reg_padded_shape.width;
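+        // In the layout assumed here, each of the tx/ty/tw/th features occupies a full
+        // (padded) row-width plane, so the four entries of one anchor point are strided
+        // apart by reg_padded_shape.width (hence the *_INDEX * width offsets above).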
+
+ static const uint32_t CLASSES_START_INDEX = 0;
+
+ // Validate regression buffer size
+        static const uint32_t reg_entry_size = 4; // tx, ty, tw, th
+ auto number_of_entries = reg_padded_shape.height * reg_padded_shape.width;
+ auto buffer_size = number_of_entries * reg_entry_size * sizeof(DeviceType);
+ CHECK(buffer_size == reg_buffer.size(), HAILO_INVALID_ARGUMENT,
+ "Failed to extract_detections, reg {} buffer_size should be {}, but is {}", layers_names.reg, buffer_size, reg_buffer.size());
+
+ // Validate classes buffer size
+ const uint32_t cls_entry_size = m_nms_config.number_of_classes;
+ number_of_entries = cls_padded_shape.height * cls_padded_shape.width;
+ buffer_size = number_of_entries * cls_entry_size * sizeof(DeviceType);
+ CHECK(buffer_size == cls_buffer.size(), HAILO_INVALID_ARGUMENT,
+ "Failed to extract_detections, cls {} buffer_size should be {}, but is {}", layers_names.cls, buffer_size, cls_buffer.size());
+
+ // Validate objectness buffer size
+ static const uint32_t obj_entry_size = 1;
+ number_of_entries = obj_padded_shape.height * obj_padded_shape.width;
+ buffer_size = number_of_entries * obj_entry_size * sizeof(DeviceType);
+ CHECK(buffer_size == obj_buffer.size(), HAILO_INVALID_ARGUMENT,
+ "Failed to extract_detections, obj {} buffer_size should be {}, but is {}", layers_names.obj, buffer_size, obj_buffer.size());
+
+ auto reg_row_size = reg_padded_shape.width * reg_padded_shape.features;
+ auto cls_row_size = cls_padded_shape.width * cls_padded_shape.features;
+ auto obj_row_size = obj_padded_shape.width * obj_padded_shape.features;
+
+ DeviceType *reg_data = (DeviceType*)reg_buffer.data();
+ DeviceType *obj_data = (DeviceType*)obj_buffer.data();
+ DeviceType *cls_data = (DeviceType*)cls_buffer.data();
+
+ for (uint32_t row = 0; row < reg_shape.height; row++) {
+ for (uint32_t col = 0; col < reg_shape.width; col++) {
+ auto obj_idx = (obj_row_size * row) + col;
+ auto objectness = Quantization::dequantize_output<HostType, DeviceType>(obj_data[obj_idx], obj_quant_info);
+
+ if (objectness < m_nms_config.nms_score_th) {
+ continue;
+ }
+
+ auto reg_idx = (reg_row_size * row) + col;
+ auto cls_idx = (cls_row_size * row) + col;
+
+ auto tx = Quantization::dequantize_output<HostType, DeviceType>(reg_data[reg_idx + X_OFFSET], reg_quant_info);
+ auto ty = Quantization::dequantize_output<HostType, DeviceType>(reg_data[reg_idx + Y_OFFSET], reg_quant_info);
+ auto tw = Quantization::dequantize_output<HostType, DeviceType>(reg_data[reg_idx + W_OFFSET], reg_quant_info);
+ auto th = Quantization::dequantize_output<HostType, DeviceType>(reg_data[reg_idx + H_OFFSET], reg_quant_info);
+ auto bbox = decode(tx, ty, tw, th, col, row, static_cast<float32_t>(reg_shape.width), static_cast<float32_t>(reg_shape.height));
+
+ if (m_nms_config.cross_classes) {
+ // Pre-NMS optimization. If NMS checks IOU over different classes, only the maximum class is relevant
+ auto max_id_score_pair = get_max_class<HostType, DeviceType>(cls_data, cls_idx, CLASSES_START_INDEX, objectness, cls_quant_info, cls_padded_shape.width);
+ bbox.score = max_id_score_pair.second;
+ if (max_id_score_pair.second >= m_nms_config.nms_score_th) {
+ detections.emplace_back(DetectionBbox(bbox, max_id_score_pair.first));
+ classes_detections_count[max_id_score_pair.first]++;
+ }
+ }
+ else {
+ for (uint32_t curr_class_idx = 0; curr_class_idx < m_nms_config.number_of_classes; curr_class_idx++) {
+ auto class_entry_idx = cls_idx + (curr_class_idx * cls_padded_shape.width);
+ auto class_confidence = Quantization::dequantize_output<HostType, DeviceType>(
+ cls_data[class_entry_idx], cls_quant_info);
+ auto class_score = class_confidence * objectness;
+ if (class_score >= m_nms_config.nms_score_th) {
+ bbox.score = class_score;
+ detections.emplace_back(DetectionBbox(bbox, curr_class_idx));
+ classes_detections_count[curr_class_idx]++;
+ }
+ }
+ }
+ }
+ }
+
+ return HAILO_SUCCESS;
+ }
+
+    virtual hailo_bbox_float32_t decode(float32_t tx, float32_t ty, float32_t tw, float32_t th,
+        uint32_t col, uint32_t row, float32_t reg_shape_width, float32_t reg_shape_height) const;
+
+ YoloxPostProcessConfig m_yolox_config;
+
+ YOLOXPostProcessOp(const std::map<std::string, BufferMetaData> &inputs_metadata,
+ const std::map<std::string, BufferMetaData> &outputs_metadata,
+ const NmsPostProcessConfig &nms_post_process_config,
+ const YoloxPostProcessConfig &yolo_post_process_config)
+ : NmsPostProcessOp(inputs_metadata, outputs_metadata, nms_post_process_config, "YOLOX-Post-Process")
+ , m_yolox_config(yolo_post_process_config)
+ {}
+
+};
+
+} // namespace net_flow
+} // namespace hailort
+
+#endif // _HAILO_YOLOX_POST_PROCESS_HPP_
return m_element.deactivate();
}
-hailo_status PipelinePad::post_deactivate()
+hailo_status PipelinePad::post_deactivate(bool should_clear_abort)
{
- return m_element.post_deactivate();
+ return m_element.post_deactivate(should_clear_abort);
}
hailo_status PipelinePad::clear()
return m_element.wait_for_finish();
}
-hailo_status PipelinePad::resume()
+hailo_status PipelinePad::clear_abort()
{
- return m_element.resume();
+ return m_element.clear_abort();
}
hailo_status PipelinePad::run_push(PipelineBuffer &&buffer)
return execute_deactivate();
}
-hailo_status PipelineElement::post_deactivate()
+hailo_status PipelineElement::post_deactivate(bool should_clear_abort)
{
- return execute_post_deactivate();
+ return execute_post_deactivate(should_clear_abort);
}
hailo_status PipelineElement::clear()
return execute_abort();
}
-hailo_status PipelineElement::resume()
+hailo_status PipelineElement::clear_abort()
{
- return execute_resume();
+ return execute_clear_abort();
}
hailo_status PipelineElement::wait_for_finish()
return execute([&](auto *pad){ return pad->deactivate(); });
}
-hailo_status PipelineElement::execute_post_deactivate()
+hailo_status PipelineElement::execute_post_deactivate(bool should_clear_abort)
{
- return execute([&](auto *pad){ return pad->post_deactivate(); });
+ return execute([&](auto *pad){ return pad->post_deactivate(should_clear_abort); });
}
hailo_status PipelineElement::execute_clear()
return execute([&](auto *pad){ return pad->abort(); });
}
-hailo_status PipelineElement::execute_resume()
+hailo_status PipelineElement::execute_clear_abort()
{
- return execute([&](auto *pad){ return pad->resume(); });
+ return execute([&](auto *pad){ return pad->clear_abort(); });
}
hailo_status PipelineElement::execute_wait_for_finish()
return HAILO_SUCCESS;
}
-hailo_status BaseQueueElement::execute_post_deactivate()
+hailo_status BaseQueueElement::execute_post_deactivate(bool should_clear_abort)
{
hailo_status status = m_deactivation_event.wait(INIFINITE_TIMEOUT());
if (HAILO_SUCCESS != status) {
LOGGER__ERROR("Failed to reset of deactivation event in {} with status {}", name(), status);
}
- return PipelineElement::execute_post_deactivate();
+ return PipelineElement::execute_post_deactivate(should_clear_abort);
}
hailo_status BaseQueueElement::execute_clear()
return m_activation_event.signal();
}
-hailo_status BaseQueueElement::execute_resume()
+hailo_status BaseQueueElement::execute_clear_abort()
{
auto status = m_shutdown_event->reset();
CHECK_SUCCESS(status);
m_pipeline_status->store(HAILO_SUCCESS);
- status = PipelineElement::execute_resume();
- CHECK_SUCCESS(status);
- return m_activation_event.signal();
+ return PipelineElement::execute_clear_abort();
}
hailo_status BaseQueueElement::set_timeout(std::chrono::milliseconds timeout)
LOGGER__INFO("Shutdown event was signaled in dequeue of queue element {}!", name());
return make_unexpected(HAILO_SHUTDOWN_EVENT_SIGNALED);
}
- CHECK_AS_EXPECTED(HAILO_TIMEOUT != output.status(), HAILO_TIMEOUT, "{} (D2H) failed with status={} (timeout={}ms)", name(), HAILO_TIMEOUT, m_timeout.count());
+ CHECK_AS_EXPECTED(HAILO_TIMEOUT != output.status(), HAILO_TIMEOUT, "{} (D2H) failed with status={} (timeout={}ms)",
+ name(), HAILO_TIMEOUT, m_timeout.count());
CHECK_EXPECTED(output);
CHECK_AS_EXPECTED(output->data() == optional.data(), HAILO_INTERNAL_FAILURE, "The buffer received in {} was not the same as the user buffer!", name());
}
    m_is_activated = true; // TODO: Should this always be true, no matter the status of source().activate()?
m_was_stream_aborted = false;
+
return PipelineElement::execute_activate();
}
return HAILO_SUCCESS;
}
-hailo_status BaseDemuxElement::execute_post_deactivate()
+hailo_status BaseDemuxElement::execute_post_deactivate(bool should_clear_abort)
{
for (uint32_t i = 0; i < m_was_source_called.size(); i++) {
m_was_source_called[i] = false;
}
- return PipelineElement::execute_post_deactivate();
+ return PipelineElement::execute_post_deactivate(should_clear_abort);
}
hailo_status BaseDemuxElement::execute_abort()
hailo_status activate();
hailo_status deactivate();
- hailo_status post_deactivate();
+ hailo_status post_deactivate(bool should_clear_abort);
hailo_status clear();
hailo_status flush();
hailo_status abort();
hailo_status wait_for_finish();
- hailo_status resume();
+ hailo_status clear_abort();
virtual hailo_status run_push(PipelineBuffer &&buffer);
virtual Expected<PipelineBuffer> run_pull(PipelineBuffer &&optional = PipelineBuffer());
void set_push_complete_callback(PushCompleteCallback push_complete_callback);
hailo_status activate();
hailo_status deactivate();
- hailo_status post_deactivate();
+ hailo_status post_deactivate(bool should_clear_abort);
hailo_status clear();
hailo_status flush();
hailo_status abort();
- hailo_status resume();
+ hailo_status clear_abort();
hailo_status wait_for_finish();
virtual hailo_status run_push(PipelineBuffer &&buffer) = 0;
virtual Expected<PipelineBuffer> run_pull(PipelineBuffer &&optional, const PipelinePad &source) = 0;
virtual std::vector<PipelinePad*> execution_pads() = 0;
virtual hailo_status execute_activate();
virtual hailo_status execute_deactivate();
- virtual hailo_status execute_post_deactivate();
+ virtual hailo_status execute_post_deactivate(bool should_clear_abort);
virtual hailo_status execute_clear();
virtual hailo_status execute_flush();
virtual hailo_status execute_abort();
- virtual hailo_status execute_resume();
+ virtual hailo_status execute_clear_abort();
virtual hailo_status execute_wait_for_finish();
virtual hailo_status execute(std::function<hailo_status(PipelinePad*)>);
hailo_status pipeline_status();
virtual hailo_status execute_activate() override;
- virtual hailo_status execute_post_deactivate() override;
+ virtual hailo_status execute_post_deactivate(bool should_clear_abort) override;
virtual hailo_status execute_clear() override;
- virtual hailo_status execute_resume() override;
+ virtual hailo_status execute_clear_abort() override;
virtual hailo_status execute_wait_for_finish() override;
/// Starts/stops the queue thread. This functions needs to be called on subclasses ctor and dtor
protected:
virtual hailo_status execute_activate() override;
virtual hailo_status execute_deactivate() override;
- virtual hailo_status execute_post_deactivate() override;
+ virtual hailo_status execute_post_deactivate(bool should_clear_abort) override;
virtual hailo_status execute_abort() override;
virtual Expected<std::vector<PipelineBuffer>> action(PipelineBuffer &&input) = 0;
virtual std::vector<PipelinePad*> execution_pads() override;
* @brief Implementation of the virtual stream
**/
+#include "common/utils.hpp"
#include "hailo/vstream.hpp"
#include "hailo/hailort_defaults.hpp"
+#include "hailo/hailort_common.hpp"
#include "common/runtime_statistics_internal.hpp"
Expected<std::shared_ptr<PostInferElement>> PostInferElement::create(const hailo_3d_image_shape_t &src_image_shape,
const hailo_format_t &src_format, const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format,
const hailo_quant_info_t &dst_quant_info, const hailo_nms_info_t &nms_info, const std::string &name,
- hailo_pipeline_elem_stats_flags_t elem_flags, std::shared_ptr<std::atomic<hailo_status>> pipeline_status)
+ hailo_pipeline_elem_stats_flags_t elem_flags, std::shared_ptr<std::atomic<hailo_status>> pipeline_status,
+ std::chrono::milliseconds timeout, hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event,
+ size_t buffer_pool_size)
{
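+    // NMS outputs take their host frame size from nms_info; other outputs derive it from
+    // the 3D image shape and format.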
+    auto frame_size = (dst_format.order == HAILO_FORMAT_ORDER_HAILO_NMS) ?
+        HailoRTCommon::get_nms_host_frame_size(nms_info, dst_format) :
+        HailoRTCommon::get_frame_size(dst_image_shape, dst_format);
+ auto buffer_pool_expected = BufferPool::create(frame_size, buffer_pool_size, shutdown_event, elem_flags, vstream_flags);
+ CHECK_EXPECTED(buffer_pool_expected, "Failed creating BufferPool for {}", name);
+
auto transform_context = OutputTransformContext::create(src_image_shape, src_format, dst_image_shape, dst_format,
dst_quant_info, nms_info);
CHECK_EXPECTED(transform_context, "Failed Creating OutputTransformContext");
CHECK_EXPECTED(duration_collector);
auto post_infer_elem_ptr = make_shared_nothrow<PostInferElement>(transform_context.release(),
- name, duration_collector.release(), std::move(pipeline_status));
+ name, duration_collector.release(), std::move(pipeline_status), buffer_pool_expected.release(), timeout);
CHECK_AS_EXPECTED(nullptr != post_infer_elem_ptr, HAILO_OUT_OF_HOST_MEMORY);
LOGGER__INFO("Created {}", post_infer_elem_ptr->name());
Expected<std::shared_ptr<PostInferElement>> PostInferElement::create(const hailo_3d_image_shape_t &src_image_shape, const hailo_format_t &src_format,
const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, const hailo_quant_info_t &dst_quant_info, const hailo_nms_info_t &nms_info,
- const std::string &name, const hailo_vstream_params_t &vstream_params, std::shared_ptr<std::atomic<hailo_status>> pipeline_status)
+ const std::string &name, const hailo_vstream_params_t &vstream_params, std::shared_ptr<std::atomic<hailo_status>> pipeline_status,
+ EventPtr shutdown_event)
{
return PostInferElement::create(src_image_shape, src_format, dst_image_shape, dst_format, dst_quant_info, nms_info,
- name, vstream_params.pipeline_elements_stats_flags, pipeline_status);
+ name, vstream_params.pipeline_elements_stats_flags, pipeline_status, std::chrono::milliseconds(vstream_params.timeout_ms),
+ vstream_params.vstream_stats_flags, shutdown_event, vstream_params.queue_size);
}
PostInferElement::PostInferElement(std::unique_ptr<OutputTransformContext> &&transform_context, const std::string &name,
DurationCollector &&duration_collector,
- std::shared_ptr<std::atomic<hailo_status>> &&pipeline_status) :
+ std::shared_ptr<std::atomic<hailo_status>> &&pipeline_status,
+ BufferPoolPtr buffer_pool, std::chrono::milliseconds timeout) :
FilterElement(name, std::move(duration_collector), std::move(pipeline_status)),
- m_transform_context(std::move(transform_context))
+ m_transform_context(std::move(transform_context)),
+ m_pool(buffer_pool),
+ m_timeout(timeout)
{}
hailo_status PostInferElement::run_push(PipelineBuffer &&/*buffer*/)
Expected<PipelineBuffer> PostInferElement::action(PipelineBuffer &&input, PipelineBuffer &&optional)
{
- CHECK_AS_EXPECTED(optional, HAILO_INVALID_ARGUMENT, "Optional buffer must be valid in {}!", name());
+ auto buffer = m_pool->get_available_buffer(std::move(optional), m_timeout);
+ if (HAILO_SHUTDOWN_EVENT_SIGNALED == buffer.status()) {
+ return make_unexpected(buffer.status());
+ }
+ CHECK_EXPECTED(buffer, "{} (D2H) failed with status={}", name(), buffer.status());
// Note: The latency to be measured starts as the buffer is read from the HW (it's 'input' in this case)
- optional.set_metadata(input.get_metadata());
+ buffer->set_metadata(input.get_metadata());
- auto dst = optional.as_view();
+ auto dst = buffer->as_view();
m_duration_collector.start_measurement();
const auto status = m_transform_context->transform(input.as_view(), dst);
m_duration_collector.complete_measurement();
CHECK_SUCCESS_AS_EXPECTED(status);
- return std::move(optional);
+ return buffer.release();
+}
+
+std::vector<AccumulatorPtr> PostInferElement::get_queue_size_accumulators()
+{
+ if (nullptr == m_pool->get_queue_size_accumulator()) {
+ return std::vector<AccumulatorPtr>();
+ }
+ return {m_pool->get_queue_size_accumulator()};
}
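PostInferElement now owns a BufferPool and pulls its output buffer from it, reusing the caller's optional buffer when one is supplied. A minimal sketch of that reuse-or-acquire pattern, using hypothetical stand-in types (`SimplePool`, `Buffer`) rather than HailoRT's actual BufferPool/PipelineBuffer, which carry more state (accumulators, shutdown events):

```cpp
#include <chrono>
#include <condition_variable>
#include <cstdint>
#include <deque>
#include <mutex>
#include <optional>
#include <vector>

using Buffer = std::vector<uint8_t>; // illustrative stand-in for a frame buffer

class SimplePool {
public:
    SimplePool(size_t buffer_count, size_t frame_size)
    {
        for (size_t i = 0; i < buffer_count; i++) {
            m_free.emplace_back(frame_size);
        }
    }

    // Mirror of the get_available_buffer(optional, timeout) pattern: reuse the
    // caller-supplied buffer when present, otherwise block (up to timeout) on the pool.
    std::optional<Buffer> get_available_buffer(std::optional<Buffer> optional,
        std::chrono::milliseconds timeout)
    {
        if (optional) {
            return optional; // caller already provided a destination buffer
        }
        std::unique_lock<std::mutex> lock(m_mutex);
        if (!m_cv.wait_for(lock, timeout, [this] { return !m_free.empty(); })) {
            return std::nullopt; // timed out (HailoRT would return HAILO_TIMEOUT)
        }
        Buffer buffer = std::move(m_free.front());
        m_free.pop_front();
        return buffer;
    }

    // Downstream hands buffers back once the user has consumed them
    void return_buffer(Buffer &&buffer)
    {
        {
            std::lock_guard<std::mutex> lock(m_mutex);
            m_free.push_back(std::move(buffer));
        }
        m_cv.notify_one();
    }

private:
    std::mutex m_mutex;
    std::condition_variable m_cv;
    std::deque<Buffer> m_free;
};
```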
static hailo_nms_info_t fuse_nms_info(const std::vector<hailo_nms_info_t> &nms_infos)
return outputs;
}
+Expected<std::shared_ptr<ArgmaxPostProcessElement>> ArgmaxPostProcessElement::create(std::shared_ptr<net_flow::Op> argmax_op,
+ const std::string &name, hailo_pipeline_elem_stats_flags_t elem_flags,
+ std::shared_ptr<std::atomic<hailo_status>> pipeline_status)
+{
+ auto duration_collector = DurationCollector::create(elem_flags);
+ CHECK_EXPECTED(duration_collector);
+ auto argmax_elem_ptr = make_shared_nothrow<ArgmaxPostProcessElement>(argmax_op,
+ name, duration_collector.release(), std::move(pipeline_status));
+ CHECK_AS_EXPECTED(nullptr != argmax_elem_ptr, HAILO_OUT_OF_HOST_MEMORY);
+ LOGGER__INFO("Created {}", argmax_elem_ptr->name());
+ return argmax_elem_ptr;
+}
+
+ArgmaxPostProcessElement::ArgmaxPostProcessElement(std::shared_ptr<net_flow::Op> argmax_op, const std::string &name,
+ DurationCollector &&duration_collector,
+ std::shared_ptr<std::atomic<hailo_status>> &&pipeline_status) :
+ FilterElement(name, std::move(duration_collector), std::move(pipeline_status)),
+ m_argmax_op(argmax_op)
+{}
+
+hailo_status ArgmaxPostProcessElement::run_push(PipelineBuffer &&/*buffer*/)
+{
+ LOGGER__ERROR("ArgmaxPostProcessElement does not support run_push operation");
+ return HAILO_INVALID_OPERATION;
+}
+
+PipelinePad &ArgmaxPostProcessElement::next_pad()
+{
+ // Note: The next elem to be run is upstream from this elem (i.e. buffers are pulled)
+ return *m_sinks[0].prev();
+}
+
+std::string ArgmaxPostProcessElement::description() const
+{
+ std::stringstream element_description;
+ element_description << "(" << this->name() << " | " << m_argmax_op->get_op_description() << ")";
+ return element_description.str();
+}
+
+Expected<PipelineBuffer> ArgmaxPostProcessElement::action(PipelineBuffer &&input, PipelineBuffer &&optional)
+{
+ std::map<std::string, MemoryView> inputs;
+ std::map<std::string, MemoryView> outputs;
+ auto &input_name = m_argmax_op->inputs_metadata().begin()->first;
+ auto &output_name = m_argmax_op->outputs_metadata().begin()->first;
+ inputs.insert({input_name, input.as_view()});
+ outputs.insert({output_name, optional.as_view()});
+ m_duration_collector.start_measurement();
+ auto post_process_result = m_argmax_op->execute(inputs, outputs);
+ m_duration_collector.complete_measurement();
+ CHECK_SUCCESS_AS_EXPECTED(post_process_result);
+
+ return std::move(optional);
+}
+
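Conceptually, the argmax op reduces the channel axis of each spatial position to the index of its maximal score (e.g. NHWC input to NHW output). A minimal, self-contained sketch of that reduction, independent of the `net_flow::Op` interface:

```cpp
#include <cstddef>
#include <cstdint>
#include <vector>

// Argmax over the channel axis of an NHWC-ordered frame:
// rows * cols * channels scores in, rows * cols class indices out.
std::vector<uint32_t> argmax_nhwc(const std::vector<float> &scores,
    size_t rows, size_t cols, uint32_t channels)
{
    std::vector<uint32_t> indices(rows * cols, 0);
    for (size_t pixel = 0; pixel < rows * cols; pixel++) {
        const float *pixel_scores = &scores[pixel * channels];
        uint32_t best = 0;
        for (uint32_t c = 1; c < channels; c++) {
            if (pixel_scores[c] > pixel_scores[best]) {
                best = c;
            }
        }
        indices[pixel] = best;
    }
    return indices;
}
```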
+Expected<std::shared_ptr<SoftmaxPostProcessElement>> SoftmaxPostProcessElement::create(std::shared_ptr<net_flow::Op> softmax_op,
+ const std::string &name, hailo_pipeline_elem_stats_flags_t elem_flags,
+ std::shared_ptr<std::atomic<hailo_status>> pipeline_status)
+{
+ auto duration_collector = DurationCollector::create(elem_flags);
+ CHECK_EXPECTED(duration_collector);
+ auto softmax_elem_ptr = make_shared_nothrow<SoftmaxPostProcessElement>(softmax_op,
+ name, duration_collector.release(), std::move(pipeline_status));
+ CHECK_AS_EXPECTED(nullptr != softmax_elem_ptr, HAILO_OUT_OF_HOST_MEMORY);
+ LOGGER__INFO("Created {}", softmax_elem_ptr->name());
+ return softmax_elem_ptr;
+}
+
+SoftmaxPostProcessElement::SoftmaxPostProcessElement(std::shared_ptr<net_flow::Op> softmax_op, const std::string &name,
+ DurationCollector &&duration_collector,
+ std::shared_ptr<std::atomic<hailo_status>> &&pipeline_status) :
+ FilterElement(name, std::move(duration_collector), std::move(pipeline_status)),
+ m_softmax_op(softmax_op)
+{}
+
+hailo_status SoftmaxPostProcessElement::run_push(PipelineBuffer &&/*buffer*/)
+{
+ LOGGER__ERROR("SoftmaxPostProcessElement does not support run_push operation");
+ return HAILO_INVALID_OPERATION;
+}
+
+PipelinePad &SoftmaxPostProcessElement::next_pad()
+{
+ // Note: The next elem to be run is upstream from this elem (i.e. buffers are pulled)
+ return *m_sinks[0].prev();
+}
+
+std::string SoftmaxPostProcessElement::description() const
+{
+ std::stringstream element_description;
+ element_description << "(" << this->name() << " | " << m_softmax_op->get_op_description() << ")";
+ return element_description.str();
+}
+
+Expected<PipelineBuffer> SoftmaxPostProcessElement::action(PipelineBuffer &&input, PipelineBuffer &&optional)
+{
+ std::map<std::string, MemoryView> inputs;
+ std::map<std::string, MemoryView> outputs;
+ auto &input_name = m_softmax_op->inputs_metadata().begin()->first;
+ auto &output_name = m_softmax_op->outputs_metadata().begin()->first;
+ inputs.insert({input_name, input.as_view()});
+ outputs.insert({output_name, optional.as_view()});
+ m_duration_collector.start_measurement();
+ auto post_process_result = m_softmax_op->execute(inputs, outputs);
+ m_duration_collector.complete_measurement();
+ CHECK_SUCCESS_AS_EXPECTED(post_process_result);
+
+ return std::move(optional);
+}
+
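The softmax op itself is a plain per-vector normalization; the element only wires it into the pull pipeline. For reference, a minimal numerically stable softmax, again independent of `net_flow::Op` (assumes a non-empty input):

```cpp
#include <algorithm>
#include <cmath>
#include <vector>

// Numerically stable softmax over one vector: subtracting the max before
// exponentiating keeps exp() from overflowing on large logits.
std::vector<float> softmax(const std::vector<float> &logits)
{
    std::vector<float> result(logits.size());
    const float max_logit = *std::max_element(logits.begin(), logits.end());
    float sum = 0.0f;
    for (size_t i = 0; i < logits.size(); i++) {
        result[i] = std::exp(logits[i] - max_logit);
        sum += result[i];
    }
    for (auto &value : result) {
        value /= sum; // normalize so the outputs sum to 1
    }
    return result;
}
```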
BaseVStream::BaseVStream(const hailo_vstream_info_t &vstream_info, const hailo_vstream_params_t &vstream_params,
std::shared_ptr<PipelineElement> pipeline_entry, std::vector<std::shared_ptr<PipelineElement>> &&pipeline,
std::shared_ptr<std::atomic<hailo_status>> &&pipeline_status,
auto status = m_shutdown_event->reset();
CHECK_SUCCESS(status);
- LOGGER__DEBUG("Activating {}...", name());
- status = m_entry_element->activate();
- CHECK_SUCCESS(status);
-
status = resume();
CHECK(((status == HAILO_SUCCESS) || (status == HAILO_STREAM_NOT_ACTIVATED)), status,
"Failed to resume stream in {}", name());
+ LOGGER__DEBUG("Activating {}...", name());
+ status = m_entry_element->activate();
+ CHECK_SUCCESS(status);
+
m_is_activated = true;
return HAILO_SUCCESS;
}
hailo_status BaseVStream::abort()
{
+ auto status = m_entry_element->abort();
+ CHECK_SUCCESS(status);
m_is_aborted = true;
- return m_entry_element->abort();
+
+ return HAILO_SUCCESS;
}
hailo_status BaseVStream::resume()
{
+ auto status = m_entry_element->clear_abort();
+ CHECK_SUCCESS(status);
m_is_aborted = false;
- return m_entry_element->resume();
+
+ if (m_is_activated) {
+ status = m_entry_element->activate();
+ CHECK_SUCCESS(status);
+ }
+ return HAILO_SUCCESS;
}
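At the API level, abort() still unblocks a pending read/write, and resume() now clears the abort state (re-activating the entry element if the vstream was active). A hedged usage sketch; `drain_until_aborted` and its setup are hypothetical, only the vstream calls come from the API above:

```cpp
#include <thread>
// Assumes the HailoRT public headers and a configured, activated output vstream.

void drain_until_aborted(hailort::OutputVStream &vstream, hailort::MemoryView frame)
{
    std::thread reader([&]() {
        while (true) {
            auto status = vstream.read(frame);
            if (HAILO_SUCCESS != status) {
                break; // e.g. HAILO_STREAM_ABORTED_BY_USER once abort() is called
            }
        }
    });

    (void)vstream.abort();  // called from this thread; unblocks the pending read
    reader.join();
    (void)vstream.resume(); // clears the abort (re-activating if needed) so reads may restart
}
```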
hailo_status BaseVStream::stop_vstream()
LOGGER__WARNING("Failed deactivate of vstream {} status {}", name(), status);
}
- status = m_entry_element->post_deactivate();
+ // If the VStream was aborted, do not clear the low-level stream's abort state;
+ // otherwise flush would be called in the low-level stream's d-tor when there is no receiver.
+ auto should_clear_abort = (!m_is_aborted);
+ status = m_entry_element->post_deactivate(should_clear_abort);
if (HAILO_SUCCESS != status) {
LOGGER__WARNING("Failed post deactivate of vstream {} status {}", name(), status);
}
hailo_status BaseVStream::stop_and_clear()
{
- auto status = m_core_op_activated_event->wait(std::chrono::milliseconds(0));
- CHECK(HAILO_TIMEOUT == status, HAILO_INVALID_OPERATION,
- "Trying to clear {} vstream before its network group is deactivated", name());
+ auto status = HAILO_SUCCESS;
+ if (nullptr != m_core_op_activated_event) {
+ status = m_core_op_activated_event->wait(std::chrono::milliseconds(0));
+ CHECK(HAILO_TIMEOUT == status, HAILO_INVALID_OPERATION,
+ "Trying to clear {} vstream before its network group is deactivated", name());
+ }
status = stop_vstream();
CHECK_SUCCESS(status);
LOGGER__TRACE("Overwritting current pipeline status {}", curr_pipeline_status);
m_pipeline_status->store(HAILO_SUCCESS);
}
-
- return HAILO_SUCCESS;
+
+ return status;
}
size_t BaseVStream::get_frame_size() const
return m_vstream->after_fork_in_child();
}
+bool InputVStream::is_aborted()
+{
+ return m_vstream->is_aborted();
+}
+
InputVStream::InputVStream(std::shared_ptr<InputVStreamInternal> vstream) : m_vstream(std::move(vstream)) {}
Expected<OutputVStream> OutputVStream::create(
return m_vstream->after_fork_in_child();
}
+bool OutputVStream::is_aborted()
+{
+ return m_vstream->is_aborted();
+}
+
OutputVStream::OutputVStream(std::shared_ptr<OutputVStreamInternal> vstream) : m_vstream(std::move(vstream)) {}
std::map<std::string, AccumulatorPtr> get_pipeline_accumulators_by_type(
InputVStreamImpl::~InputVStreamImpl()
{
(void)stop_vstream();
- if (m_is_aborted) {
- // If VStream was aborted, do not clear low-level stream abortion,
- // otherwise flush would be called on low-level stream d-tor when there is no receiver.
- (void)abort();
- }
}
hailo_status InputVStreamImpl::write(const MemoryView &buffer)
InputVStreamClient::~InputVStreamClient()
{
- auto reply = m_client->InputVStream_release(m_handle);
+ auto reply = m_client->InputVStream_release(m_handle, OsUtils::get_curr_pid());
if (reply != HAILO_SUCCESS) {
LOGGER__CRITICAL("InputVStream_release failed!");
}
return m_client->InputVStream_resume(m_handle);
}
+hailo_status InputVStreamClient::stop_and_clear()
+{
+ auto expected_client = HailoRtRpcClientUtils::create_client();
+ CHECK_EXPECTED_AS_STATUS(expected_client);
+ auto stop_and_clear_client = expected_client.release();
+
+ return stop_and_clear_client->InputVStream_stop_and_clear(m_handle);
+}
+
+hailo_status InputVStreamClient::start_vstream()
+{
+ auto expected_client = HailoRtRpcClientUtils::create_client();
+ CHECK_EXPECTED_AS_STATUS(expected_client);
+ auto start_vstream_client = expected_client.release();
+
+ return start_vstream_client->InputVStream_start_vstream(m_handle);
+}
+
size_t InputVStreamClient::get_frame_size() const
{
auto frame_size = m_client->InputVStream_get_frame_size(m_handle);
m_handle = expected_dup_handle.value();
return HAILO_SUCCESS;
}
+
+bool InputVStreamClient::is_aborted()
+{
+ auto is_aborted_exp = m_client->InputVStream_is_aborted(m_handle);
+ if (!is_aborted_exp) {
+ LOGGER__CRITICAL("InputVStream_is_aborted failed with status={}", is_aborted_exp.status());
+ return true;
+ }
+ return is_aborted_exp.release();
+}
+
#endif // HAILO_SUPPORT_MULTI_PROCESS
std::string InputVStreamInternal::get_pipeline_description() const
OutputVStreamImpl::~OutputVStreamImpl()
{
(void)stop_vstream();
- if (m_is_aborted) {
- // If VStream was aborted, do not clear low-level stream abortion,
- // otherwise flush would be called on low-level stream d-tor when there is no receiver.
- (void)abort();
- }
}
hailo_status OutputVStreamImpl::read(MemoryView buffer)
OutputVStreamClient::~OutputVStreamClient()
{
- auto reply = m_client->OutputVStream_release(m_handle);
+ auto reply = m_client->OutputVStream_release(m_handle, OsUtils::get_curr_pid());
if (reply != HAILO_SUCCESS) {
LOGGER__CRITICAL("OutputVStream_release failed!");
}
return m_client->OutputVStream_resume(m_handle);
}
+hailo_status OutputVStreamClient::stop_and_clear()
+{
+ auto expected_client = HailoRtRpcClientUtils::create_client();
+ CHECK_EXPECTED_AS_STATUS(expected_client);
+ auto stop_and_clear_client = expected_client.release();
+
+ return stop_and_clear_client->OutputVStream_stop_and_clear(m_handle);
+}
+
+hailo_status OutputVStreamClient::start_vstream()
+{
+ auto expected_client = HailoRtRpcClientUtils::create_client();
+ CHECK_EXPECTED_AS_STATUS(expected_client);
+ auto start_vstream_client = expected_client.release();
+
+ return start_vstream_client->OutputVStream_start_vstream(m_handle);
+}
+
size_t OutputVStreamClient::get_frame_size() const
{
auto frame_size = m_client->OutputVStream_get_frame_size(m_handle);
{
auto expected_name = m_client->OutputVStream_name(m_handle);
if (!expected_name) {
- LOGGER__CRITICAL("InputVStream_name failed with status={}", expected_name.status());
+ LOGGER__CRITICAL("OutputVStream_name failed with status={}", expected_name.status());
return "";
}
return expected_name.release();
{
auto expected_name = m_client->OutputVStream_network_name(m_handle);
if (!expected_name) {
- LOGGER__CRITICAL("InputVStream_name failed with status={}", expected_name.status());
+ LOGGER__CRITICAL("OutputVStream_name failed with status={}", expected_name.status());
return "";
}
return expected_name.release();
m_handle = expected_dup_handle.value();
return HAILO_SUCCESS;
}
+
+bool OutputVStreamClient::is_aborted()
+{
+ auto is_aborted_exp = m_client->OutputVStream_is_aborted(m_handle);
+ if (!is_aborted_exp) {
+ LOGGER__CRITICAL("OutputVStream_is_aborted failed with status={}", is_aborted_exp.status());
+ return true;
+ }
+ return is_aborted_exp.release();
+}
#endif // HAILO_SUPPORT_MULTI_PROCESS
Expected<std::shared_ptr<HwReadElement>> HwReadElement::create(std::shared_ptr<OutputStream> stream, const std::string &name, std::chrono::milliseconds timeout,
return element_description.str();
}
-hailo_status HwReadElement::execute_post_deactivate()
+hailo_status HwReadElement::execute_post_deactivate(bool should_clear_abort)
{
- auto status = m_stream->clear_abort();
- CHECK(((HAILO_SUCCESS == status) || (HAILO_STREAM_NOT_ACTIVATED == status)), status,
- "Failed to clear abort stream in {}", name());
+ if (should_clear_abort) {
+ auto status = m_stream->clear_abort();
+ CHECK(((HAILO_SUCCESS == status) || (HAILO_STREAM_NOT_ACTIVATED == status)), status,
+ "Failed to clear abort stream in {}", name());
+ }
return HAILO_SUCCESS;
}
return HAILO_SUCCESS;
}
-hailo_status HwReadElement::execute_resume()
+hailo_status HwReadElement::execute_clear_abort()
{
auto status = m_stream->clear_abort();
CHECK(((status == HAILO_SUCCESS) || (status == HAILO_STREAM_NOT_ACTIVATED)), status,
- "Failed to execute resume stream in {}", name());
+ "Failed to execute clear_abort stream in {}", name());
return HAILO_SUCCESS;
}
hailo_status flush_status = m_stream->flush();
if (HAILO_STREAM_ABORTED_BY_USER == flush_status) {
LOGGER__INFO("Failed flushing input stream {} because stream was aborted", m_stream->to_string());
- // TODO: HRT-3621
+ return HAILO_SUCCESS;
+ } else if (HAILO_STREAM_NOT_ACTIVATED == flush_status) {
+ LOGGER__INFO("Failed flushing input stream {} because stream is not activated", m_stream->to_string());
return HAILO_SUCCESS;
} else if (HAILO_SUCCESS != flush_status) {
LOGGER__ERROR("flush has failed in {} with status {}", name(), flush_status);
return HAILO_SUCCESS;
}
-hailo_status HwWriteElement::execute_post_deactivate()
+hailo_status HwWriteElement::execute_post_deactivate(bool should_clear_abort)
{
- auto status = m_stream->clear_abort();
- CHECK(((status == HAILO_SUCCESS) || (status == HAILO_STREAM_NOT_ACTIVATED)), status,
- "Failed to clear abort stream in {}", name());
+ if (should_clear_abort) {
+ auto status = m_stream->clear_abort();
+ CHECK(((status == HAILO_SUCCESS) || (status == HAILO_STREAM_NOT_ACTIVATED)), status,
+ "Failed to clear abort stream in {}", name());
+ }
return HAILO_SUCCESS;
}
return HAILO_SUCCESS;
}
-hailo_status HwWriteElement::execute_resume()
+hailo_status HwWriteElement::execute_clear_abort()
{
auto status = m_stream->clear_abort();
CHECK(((status == HAILO_SUCCESS) || (status == HAILO_STREAM_NOT_ACTIVATED)), status,
- "Failed to execute resume stream in {}", name());
+ "Failed to execute clear_abort stream in {}", name());
return HAILO_SUCCESS;
}
static hailo_vstream_params_t expand_vstream_params_autos(const hailo_stream_info_t &stream_info,
const hailo_vstream_params_t &vstream_params)
{
+ if (HAILO_FORMAT_ORDER_HAILO_NMS == stream_info.format.order) {
+ // TODO (HRT-11082): On NMS, return error if UINT16
+ if (HAILO_FORMAT_TYPE_UINT16 == vstream_params.user_buffer_format.type) {
+ LOGGER__WARNING("Passing 'HAILO_FORMAT_TYPE_UINT16' for NMS output is deprecated and will soon be unsupported. "\
+ "One should use HAILO_FORMAT_TYPE_FLOAT32");
+ }
+ }
auto local_vstream_params = vstream_params;
local_vstream_params.user_buffer_format = HailoRTDefaults::expand_auto_format(vstream_params.user_buffer_format,
stream_info.format);
return local_vstream_params;
}
+static hailo_vstream_params_t expand_vstream_params_autos_argmax(const hailo_vstream_params_t &vstream_params,
+ hailo_format_t &op_input_format)
+{
+ auto local_vstream_params = vstream_params;
+ if (local_vstream_params.user_buffer_format.type == HAILO_FORMAT_TYPE_AUTO) {
+ local_vstream_params.user_buffer_format.type = op_input_format.type;
+ }
+ if (local_vstream_params.user_buffer_format.order == HAILO_FORMAT_ORDER_AUTO) {
+ if (op_input_format.order == HAILO_FORMAT_ORDER_NHCW || op_input_format.order == HAILO_FORMAT_ORDER_NHWC) {
+ local_vstream_params.user_buffer_format.order = HAILO_FORMAT_ORDER_NHW;
+ }
+ if (op_input_format.order == HAILO_FORMAT_ORDER_NC) {
+ local_vstream_params.user_buffer_format.order = HAILO_FORMAT_ORDER_NC;
+ }
+ }
+ return local_vstream_params;
+}
+
+static hailo_vstream_params_t expand_vstream_params_autos_softmax(const hailo_vstream_params_t &vstream_params,
+ hailo_format_t &op_input_format)
+{
+ auto local_vstream_params = vstream_params;
+ // After de-quantization the type should be float32; softmax supports only NHWC or NC order
+ if (local_vstream_params.user_buffer_format.type == HAILO_FORMAT_TYPE_AUTO) {
+ local_vstream_params.user_buffer_format.type = HAILO_FORMAT_TYPE_FLOAT32;
+ }
+ if (local_vstream_params.user_buffer_format.order == HAILO_FORMAT_ORDER_AUTO) {
+ local_vstream_params.user_buffer_format.order = op_input_format.order;
+ }
+ return local_vstream_params;
+}
+
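A worked example of how these AUTO expansions resolve. The helpers are file-static, so this is illustrative only; the enum values follow the rules above:

```cpp
hailo_vstream_params_t params{};
params.user_buffer_format.type = HAILO_FORMAT_TYPE_AUTO;
params.user_buffer_format.order = HAILO_FORMAT_ORDER_AUTO;

hailo_format_t argmax_input{};
argmax_input.type = HAILO_FORMAT_TYPE_UINT8;
argmax_input.order = HAILO_FORMAT_ORDER_NHCW;

// Argmax: AUTO type follows the op input (UINT8); AUTO order collapses
// NHCW/NHWC to NHW, since the channel axis is reduced away.
auto expanded = expand_vstream_params_autos_argmax(params, argmax_input);
// expanded.user_buffer_format == { HAILO_FORMAT_TYPE_UINT8, HAILO_FORMAT_ORDER_NHW }

// Softmax: AUTO type always becomes FLOAT32 (post de-quantization);
// AUTO order follows the op input (NHWC or NC).
```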
Expected<std::vector<InputVStream>> VStreamsBuilder::create_input_vstreams(ConfiguredNetworkGroup &net_group,
const std::map<std::string, hailo_vstream_params_t> &inputs_params)
{
CHECK_AS_EXPECTED(!(hw_read_stream_stats_flags & HAILO_VSTREAM_STATS_MEASURE_FPS), HAILO_NOT_IMPLEMENTED,
"Pipeline FPS statistics measurement is not implemented");
- auto hw_read_elem = HwReadElement::create(output_stream,
- PipelineObject::create_element_name("HwReadElement", output_stream->name(), output_stream->get_info().index),
- HAILO_INFINITE_TIMEOUT, buffer_pool_size, hw_read_element_stats_flags, hw_read_stream_stats_flags, shutdown_event, pipeline_status);
- CHECK_EXPECTED(hw_read_elem);
- elements.push_back(hw_read_elem.value());
+ auto hw_read_element = add_hw_read_element(output_stream, pipeline_status, elements, "HwReadElement", shutdown_event,
+ buffer_pool_size, hw_read_element_stats_flags, hw_read_stream_stats_flags);
+ CHECK_EXPECTED(hw_read_element);
if (output_stream->get_info().is_mux) {
- hailo_status status = add_demux(output_stream, vstreams_params_map, std::move(elements), vstreams, hw_read_elem.value(),
+ hailo_status status = add_demux(output_stream, vstreams_params_map, std::move(elements), vstreams, hw_read_element.value(),
shutdown_event, pipeline_status, output_vstream_infos);
CHECK_SUCCESS_AS_EXPECTED(status);
} else {
auto vstream_info = output_vstream_infos.find(output_stream->name());
CHECK_AS_EXPECTED(vstream_info != output_vstream_infos.end(), HAILO_NOT_FOUND,
"Failed to find vstream info of {}", output_stream->name());
-
assert(1 == vstreams_params_map.size());
auto vstream_params = expand_vstream_params_autos(output_stream->get_info(), vstreams_params_map.begin()->second);
vstream_params.user_buffer_format, output_stream->get_info().quant_info);
if (should_transform) {
- auto hw_read_queue_elem = PullQueueElement::create(
- PipelineObject::create_element_name("PullQueueElement_hw_read", output_stream->name(), output_stream->get_info().index),
- vstream_params, shutdown_event, pipeline_status);
- CHECK_EXPECTED(hw_read_queue_elem);
- elements.push_back(hw_read_queue_elem.value());
- CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(hw_read_elem.value(), hw_read_queue_elem.value()));
-
- auto post_infer_elem = PostInferElement::create(output_stream->get_info().hw_shape, output_stream->get_info().format,
- output_stream->get_info().shape, vstream_params.user_buffer_format, output_stream->get_info().quant_info, output_stream->get_info().nms_info,
- PipelineObject::create_element_name("PostInferElement", output_stream->name(), output_stream->get_info().index),
- vstream_params, pipeline_status);
- CHECK_EXPECTED(post_infer_elem);
- elements.push_back(post_infer_elem.value());
- CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(hw_read_queue_elem.value(), post_infer_elem.value()));
-
- auto post_infer_queue_elem = UserBufferQueueElement::create(
- PipelineObject::create_element_name("UserBufferQueueElement_post_infer", output_stream->name(), output_stream->get_info().index),
- vstream_params, shutdown_event, pipeline_status);
- CHECK_EXPECTED(post_infer_queue_elem);
- elements.push_back(post_infer_queue_elem.value());
- CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(post_infer_elem.value(), post_infer_queue_elem.value()));
-
+ auto hw_read_queue_element = add_pull_queue_element(output_stream, pipeline_status, elements, "PullQueueElement_hw_read",
+ shutdown_event, vstream_params);
+ CHECK_EXPECTED(hw_read_queue_element);
+ CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(hw_read_element.value(), hw_read_queue_element.value()));
+
+ auto post_infer_element = add_post_infer_element(output_stream, pipeline_status, elements,
+ "PostInferElement", vstream_params, shutdown_event);
+ CHECK_EXPECTED(post_infer_element);
+ CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(hw_read_queue_element.value(), post_infer_element.value()));
+ auto user_buffer_queue_element = add_user_buffer_queue_element(output_stream, pipeline_status, elements,
+ "UserBufferQueueElement", shutdown_event, vstream_params);
+ CHECK_EXPECTED(user_buffer_queue_element);
+ CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(post_infer_element.value(), user_buffer_queue_element.value()));
output_stream->set_timeout(std::chrono::milliseconds(HAILO_INFINITE));
- hw_read_queue_elem->get()->set_timeout(std::chrono::milliseconds(HAILO_INFINITE));
- auto vstream = OutputVStream::create(vstream_info->second, vstream_params, post_infer_queue_elem.release(), std::move(elements),
+ hw_read_queue_element->get()->set_timeout(std::chrono::milliseconds(HAILO_INFINITE));
+ auto vstream = OutputVStream::create(vstream_info->second, vstream_params, user_buffer_queue_element.release(), std::move(elements),
std::move(pipeline_status), shutdown_event, core_op_activated_event, pipeline_latency_accumulator.release());
CHECK_EXPECTED(vstream);
vstreams.emplace_back(vstream.release());
} else {
output_stream->set_timeout(std::chrono::milliseconds(vstream_params.timeout_ms));
- auto vstream = OutputVStream::create(vstream_info->second, vstream_params, hw_read_elem.release(), std::move(elements),
+ auto vstream = OutputVStream::create(vstream_info->second, vstream_params, hw_read_element.release(), std::move(elements),
std::move(pipeline_status), shutdown_event, core_op_activated_event, pipeline_latency_accumulator.release());
CHECK_EXPECTED(vstream);
vstreams.emplace_back(vstream.release());
return vstreams;
}
+Expected<std::vector<OutputVStream>> VStreamsBuilderUtils::create_output_post_process_softmax(std::shared_ptr<OutputStream> output_stream,
+ const NameToVStreamParamsMap &vstreams_params_map, const hailo_vstream_info_t &output_vstream_info, const NetFlowElement &softmax_op)
+{
+ std::vector<std::shared_ptr<PipelineElement>> elements;
+ std::vector<OutputVStream> vstreams;
+
+ EventPtr core_op_activated_event = nullptr;
+ if (!output_stream->is_scheduled()) {
+ core_op_activated_event = output_stream->get_core_op_activated_event();
+ }
+
+ auto shutdown_event = Event::create_shared(Event::State::not_signalled);
+ CHECK_AS_EXPECTED(nullptr != shutdown_event, HAILO_OUT_OF_HOST_MEMORY);
+
+ auto pipeline_status = make_shared_nothrow<std::atomic<hailo_status>>(HAILO_SUCCESS);
+ CHECK_AS_EXPECTED(nullptr != pipeline_status, HAILO_OUT_OF_HOST_MEMORY);
+
+ assert(!vstreams_params_map.empty());
+
+ // Note: In case of multiple entries in vstreams_params_map (e.g. in the case of demux), we set the
+ // pipeline_elements_stats_flags for the hw_read_element as the bitwise OR of all the flags.
+ hailo_pipeline_elem_stats_flags_t hw_read_element_stats_flags = HAILO_PIPELINE_ELEM_STATS_NONE;
+ hailo_vstream_stats_flags_t hw_read_stream_stats_flags = HAILO_VSTREAM_STATS_NONE;
+ size_t buffer_pool_size = 0;
+ for (const auto &elem_name_params : vstreams_params_map) {
+ hw_read_element_stats_flags |= elem_name_params.second.pipeline_elements_stats_flags;
+ hw_read_stream_stats_flags |= elem_name_params.second.vstream_stats_flags;
+ buffer_pool_size += elem_name_params.second.queue_size;
+ }
+
+ // TODO (HRT-4522): Support this measurement
+ CHECK_AS_EXPECTED(!(hw_read_stream_stats_flags & HAILO_VSTREAM_STATS_MEASURE_FPS), HAILO_NOT_IMPLEMENTED,
+ "Pipeline FPS statistics measurement is not implemented");
+
+ assert(1 == vstreams_params_map.size());
+ auto op_input_format = softmax_op.op->inputs_metadata().begin()->second.format;
+ auto vstream_params = expand_vstream_params_autos_softmax(vstreams_params_map.begin()->second, op_input_format);
+ if (HAILO_FORMAT_FLAGS_QUANTIZED & vstream_params.user_buffer_format.flags) {
+ vstream_params.user_buffer_format.flags &= ~HAILO_FORMAT_FLAGS_QUANTIZED;
+ LOGGER__WARNING("Note: The output_vstream {} format flag is marked as quantized, which is not supported with {}. "
+ "flag has been automatically set to False.", softmax_op.output_vstream_info.name, softmax_op.op->get_name());
+ }
+
+ auto pipeline_latency_accumulator = create_pipeline_latency_accumulator(vstream_params);
+ CHECK_EXPECTED(pipeline_latency_accumulator);
+
+ auto hw_read_element = add_hw_read_element(output_stream, pipeline_status, elements, "HwReadElement", shutdown_event,
+ buffer_pool_size, hw_read_element_stats_flags, hw_read_stream_stats_flags);
+ CHECK_EXPECTED(hw_read_element);
+
+ auto hw_read_queue_element = add_pull_queue_element(output_stream, pipeline_status, elements, "PullQueueElement_hw_read",
+ shutdown_event, vstream_params);
+ CHECK_EXPECTED(hw_read_queue_element);
+ CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(hw_read_element.value(), hw_read_queue_element.value()));
+
+ auto post_infer_element = add_post_infer_element(output_stream, pipeline_status, elements,
+ "PostInferElement", vstream_params, shutdown_event);
+ CHECK_EXPECTED(post_infer_element);
+ CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(hw_read_queue_element.value(), post_infer_element.value()));
+
+ auto pre_softmax_queue_element = add_pull_queue_element(output_stream, pipeline_status, elements, "PullQueueElement_pre_softmax",
+ shutdown_event, vstream_params);
+ CHECK_EXPECTED(pre_softmax_queue_element);
+ CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(post_infer_element.value(), pre_softmax_queue_element.value()));
+
+ auto softmax_element = add_softmax_element(output_stream, pipeline_status, elements, "SoftmaxPostProcessElement",
+ vstream_params, softmax_op);
+ CHECK_EXPECTED(softmax_element);
+ CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(pre_softmax_queue_element.value(), softmax_element.value()));
+ auto user_buffer_queue_element = add_user_buffer_queue_element(output_stream, pipeline_status, elements,
+ "UserBufferQueueElement", shutdown_event, vstream_params);
+ CHECK_EXPECTED(user_buffer_queue_element);
+ CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(softmax_element.value(), user_buffer_queue_element.value()));
+ output_stream->set_timeout(std::chrono::milliseconds(HAILO_INFINITE));
+ hw_read_queue_element->get()->set_timeout(std::chrono::milliseconds(HAILO_INFINITE));
+ auto vstream = OutputVStream::create(output_vstream_info, vstream_params, user_buffer_queue_element.release(), std::move(elements),
+ std::move(pipeline_status), shutdown_event, core_op_activated_event, pipeline_latency_accumulator.release());
+ CHECK_EXPECTED(vstream);
+ vstreams.emplace_back(vstream.release());
+
+ for (const auto &curr_vstream : vstreams) {
+ LOGGER__INFO("{}", curr_vstream.get_pipeline_description());
+ }
+
+ return vstreams;
+}
+
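For orientation, this builder produces the following element chain for an on-host softmax output (a pull pipeline, so data is requested from the UserBufferQueueElement backwards):

```cpp
// HwReadElement
//   -> PullQueueElement_hw_read
//   -> PostInferElement              (transform / de-quantize to float32)
//   -> PullQueueElement_pre_softmax
//   -> SoftmaxPostProcessElement
//   -> UserBufferQueueElement        (entry element handed to OutputVStream::create)
```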
InputVStream VStreamsBuilderUtils::create_input(std::shared_ptr<InputVStreamInternal> input_vstream)
{
return InputVStream(std::move(input_vstream));
return ((format1.order == format2.order) && (format1.flags == format2.flags) && (format1.type == format2.type));
}
+Expected<std::vector<OutputVStream>> VStreamsBuilderUtils::create_output_vstreams_from_streams(const OutputStreamWithParamsVector &all_output_streams,
+ OutputStreamPtrVector &output_streams, const hailo_vstream_params_t &vstream_params,
+ const std::unordered_map<std::string, std::shared_ptr<NetFlowElement>> &post_process_ops,
+ const std::unordered_map<stream_name_t, op_name_t> &op_inputs_to_op_name, const std::map<std::string, hailo_vstream_info_t> &output_vstream_infos_map)
+{
+ auto first_stream_info = output_streams[0]->get_info();
+ if ((HAILO_FORMAT_ORDER_HAILO_NMS == first_stream_info.format.order) &&
+ (first_stream_info.nms_info.is_defused)) {
+ // Case defuse NMS
+ return create_output_nms(output_streams, vstream_params, output_vstream_infos_map);
+ } else if (contains(op_inputs_to_op_name, static_cast<stream_name_t>(first_stream_info.name))) {
+ // Case post-process on host
+ auto &op_name = op_inputs_to_op_name.at(first_stream_info.name);
+ auto &op = post_process_ops.at(op_name);
+ switch (op.get()->op_type) {
+ case HAILO_NET_FLOW_OP_TYPE_NMS:
+ {
+ assert(1 <= op->op->outputs_metadata().size());
+ auto updated_outputs_metadata = op->op->outputs_metadata();
+ updated_outputs_metadata.begin()->second.format = vstream_params.user_buffer_format;
+ if (HAILO_FORMAT_ORDER_AUTO == updated_outputs_metadata.begin()->second.format.order) {
+ updated_outputs_metadata.begin()->second.format.order = HAILO_FORMAT_ORDER_HAILO_NMS;
+ }
+ if (HAILO_FORMAT_TYPE_AUTO == updated_outputs_metadata.begin()->second.format.type) {
+ updated_outputs_metadata.begin()->second.format.type = HAILO_FORMAT_TYPE_FLOAT32;
+ }
+ if (HAILO_FORMAT_FLAGS_QUANTIZED & updated_outputs_metadata.begin()->second.format.flags) {
+ updated_outputs_metadata.begin()->second.format.flags &= ~HAILO_FORMAT_FLAGS_QUANTIZED;
+ LOGGER__WARNING("Note: The output_vstream {} format flag is marked as quantized, which is not supported with {}. "
+ "flag has been automatically set to False.", op->output_vstream_info.name, op->op->get_name());
+ }
+
+ op->op->set_outputs_metadata(updated_outputs_metadata);
+ CHECK_SUCCESS_AS_EXPECTED(op->op->validate_metadata());
+ return create_output_post_process_nms(output_streams, vstream_params, output_vstream_infos_map, *op);
+ }
+
+ case HAILO_NET_FLOW_OP_TYPE_ARGMAX:
+ {
+ assert(output_streams.size() == 1);
+ NameToVStreamParamsMap name_to_vstream_params_map;
+ for (auto &output_stream : all_output_streams) {
+ if (output_stream.first->get_info().name == output_streams[0]->get_info().name) {
+ for (auto &vstream : output_stream.second) {
+ name_to_vstream_params_map.insert(vstream);
+ }
+ }
+ }
+ auto output_vstream_info = output_vstream_infos_map.at(op.get()->name);
+ return create_output_post_process_argmax(output_streams[0], name_to_vstream_params_map, output_vstream_info, *op);
+ }
+
+ case HAILO_NET_FLOW_OP_TYPE_SOFTMAX:
+ {
+ assert(output_streams.size() == 1);
+ NameToVStreamParamsMap name_to_vstream_params_map;
+ for (auto &output_stream : all_output_streams) {
+ if (output_stream.first->get_info().name == output_streams[0]->get_info().name) {
+ for (auto &vstream : output_stream.second) {
+ name_to_vstream_params_map.insert(vstream);
+ }
+ }
+ }
+ auto output_vstream_info = output_vstream_infos_map.at(op.get()->name);
+ return create_output_post_process_softmax(output_streams[0], name_to_vstream_params_map, output_vstream_info, *op);
+ }
+
+ default:
+ LOGGER__ERROR("op type {} of op {} is not in any of the supported post process OP types", op.get()->op_type, op_name);
+ return make_unexpected(HAILO_INVALID_OPERATION);
+ }
+ } else {
+ // All other cases
+ assert(output_streams.size() == 1);
+ NameToVStreamParamsMap name_to_vstream_params_map;
+ for (auto &output_stream : all_output_streams) {
+ if (output_stream.first->get_info().name == output_streams[0]->get_info().name) {
+ for (auto &vstream : output_stream.second) {
+ name_to_vstream_params_map.insert(vstream);
+ }
+ }
+ }
+ return create_outputs(output_streams[0], name_to_vstream_params_map, output_vstream_infos_map);
+ }
+}
+
Expected<std::vector<OutputVStream>> VStreamsBuilderUtils::create_output_nms(OutputStreamPtrVector &output_streams,
hailo_vstream_params_t vstreams_params,
const std::map<std::string, hailo_vstream_info_t> &output_vstream_infos)
return vstreams;
}
+Expected<std::shared_ptr<HwReadElement>> VStreamsBuilderUtils::add_hw_read_element(std::shared_ptr<OutputStream> &output_stream,
+ std::shared_ptr<std::atomic<hailo_status>> &pipeline_status, std::vector<std::shared_ptr<PipelineElement>> &elements,
+ const std::string &element_name, EventPtr &shutdown_event, size_t buffer_pool_size,
+ const hailo_pipeline_elem_stats_flags_t &hw_read_element_stats_flags, const hailo_vstream_stats_flags_t &hw_read_stream_stats_flags)
+{
+ auto hw_read_elem = HwReadElement::create(output_stream,
+ PipelineObject::create_element_name(element_name, output_stream->name(), output_stream->get_info().index),
+ HAILO_INFINITE_TIMEOUT, buffer_pool_size, hw_read_element_stats_flags, hw_read_stream_stats_flags, shutdown_event, pipeline_status);
+ CHECK_EXPECTED(hw_read_elem);
+ elements.push_back(hw_read_elem.value());
+ return hw_read_elem;
+}
+
+Expected<std::shared_ptr<PullQueueElement>> VStreamsBuilderUtils::add_pull_queue_element(std::shared_ptr<OutputStream> &output_stream,
+ std::shared_ptr<std::atomic<hailo_status>> &pipeline_status, std::vector<std::shared_ptr<PipelineElement>> &elements,
+ const std::string &element_name, EventPtr &shutdown_event, const hailo_vstream_params_t &vstream_params)
+{
+ auto pull_queue_elem = PullQueueElement::create(
+ PipelineObject::create_element_name(element_name, output_stream->name(), output_stream->get_info().index),
+ vstream_params, shutdown_event, pipeline_status);
+ CHECK_EXPECTED(pull_queue_elem);
+ elements.push_back(pull_queue_elem.value());
+ return pull_queue_elem;
+}
+
+Expected<std::shared_ptr<ArgmaxPostProcessElement>> VStreamsBuilderUtils::add_argmax_element(std::shared_ptr<OutputStream> &output_stream,
+ std::shared_ptr<std::atomic<hailo_status>> &pipeline_status, std::vector<std::shared_ptr<PipelineElement>> &elements,
+ const std::string &element_name, hailo_vstream_params_t &vstream_params, const NetFlowElement &argmax_op)
+{
+ // Updating metadata according to user request. TODO: HRT-9737
+ auto updated_outputs_metadata = argmax_op.op.get()->outputs_metadata();
+ updated_outputs_metadata.begin()->second.format = vstream_params.user_buffer_format;
+ argmax_op.op.get()->set_outputs_metadata(updated_outputs_metadata);
+ CHECK_SUCCESS_AS_EXPECTED(argmax_op.op.get()->validate_metadata());
+ // Updating metadata according to user request. TODO: HRT-9737 - End
+ auto argmax_element = ArgmaxPostProcessElement::create(argmax_op.op,
+ PipelineObject::create_element_name(element_name, output_stream->name(), output_stream->get_info().index),
+ vstream_params.pipeline_elements_stats_flags, pipeline_status);
+ CHECK_EXPECTED(argmax_element);
+ elements.push_back(argmax_element.value());
+ return argmax_element;
+}
+
+Expected<std::shared_ptr<SoftmaxPostProcessElement>> VStreamsBuilderUtils::add_softmax_element(std::shared_ptr<OutputStream> &output_stream,
+ std::shared_ptr<std::atomic<hailo_status>> &pipeline_status, std::vector<std::shared_ptr<PipelineElement>> &elements,
+ const std::string &element_name, hailo_vstream_params_t &vstream_params, const NetFlowElement &softmax_op)
+{
+ // Updating metadata according to user request. TODO: HRT-9737
+ // Currently softmax only supports inputs to be float32 and order NHWC or NC
+ auto updated_inputs_metadata = softmax_op.op.get()->inputs_metadata();
+ updated_inputs_metadata.begin()->second.format = vstream_params.user_buffer_format;
+ softmax_op.op.get()->set_inputs_metadata(updated_inputs_metadata);
+
+ auto updated_outputs_metadata = softmax_op.op.get()->outputs_metadata();
+ updated_outputs_metadata.begin()->second.format = vstream_params.user_buffer_format;
+ softmax_op.op.get()->set_outputs_metadata(updated_outputs_metadata);
+ CHECK_SUCCESS_AS_EXPECTED(softmax_op.op.get()->validate_metadata());
+ // Updating metadata according to user request. TODO: HRT-9737 - End
+ auto softmax_element = SoftmaxPostProcessElement::create(softmax_op.op,
+ PipelineObject::create_element_name(element_name, output_stream->name(), output_stream->get_info().index),
+ vstream_params.pipeline_elements_stats_flags, pipeline_status);
+ CHECK_EXPECTED(softmax_element);
+ elements.push_back(softmax_element.value());
+ return softmax_element;
+}
+
+Expected<std::shared_ptr<UserBufferQueueElement>> VStreamsBuilderUtils::add_user_buffer_queue_element(std::shared_ptr<OutputStream> &output_stream,
+ std::shared_ptr<std::atomic<hailo_status>> &pipeline_status, std::vector<std::shared_ptr<PipelineElement>> &elements,
+ const std::string &element_name, EventPtr &shutdown_event, const hailo_vstream_params_t &vstream_params)
+{
+ auto post_argmax_queue_element = UserBufferQueueElement::create(
+ PipelineObject::create_element_name(element_name, output_stream->name(), output_stream->get_info().index),
+ vstream_params, shutdown_event, pipeline_status);
+ CHECK_EXPECTED(post_argmax_queue_element);
+ elements.push_back(post_argmax_queue_element.value());
+ return post_argmax_queue_element;
+}
+
+Expected<std::shared_ptr<PostInferElement>> VStreamsBuilderUtils::add_post_infer_element(std::shared_ptr<OutputStream> &output_stream,
+ std::shared_ptr<std::atomic<hailo_status>> &pipeline_status, std::vector<std::shared_ptr<PipelineElement>> &elements,
+ const std::string &element_name, const hailo_vstream_params_t &vstream_params, EventPtr shutdown_event)
+{
+ auto post_infer_element = PostInferElement::create(output_stream->get_info().hw_shape, output_stream->get_info().format,
+ output_stream->get_info().shape, vstream_params.user_buffer_format, output_stream->get_info().quant_info, output_stream->get_info().nms_info,
+ PipelineObject::create_element_name(element_name, output_stream->name(), output_stream->get_info().index),
+ vstream_params, pipeline_status, shutdown_event);
+ CHECK_EXPECTED(post_infer_element);
+ elements.push_back(post_infer_element.value());
+ return post_infer_element;
+}
+
+Expected<std::vector<OutputVStream>> VStreamsBuilderUtils::create_output_post_process_argmax(std::shared_ptr<OutputStream> output_stream,
+ const NameToVStreamParamsMap &vstreams_params_map, const hailo_vstream_info_t &output_vstream_info, const NetFlowElement &argmax_op)
+{
+ std::vector<std::shared_ptr<PipelineElement>> elements;
+ std::vector<OutputVStream> vstreams;
+
+ EventPtr core_op_activated_event = nullptr;
+ if (!output_stream->is_scheduled()) {
+ core_op_activated_event = output_stream->get_core_op_activated_event();
+ }
+
+ auto shutdown_event = Event::create_shared(Event::State::not_signalled);
+ CHECK_AS_EXPECTED(nullptr != shutdown_event, HAILO_OUT_OF_HOST_MEMORY);
+
+ auto pipeline_status = make_shared_nothrow<std::atomic<hailo_status>>(HAILO_SUCCESS);
+ CHECK_AS_EXPECTED(nullptr != pipeline_status, HAILO_OUT_OF_HOST_MEMORY);
+
+ assert(!vstreams_params_map.empty());
+
+ // Note: In case of multiple entries in vstreams_params_map (e.g. in the case of demux), we set the
+ // pipeline_elements_stats_flags for the hw_read_element as the bitwise OR of all the flags.
+ hailo_pipeline_elem_stats_flags_t hw_read_element_stats_flags = HAILO_PIPELINE_ELEM_STATS_NONE;
+ hailo_vstream_stats_flags_t hw_read_stream_stats_flags = HAILO_VSTREAM_STATS_NONE;
+ size_t buffer_pool_size = 0;
+ for (const auto &elem_name_params : vstreams_params_map) {
+ hw_read_element_stats_flags |= elem_name_params.second.pipeline_elements_stats_flags;
+ hw_read_stream_stats_flags |= elem_name_params.second.vstream_stats_flags;
+ buffer_pool_size += elem_name_params.second.queue_size;
+ }
+
+ // TODO (HRT-4522): Support this measurement
+ CHECK_AS_EXPECTED(!(hw_read_stream_stats_flags & HAILO_VSTREAM_STATS_MEASURE_FPS), HAILO_NOT_IMPLEMENTED,
+ "Pipeline FPS statistics measurement is not implemented");
+
+ auto hw_read_element = add_hw_read_element(output_stream, pipeline_status, elements, "HwReadElement", shutdown_event,
+ buffer_pool_size, hw_read_element_stats_flags, hw_read_stream_stats_flags);
+ CHECK_EXPECTED(hw_read_element);
+
+ assert(1 == vstreams_params_map.size());
+ auto op_input_format = argmax_op.op->inputs_metadata().begin()->second.format;
+ auto vstream_params = expand_vstream_params_autos_argmax(vstreams_params_map.begin()->second, op_input_format);
+
+ auto hw_read_queue_element = add_pull_queue_element(output_stream, pipeline_status, elements, "PullQueueElement_hw_read",
+ shutdown_event, vstream_params);
+ CHECK_EXPECTED(hw_read_queue_element);
+
+ CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(hw_read_element.value(), hw_read_queue_element.value()));
+
+ auto argmax_element = add_argmax_element(output_stream, pipeline_status, elements, "ArgmaxPostProcessElement",
+ vstream_params, argmax_op);
+ CHECK_EXPECTED(argmax_element);
+
+ CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(hw_read_queue_element.value(), argmax_element.value()));
+
+ auto post_argmax_queue_element = add_user_buffer_queue_element(output_stream, pipeline_status, elements,
+ "UserBufferQueueElement_post_argmax", shutdown_event, vstream_params);
+ CHECK_EXPECTED(post_argmax_queue_element);
+
+ CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(argmax_element.value(), post_argmax_queue_element.value()));
+
+ auto pipeline_latency_accumulator = create_pipeline_latency_accumulator(vstream_params);
+ CHECK_EXPECTED(pipeline_latency_accumulator);
+
+ output_stream->set_timeout(std::chrono::milliseconds(HAILO_INFINITE));
+ hw_read_queue_element->get()->set_timeout(std::chrono::milliseconds(HAILO_INFINITE));
+ auto vstream = OutputVStream::create(output_vstream_info, vstream_params, post_argmax_queue_element.release(), std::move(elements),
+ std::move(pipeline_status), shutdown_event, core_op_activated_event, pipeline_latency_accumulator.release());
+ CHECK_EXPECTED(vstream);
+ vstreams.emplace_back(vstream.release());
+
+ for (const auto &current_vstream : vstreams) {
+ LOGGER__INFO("{}", current_vstream.get_pipeline_description());
+ }
+
+ return vstreams;
+}
+
hailo_status VStreamsBuilderUtils::add_demux(std::shared_ptr<OutputStream> output_stream, NameToVStreamParamsMap &vstreams_params_map,
std::vector<std::shared_ptr<PipelineElement>> &&base_elements, std::vector<OutputVStream> &vstreams,
std::shared_ptr<HwReadElement> hw_read_elem, EventPtr shutdown_event, std::shared_ptr<std::atomic<hailo_status>> pipeline_status,
auto post_infer_elem = PostInferElement::create(edge_info.hw_shape, edge_info.format,
edge_info.shape, vstream_params.user_buffer_format, edge_info.quant_info, edge_info.nms_info,
PipelineObject::create_element_name("PostInferElement", edge_info.name, edge_info.index),
- vstream_params, pipeline_status);
+ vstream_params, pipeline_status, shutdown_event);
CHECK_EXPECTED_AS_STATUS(post_infer_elem);
current_vstream_elements.push_back(post_infer_elem.value());
CHECK_SUCCESS(PipelinePad::link_pads(demux_queue_elem.value(), post_infer_elem.value()));
auto vstream_info = output_vstream_infos.find(fused_layer_name);
CHECK(vstream_info != output_vstream_infos.end(), HAILO_NOT_FOUND,
- "Failed to find vstream info of {}", fused_layer_name);
+ "Failed to find vstream info of {}. Could be due to use of old HEF. Try to re-compile network with newer Dataflow Compiler version", fused_layer_name);
vstreams_params = expand_vstream_params_autos(first_defused_stream_info, vstreams_params);
auto nms_elem = NmsMuxElement::create(nms_infos,
auto post_infer_elem = PostInferElement::create({}, src_stream_format,
{}, vstreams_params.user_buffer_format, vstream_info->second.quant_info, fused_layer_nms_info,
- PipelineObject::create_element_name("PostInferElement", fused_layer_name, 0), vstreams_params, pipeline_status);
+ PipelineObject::create_element_name("PostInferElement", fused_layer_name, 0), vstreams_params, pipeline_status,
+ shutdown_event);
CHECK_EXPECTED_AS_STATUS(post_infer_elem);
elements.push_back(post_infer_elem.value());
virtual hailo_status before_fork() { return HAILO_SUCCESS; };
virtual hailo_status after_fork_in_parent() { return HAILO_SUCCESS; };
virtual hailo_status after_fork_in_child() { return HAILO_SUCCESS; };
+ virtual bool is_aborted() { return m_is_aborted; };
protected:
BaseVStream(const hailo_vstream_info_t &vstream_info, const hailo_vstream_params_t &vstream_params,
virtual hailo_status before_fork() override;
virtual hailo_status after_fork_in_parent() override;
virtual hailo_status after_fork_in_child() override;
+ virtual hailo_status stop_and_clear() override;
+ virtual hailo_status start_vstream() override;
+ virtual bool is_aborted() override;
private:
InputVStreamClient(std::unique_ptr<HailoRtRpcClient> client, uint32_t input_vstream_handle, hailo_format_t &&user_buffer_format,
virtual hailo_status before_fork() override;
virtual hailo_status after_fork_in_parent() override;
virtual hailo_status after_fork_in_child() override;
+ virtual hailo_status stop_and_clear() override;
+ virtual hailo_status start_vstream() override;
+ virtual bool is_aborted() override;
private:
OutputVStreamClient(std::unique_ptr<HailoRtRpcClient> client, uint32_t outputs_vstream_handle, hailo_format_t &&user_buffer_format,
static Expected<std::shared_ptr<PostInferElement>> create(const hailo_3d_image_shape_t &src_image_shape,
const hailo_format_t &src_format, const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format,
const hailo_quant_info_t &dst_quant_info, const hailo_nms_info_t &nms_info, const std::string &name,
- hailo_pipeline_elem_stats_flags_t elem_flags, std::shared_ptr<std::atomic<hailo_status>> pipeline_status);
+ hailo_pipeline_elem_stats_flags_t elem_flags, std::shared_ptr<std::atomic<hailo_status>> pipeline_status,
+ std::chrono::milliseconds timeout, hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event,
+ size_t buffer_pool_size);
static Expected<std::shared_ptr<PostInferElement>> create(const hailo_3d_image_shape_t &src_image_shape, const hailo_format_t &src_format,
const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, const hailo_quant_info_t &dst_quant_info, const hailo_nms_info_t &nms_info,
- const std::string &name, const hailo_vstream_params_t &vstream_params, std::shared_ptr<std::atomic<hailo_status>> pipeline_status);
+ const std::string &name, const hailo_vstream_params_t &vstream_params, std::shared_ptr<std::atomic<hailo_status>> pipeline_status, EventPtr shutdown_event);
PostInferElement(std::unique_ptr<OutputTransformContext> &&transform_context, const std::string &name,
- DurationCollector &&duration_collector, std::shared_ptr<std::atomic<hailo_status>> &&pipeline_status);
+ DurationCollector &&duration_collector, std::shared_ptr<std::atomic<hailo_status>> &&pipeline_status, BufferPoolPtr buffer_pool,
+ std::chrono::milliseconds timeout);
virtual ~PostInferElement() = default;
virtual hailo_status run_push(PipelineBuffer &&buffer) override;
virtual PipelinePad &next_pad() override;
virtual std::string description() const override;
+ virtual std::vector<AccumulatorPtr> get_queue_size_accumulators() override;
protected:
virtual Expected<PipelineBuffer> action(PipelineBuffer &&input, PipelineBuffer &&optional) override;
private:
std::unique_ptr<OutputTransformContext> m_transform_context;
+ BufferPoolPtr m_pool;
+ std::chrono::milliseconds m_timeout;
+};
+
+class ArgmaxPostProcessElement : public FilterElement
+{
+public:
+ static Expected<std::shared_ptr<ArgmaxPostProcessElement>> create(std::shared_ptr<net_flow::Op> argmax_op,
+ const std::string &name, hailo_pipeline_elem_stats_flags_t elem_flags,
+ std::shared_ptr<std::atomic<hailo_status>> pipeline_status);
+ ArgmaxPostProcessElement(std::shared_ptr<net_flow::Op> argmax_op, const std::string &name,
+ DurationCollector &&duration_collector, std::shared_ptr<std::atomic<hailo_status>> &&pipeline_status);
+ virtual ~ArgmaxPostProcessElement() = default;
+ virtual hailo_status run_push(PipelineBuffer &&buffer) override;
+ virtual PipelinePad &next_pad() override;
+ virtual std::string description() const override;
+
+protected:
+ virtual Expected<PipelineBuffer> action(PipelineBuffer &&input, PipelineBuffer &&optional) override;
+
+private:
+ std::shared_ptr<net_flow::Op> m_argmax_op;
+};
+
+class SoftmaxPostProcessElement : public FilterElement
+{
+public:
+ static Expected<std::shared_ptr<SoftmaxPostProcessElement>> create(std::shared_ptr<net_flow::Op> softmax_op,
+ const std::string &name, hailo_pipeline_elem_stats_flags_t elem_flags,
+ std::shared_ptr<std::atomic<hailo_status>> pipeline_status);
+ SoftmaxPostProcessElement(std::shared_ptr<net_flow::Op> softmax_op, const std::string &name,
+ DurationCollector &&duration_collector, std::shared_ptr<std::atomic<hailo_status>> &&pipeline_status);
+ virtual ~SoftmaxPostProcessElement() = default;
+ virtual hailo_status run_push(PipelineBuffer &&buffer) override;
+ virtual PipelinePad &next_pad() override;
+ virtual std::string description() const override;
+
+protected:
+ virtual Expected<PipelineBuffer> action(PipelineBuffer &&input, PipelineBuffer &&optional) override;
+
+private:
+ std::shared_ptr<net_flow::Op> m_softmax_op;
};
class NmsPostProcessMuxElement : public BaseMuxElement
virtual Expected<PipelineBuffer> run_pull(PipelineBuffer &&optional, const PipelinePad &source) override;
virtual hailo_status execute_activate() override;
virtual hailo_status execute_deactivate() override;
- virtual hailo_status execute_post_deactivate() override;
+ virtual hailo_status execute_post_deactivate(bool should_clear_abort) override;
virtual hailo_status execute_clear() override;
virtual hailo_status execute_flush() override;
virtual hailo_status execute_abort() override;
- virtual hailo_status execute_resume() override;
+ virtual hailo_status execute_clear_abort() override;
virtual hailo_status execute_wait_for_finish() override;
uint32_t get_invalid_frames_count();
virtual std::string description() const override;
virtual Expected<PipelineBuffer> run_pull(PipelineBuffer &&optional, const PipelinePad &source) override;
virtual hailo_status execute_activate() override;
virtual hailo_status execute_deactivate() override;
- virtual hailo_status execute_post_deactivate() override;
+ virtual hailo_status execute_post_deactivate(bool should_clear_abort) override;
virtual hailo_status execute_clear() override;
virtual hailo_status execute_flush() override;
virtual hailo_status execute_abort() override;
- virtual hailo_status execute_resume() override;
+ virtual hailo_status execute_clear_abort() override;
virtual hailo_status execute_wait_for_finish() override;
virtual std::string description() const override;
static Expected<std::vector<OutputVStream>> create_output_nms(OutputStreamPtrVector &output_streams,
hailo_vstream_params_t vstreams_params,
const std::map<std::string, hailo_vstream_info_t> &output_vstream_infos);
+ static Expected<std::vector<OutputVStream>> create_output_vstreams_from_streams(const OutputStreamWithParamsVector &all_output_streams,
+ OutputStreamPtrVector &output_streams, const hailo_vstream_params_t &vstream_params,
+ const std::unordered_map<std::string, std::shared_ptr<NetFlowElement>> &post_process_ops,
+ const std::unordered_map<std::string, std::string> &op_inputs_to_op_name, const std::map<std::string, hailo_vstream_info_t> &output_vstream_infos_map);
static Expected<std::vector<OutputVStream>> create_output_post_process_nms(OutputStreamPtrVector &output_streams,
hailo_vstream_params_t vstreams_params,
const std::map<std::string, hailo_vstream_info_t> &output_vstream_infos,
const NetFlowElement &nms_op);
+ static Expected<std::shared_ptr<HwReadElement>> add_hw_read_element(std::shared_ptr<OutputStream> &output_stream,
+ std::shared_ptr<std::atomic<hailo_status>> &pipeline_status, std::vector<std::shared_ptr<PipelineElement>> &elements,
+ const std::string &element_name, EventPtr &shutdown_event, size_t buffer_pool_size,
+ const hailo_pipeline_elem_stats_flags_t &hw_read_element_stats_flags, const hailo_vstream_stats_flags_t &hw_read_stream_stats_flags);
+ static Expected<std::shared_ptr<PullQueueElement>> add_pull_queue_element(std::shared_ptr<OutputStream> &output_stream,
+ std::shared_ptr<std::atomic<hailo_status>> &pipeline_status, std::vector<std::shared_ptr<PipelineElement>> &elements,
+ const std::string &element_name, EventPtr &shutdown_event, const hailo_vstream_params_t &vstream_params);
+ static Expected<std::shared_ptr<ArgmaxPostProcessElement>> add_argmax_element(std::shared_ptr<OutputStream> &output_stream,
+ std::shared_ptr<std::atomic<hailo_status>> &pipeline_status, std::vector<std::shared_ptr<PipelineElement>> &elements,
+ const std::string &element_name, hailo_vstream_params_t &vstream_params, const NetFlowElement &argmax_op);
+ static Expected<std::shared_ptr<SoftmaxPostProcessElement>> add_softmax_element(std::shared_ptr<OutputStream> &output_stream,
+ std::shared_ptr<std::atomic<hailo_status>> &pipeline_status, std::vector<std::shared_ptr<PipelineElement>> &elements,
+ const std::string &element_name, hailo_vstream_params_t &vstream_params, const NetFlowElement &softmax_op);
+ static Expected<std::shared_ptr<UserBufferQueueElement>> add_user_buffer_queue_element(std::shared_ptr<OutputStream> &output_stream,
+ std::shared_ptr<std::atomic<hailo_status>> &pipeline_status, std::vector<std::shared_ptr<PipelineElement>> &elements,
+ const std::string &element_name, EventPtr &shutdown_event, const hailo_vstream_params_t &vstream_params);
+ static Expected<std::shared_ptr<PostInferElement>> add_post_infer_element(std::shared_ptr<OutputStream> &output_stream,
+ std::shared_ptr<std::atomic<hailo_status>> &pipeline_status, std::vector<std::shared_ptr<PipelineElement>> &elements,
+ const std::string &element_name, const hailo_vstream_params_t &vstream_params, EventPtr shutdown_event);
static hailo_status add_demux(std::shared_ptr<OutputStream> output_stream, NameToVStreamParamsMap &vstreams_params_map,
std::vector<std::shared_ptr<PipelineElement>> &&elements, std::vector<OutputVStream> &vstreams,
std::shared_ptr<HwReadElement> hw_read_elem, EventPtr shutdown_event, std::shared_ptr<std::atomic<hailo_status>> pipeline_status,
const std::map<std::string, hailo_vstream_info_t> &output_vstream_infos,
const NetFlowElement &nms_op);
static Expected<AccumulatorPtr> create_pipeline_latency_accumulator(const hailo_vstream_params_t &vstreams_params);
+
+private:
+ static Expected<std::vector<OutputVStream>> create_output_post_process_argmax(std::shared_ptr<OutputStream> output_stream,
+ const NameToVStreamParamsMap &vstreams_params_map, const hailo_vstream_info_t &output_vstream_info, const NetFlowElement &argmax_op);
+ static Expected<std::vector<OutputVStream>> create_output_post_process_softmax(std::shared_ptr<OutputStream> output_stream,
+ const NameToVStreamParamsMap &vstreams_params_map, const hailo_vstream_info_t &output_vstream_info, const NetFlowElement &softmax_op);
};
} /* namespace hailort */
namespace hailort
{
+Expected<std::shared_ptr<ConfiguredNetworkGroup>> ConfiguredNetworkGroup::duplicate_network_group_client(uint32_t handle, const std::string &network_group_name)
+{
+#ifdef HAILO_SUPPORT_MULTI_PROCESS
+ auto net_group_client = ConfiguredNetworkGroupClient::duplicate_network_group_client(handle, network_group_name);
+ CHECK_EXPECTED(net_group_client);
+
+ return std::shared_ptr<ConfiguredNetworkGroup>(net_group_client.release());
+#else
+ (void)handle;
+ (void)network_group_name;
+ LOGGER__ERROR("`duplicate_network_group_client()` requires service compilation with HAILO_BUILD_SERVICE");
+ return make_unexpected(HAILO_INVALID_OPERATION);
+#endif // HAILO_SUPPORT_MULTI_PROCESS
+}
+
+Expected<uint32_t> ConfiguredNetworkGroup::get_client_handle() const
+{
+ LOGGER__ERROR("`get_client_handle()` is valid only when working with HailoRT Service!");
+ return make_unexpected(HAILO_INVALID_OPERATION);
+}
+
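+// Usage sketch (editorial, not part of this change; names taken from the API above):
+// in a HAILO_BUILD_SERVICE build, a second process can attach to an already-configured
+// network group through the service:
+//
+// Expected<std::shared_ptr<ConfiguredNetworkGroup>> attach_from_other_process(ConfiguredNetworkGroup &group)
+// {
+//     auto handle = group.get_client_handle();
+//     CHECK_EXPECTED(handle); // returns HAILO_INVALID_OPERATION for non-service groups
+//     return ConfiguredNetworkGroup::duplicate_network_group_client(handle.value(), group.name());
+// }
+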
Expected<std::unique_ptr<ActivatedNetworkGroup>> ConfiguredNetworkGroup::activate()
{
const auto network_group_params = HailoRTDefaults::get_active_network_group_params();
}
/* Network group base functions */
+Expected<HwInferResults> ConfiguredNetworkGroupBase::run_hw_infer_estimator()
+{
+ return get_core_op()->run_hw_infer_estimator();
+}
+
Expected<LatencyMeasurementResult> ConfiguredNetworkGroupBase::get_latency_measurement(const std::string &network_name)
{
return get_core_op()->get_latency_measurement(network_name);
Expected<OutputStreamWithParamsVector> ConfiguredNetworkGroupBase::get_output_streams_from_vstream_names(
const std::map<std::string, hailo_vstream_params_t> &outputs_params)
{
- return get_core_op()->get_output_streams_from_vstream_names(outputs_params);
+ OutputStreamWithParamsVector results;
+ std::unordered_map<std::string, hailo_vstream_params_t> outputs_edges_params;
+ for (auto &name_params_pair : outputs_params) {
+ auto stream_names = m_network_group_metadata.get_stream_names_from_vstream_name(name_params_pair.first);
+ CHECK_EXPECTED(stream_names);
+
+ for (auto &stream_name : stream_names.value()) {
+ auto stream = get_shared_output_stream_by_name(stream_name);
+ CHECK_EXPECTED(stream);
+ if (stream.value()->get_info().is_mux) {
+ outputs_edges_params.emplace(name_params_pair);
+ }
+ else {
+ NameToVStreamParamsMap name_to_params = {name_params_pair};
+ results.emplace_back(stream.value(), name_to_params);
+ }
+ }
+ }
+ // Add the mux streams (collected above by edge name) to results
+ hailo_status status = add_mux_streams_by_edges_names(results, outputs_edges_params);
+ CHECK_SUCCESS_AS_EXPECTED(status);
+
+ return results;
+}
+
+// This function adds to results the OutputStreams that correspond to the edges in outputs_edges_params.
+// If an edge name appears in outputs_edges_params, then all of its predecessors must appear in outputs_edges_params as well; otherwise, an error is returned.
+// We use the set seen_edges to mark edges that have already been evaluated through one of their predecessors.
+hailo_status ConfiguredNetworkGroupBase::add_mux_streams_by_edges_names(OutputStreamWithParamsVector &results,
+ const std::unordered_map<std::string, hailo_vstream_params_t> &outputs_edges_params)
+{
+ std::unordered_set<std::string> seen_edges;
+ for (auto &name_params_pair : outputs_edges_params) {
+ if (seen_edges.end() != seen_edges.find(name_params_pair.first)) {
+ // Edge has already been seen by one of its predecessors
+ continue;
+ }
+ auto output_streams = get_output_streams_by_vstream_name(name_params_pair.first);
+ CHECK_EXPECTED_AS_STATUS(output_streams);
+ CHECK(output_streams->size() == 1, HAILO_INVALID_ARGUMENT,
+ "mux streams cannot be separated into multiple streams");
+ auto output_stream = output_streams.release()[0];
+
+ // TODO: Find a better way to get the mux edges without creating OutputDemuxer
+ auto expected_demuxer = OutputDemuxer::create(*output_stream);
+ CHECK_EXPECTED_AS_STATUS(expected_demuxer);
+
+ NameToVStreamParamsMap name_to_params;
+ for (auto &edge : expected_demuxer.value()->get_edges_stream_info()) {
+ auto edge_name_params_pair = outputs_edges_params.find(edge.name);
+ CHECK(edge_name_params_pair != outputs_edges_params.end(), HAILO_INVALID_ARGUMENT,
+ "All edges of stream {} must be in output vstream params. edge {} is missing.",
+ name_params_pair.first, edge.name);
+ seen_edges.insert(edge.name);
+ name_to_params.insert(*edge_name_params_pair);
+ }
+ results.emplace_back(output_stream, name_to_params);
+ }
+ return HAILO_SUCCESS;
}
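
// Illustration (editorial; stream names are hypothetical): for a mux stream demuxed
// into edges "conv1" and "conv2", the output vstream params must cover all edges:
//
//   outputs_edges_params = { {"conv1", params}, {"conv2", params} };  // OK - all edges present
//   outputs_edges_params = { {"conv1", params} };                     // error - "conv2" is missing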
Expected<OutputStreamPtrVector> ConfiguredNetworkGroupBase::get_output_streams_by_vstream_name(const std::string &name)
{
- return get_core_op()->get_output_streams_by_vstream_name(name);
+ auto stream_names = m_network_group_metadata.get_stream_names_from_vstream_name(name);
+ CHECK_EXPECTED(stream_names);
+
+ OutputStreamPtrVector output_streams;
+ output_streams.reserve(stream_names->size());
+ for (const auto &stream_name : stream_names.value()) {
+ auto stream = get_shared_output_stream_by_name(stream_name);
+ CHECK_EXPECTED(stream);
+ output_streams.emplace_back(stream.value());
+ }
+
+ return output_streams;
}
Expected<LayerInfo> ConfiguredNetworkGroupBase::get_layer_info(const std::string &stream_name)
ConfiguredNetworkGroupBase::ConfiguredNetworkGroupBase(
const ConfigureNetworkParams &config_params, std::vector<std::shared_ptr<CoreOp>> &&core_ops,
- std::vector<std::shared_ptr<NetFlowElement>> &&net_flow_ops) :
+ NetworkGroupMetadata &&metadata) :
m_config_params(config_params),
m_core_ops(std::move(core_ops)),
- m_net_flow_ops(std::move(net_flow_ops))
+ m_network_group_metadata(std::move(metadata))
{}
// static func
const std::string &ConfiguredNetworkGroupBase::get_network_group_name() const
{
- return get_core_op_metadata()->core_op_name();
+ return m_network_group_metadata.name();
}
const std::string &ConfiguredNetworkGroupBase::name() const
{
- return get_core_op_metadata()->core_op_name();
+ return m_network_group_metadata.name();
}
hailo_status ConfiguredNetworkGroupBase::activate_low_level_streams(uint16_t dynamic_batch_size, bool resume_pending_stream_transfers)
return get_core_op()->get_stream_batch_size(stream_name);
}
-bool ConfiguredNetworkGroupBase::is_multi_context() const
+Expected<std::vector<std::string>> ConfiguredNetworkGroupBase::get_sorted_output_names()
{
- return get_core_op()->is_multi_context();
+ auto res = m_network_group_metadata.get_sorted_output_names();
+ return res;
}
-const ConfigureNetworkParams ConfiguredNetworkGroupBase::get_config_params() const
+Expected<std::vector<std::string>> ConfiguredNetworkGroupBase::get_stream_names_from_vstream_name(const std::string &vstream_name)
{
- return get_core_op()->get_config_params();
+ auto res = m_network_group_metadata.get_stream_names_from_vstream_name(vstream_name);
+ return res;
}
Expected<std::vector<std::string>> ConfiguredNetworkGroupBase::get_vstream_names_from_stream_name(const std::string &stream_name)
{
- return get_core_op()->get_vstream_names_from_stream_name(stream_name);
+ auto res = m_network_group_metadata.get_vstream_names_from_stream_name(stream_name);
+ return res;
+}
+
+bool ConfiguredNetworkGroupBase::is_multi_context() const
+{
+ return get_core_op()->is_multi_context();
+}
+
+const ConfigureNetworkParams ConfiguredNetworkGroupBase::get_config_params() const
+{
+ return get_core_op()->get_config_params();
}
const SupportedFeatures &ConfiguredNetworkGroupBase::get_supported_features()
Expected<std::vector<std::vector<std::string>>> ConfiguredNetworkGroupBase::get_output_vstream_groups()
{
- return get_core_op()->get_output_vstream_groups();
+ std::vector<std::vector<std::string>> results;
+
+ for (auto output_stream : get_output_streams()) {
+ auto vstreams_group = m_network_group_metadata.get_vstream_names_from_stream_name(output_stream.get().name());
+ CHECK_EXPECTED(vstreams_group);
+ results.push_back(vstreams_group.release());
+ }
+
+ return results;
}
Expected<std::vector<std::map<std::string, hailo_vstream_params_t>>> ConfiguredNetworkGroupBase::make_output_vstream_params_groups(
bool quantized, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size)
{
- return get_core_op()->make_output_vstream_params_groups(quantized, format_type, timeout_ms, queue_size);
+ auto params = make_output_vstream_params(quantized, format_type, timeout_ms, queue_size);
+ CHECK_EXPECTED(params);
+
+ auto groups = get_output_vstream_groups();
+ CHECK_EXPECTED(groups);
+
+ std::vector<std::map<std::string, hailo_vstream_params_t>> results(groups->size(), std::map<std::string, hailo_vstream_params_t>());
+
+ size_t pipeline_group_index = 0;
+ for (const auto &group : groups.release()) {
+ for (const auto &name_pair : params.value()) {
+ if (contains(group, name_pair.first)) {
+ results[pipeline_group_index].insert(name_pair);
+ }
+ }
+ pipeline_group_index++;
+ }
+
+ return results;
}
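
// Illustration (editorial; names are hypothetical): given output vstream groups
// {{"out0"}, {"nms_a", "nms_b"}}, the flat params map is split into one map per
// group, so each output pipeline group can be constructed independently.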
Expected<std::map<std::string, hailo_vstream_params_t>> ConfiguredNetworkGroupBase::make_input_vstream_params(
bool quantized, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size,
const std::string &network_name)
{
- return get_core_op()->make_input_vstream_params(quantized, format_type, timeout_ms, queue_size, network_name);
+ auto input_vstream_infos = m_network_group_metadata.get_input_vstream_infos(network_name);
+ CHECK_EXPECTED(input_vstream_infos);
+
+ std::map<std::string, hailo_vstream_params_t> res;
+ auto status = Hef::Impl::fill_missing_vstream_params_with_default(res, input_vstream_infos.value(), quantized,
+ format_type, timeout_ms, queue_size);
+ CHECK_SUCCESS_AS_EXPECTED(status);
+ return res;
}
Expected<std::map<std::string, hailo_vstream_params_t>> ConfiguredNetworkGroupBase::make_output_vstream_params(
bool quantized, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size,
const std::string &network_name)
{
- return get_core_op()->make_output_vstream_params(quantized, format_type, timeout_ms, queue_size, network_name);
+ auto output_vstream_infos = m_network_group_metadata.get_output_vstream_infos(network_name);
+ CHECK_EXPECTED(output_vstream_infos);
+ std::map<std::string, hailo_vstream_params_t> res;
+ auto status = Hef::Impl::fill_missing_vstream_params_with_default(res, output_vstream_infos.value(), quantized,
+ format_type, timeout_ms, queue_size);
+ CHECK_SUCCESS_AS_EXPECTED(status);
+ return res;
}
Expected<std::vector<hailo_network_info_t>> ConfiguredNetworkGroupBase::get_network_infos() const
{
- return get_core_op()->get_network_infos();
+ return m_network_group_metadata.get_network_infos();
}
Expected<std::vector<hailo_stream_info_t>> ConfiguredNetworkGroupBase::get_all_stream_infos(
const std::string &network_name) const
{
- return get_core_op()->get_all_stream_infos(network_name);
+ return get_core_op_metadata()->get_all_stream_infos(network_name);
}
Expected<std::vector<hailo_vstream_info_t>> ConfiguredNetworkGroupBase::get_input_vstream_infos(
const std::string &network_name) const
{
- return get_core_op()->get_input_vstream_infos(network_name);
+ return m_network_group_metadata.get_input_vstream_infos(network_name);
}
Expected<std::vector<hailo_vstream_info_t>> ConfiguredNetworkGroupBase::get_output_vstream_infos(
const std::string &network_name) const
{
- return get_core_op()->get_output_vstream_infos(network_name);
+ return m_network_group_metadata.get_output_vstream_infos(network_name);
}
Expected<std::vector<hailo_vstream_info_t>> ConfiguredNetworkGroupBase::get_all_vstream_infos(
const std::string &network_name) const
{
- return get_core_op()->get_all_vstream_infos(network_name);
+ return m_network_group_metadata.get_all_vstream_infos(network_name);
}
AccumulatorPtr ConfiguredNetworkGroupBase::get_activation_time_accumulator() const
return vstreams;
}
-Expected<std::vector<OutputVStream>> ConfiguredNetworkGroupBase::create_output_vstreams(const std::map<std::string, hailo_vstream_params_t> &outputs_params)
+Expected<std::vector<OutputVStream>> ConfiguredNetworkGroupBase::create_output_vstreams(const std::map<std::string, hailo_vstream_params_t> &vstreams_params)
{
std::vector<OutputVStream> vstreams;
- vstreams.reserve(outputs_params.size());
- auto output_streams = get_output_streams_from_vstream_names(outputs_params);
- CHECK_EXPECTED(output_streams);
+ vstreams.reserve(vstreams_params.size());
+ auto all_output_streams_expected = get_output_streams_from_vstream_names(vstreams_params);
+ CHECK_EXPECTED(all_output_streams_expected);
+ auto all_output_streams = all_output_streams_expected.release();
auto output_vstream_infos = get_output_vstream_infos();
CHECK_EXPECTED(output_vstream_infos);
auto output_vstream_infos_map = vstream_infos_vector_to_map(output_vstream_infos.release());
- // We iterate through all output streams, and if they are nms, we collect them together by their original stream name.
- // We need this step because all nms output streams of the same original stream need to be fused together
-
- std::unordered_map<std::string, std::shared_ptr<NetFlowElement>> post_process_nms_ops;
- std::set<std::string> post_process_stream_inputs;
- for (auto &op : m_net_flow_ops) {
- post_process_nms_ops.insert({op->name, op});
- post_process_stream_inputs.insert(op->input_streams.begin(), op->input_streams.end());
- }
- std::map<std::string, std::pair<OutputStreamPtrVector, hailo_vstream_params_t>> nms_op_output_streams;
- std::map<std::string, std::pair<OutputStreamPtrVector, hailo_vstream_params_t>> nms_output_streams;
- for (auto &stream_params_pair : output_streams.value()) {
- if ((HAILO_FORMAT_ORDER_HAILO_NMS == stream_params_pair.first->get_info().format.order && stream_params_pair.first->get_info().nms_info.is_defused) &&
- (outputs_params.end() != outputs_params.find(stream_params_pair.first->get_info().nms_info.defuse_info.original_name))) {
- auto original_name = stream_params_pair.first->get_info().nms_info.defuse_info.original_name;
- nms_output_streams.emplace(original_name, std::pair<OutputStreamPtrVector, hailo_vstream_params_t>(
- OutputStreamPtrVector(), outputs_params.at(original_name)));
- nms_output_streams[original_name].first.push_back(stream_params_pair.first);
- } else if (post_process_stream_inputs.count(stream_params_pair.first->get_info().name)) {
- for (auto &op : m_net_flow_ops) {
- if (op->input_streams.count(stream_params_pair.first->get_info().name)) {
- assert(op->op->outputs_metadata().size() == 1);
- nms_op_output_streams.emplace(op->name, std::pair<OutputStreamPtrVector, hailo_vstream_params_t>(
- OutputStreamPtrVector(), outputs_params.at(op->op->outputs_metadata().begin()->first)));
- nms_op_output_streams[op->name].first.push_back(stream_params_pair.first);
- }
- }
- } else {
- auto outputs = VStreamsBuilderUtils::create_outputs(stream_params_pair.first, stream_params_pair.second, output_vstream_infos_map);
- CHECK_EXPECTED(outputs);
- vstreams.insert(vstreams.end(), std::make_move_iterator(outputs->begin()), std::make_move_iterator(outputs->end()));
+ // Building DBs that connect output_vstreams, output_streams and ops.
+ // Note: We assume each post-process op has a unique set of output streams;
+ // in other words, an output stream cannot be connected to more than one op.
+ std::unordered_map<std::string, std::shared_ptr<NetFlowElement>> post_process_ops;
+ std::unordered_map<stream_name_t, op_name_t> op_inputs_to_op_name;
+ for (auto &op : m_network_group_metadata.m_net_flow_ops) {
+ post_process_ops.insert({op->name, op});
+ for (auto &input_stream : op->input_streams) {
+ op_inputs_to_op_name.insert({input_stream, op->name});
}
}
- for (auto &nms_output_stream_pair : nms_output_streams) {
- auto outputs = VStreamsBuilderUtils::create_output_nms(nms_output_stream_pair.second.first, nms_output_stream_pair.second.second,
- output_vstream_infos_map);
- CHECK_EXPECTED(outputs);
- vstreams.insert(vstreams.end(), std::make_move_iterator(outputs->begin()), std::make_move_iterator(outputs->end()));
- }
- for (auto &nms_output_stream_pair : nms_op_output_streams) {
- auto op = post_process_nms_ops.at(nms_output_stream_pair.first);
- auto outputs = VStreamsBuilderUtils::create_output_post_process_nms(nms_output_stream_pair.second.first,
- nms_output_stream_pair.second.second, output_vstream_infos_map,
- *op);
+
+ // streams_added holds the names of all streams whose connected vstreams were already added (relevant for demux cases)
+ std::vector<std::string> streams_added;
+ for (auto &vstream_params : vstreams_params) {
+ auto output_streams = get_output_streams_by_vstream_name(vstream_params.first);
+ CHECK_EXPECTED(output_streams);
+ if (contains(streams_added, static_cast<std::string>(output_streams.value()[0]->get_info().name))) {
+ continue;
+ }
+ for (auto &output_stream : output_streams.value()) {
+ streams_added.push_back(output_stream->get_info().name);
+ }
+
+ auto outputs = VStreamsBuilderUtils::create_output_vstreams_from_streams(all_output_streams, output_streams.value(), vstream_params.second,
+ post_process_ops, op_inputs_to_op_name, output_vstream_infos_map);
CHECK_EXPECTED(outputs);
vstreams.insert(vstreams.end(), std::make_move_iterator(outputs->begin()), std::make_move_iterator(outputs->end()));
}
get_core_op()->set_vstreams_multiplexer_callbacks(vstreams);
-
return vstreams;
}
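
// Usage sketch (editorial; argument values are illustrative, and the empty network
// name is assumed to select all networks): build output vstreams from default params.
//
// auto params = net_group->make_output_vstream_params(/*quantized=*/true,
//     HAILO_FORMAT_TYPE_AUTO, /*timeout_ms=*/1000, /*queue_size=*/2, /*network_name=*/"");
// CHECK_EXPECTED(params);
// auto vstreams = net_group->create_output_vstreams(params.value());
// CHECK_EXPECTED(vstreams);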
namespace hailort
{
+using stream_name_t = std::string;
+using op_name_t = std::string;
class ConfiguredNetworkGroupBase : public ConfiguredNetworkGroup
{
public:
static Expected<std::shared_ptr<ConfiguredNetworkGroupBase>> create(const ConfigureNetworkParams &config_params,
- std::vector<std::shared_ptr<CoreOp>> &&core_ops, std::vector<std::shared_ptr<NetFlowElement>> &&net_flow_ops)
+ std::vector<std::shared_ptr<CoreOp>> &&core_ops, NetworkGroupMetadata &&metadata)
{
auto net_group_ptr = std::shared_ptr<ConfiguredNetworkGroupBase>(new (std::nothrow)
- ConfiguredNetworkGroupBase(config_params, std::move(core_ops), std::move(net_flow_ops)));
- // auto net_group_ptr = make_shared_nothrow<ConfiguredNetworkGroupBase>(config_params, std::move(core_ops), std::move(net_flow_ops));
+ ConfiguredNetworkGroupBase(config_params, std::move(core_ops), std::move(metadata)));
CHECK_NOT_NULL_AS_EXPECTED(net_group_ptr, HAILO_OUT_OF_HOST_MEMORY);
return net_group_ptr;
virtual bool is_multi_context() const override;
virtual const ConfigureNetworkParams get_config_params() const override;
+ virtual Expected<HwInferResults> run_hw_infer_estimator() override;
+
// TODO: HRT-9551 - Change to get_core_op_by_name() when multiple core_ops supported
std::shared_ptr<CoreOp> get_core_op() const;
// TODO: HRT-9546 Remove
const std::shared_ptr<CoreOpMetadata> get_core_op_metadata() const;
- Expected<std::vector<std::string>> get_vstream_names_from_stream_name(const std::string &stream_name);
const SupportedFeatures &get_supported_features();
Expected<uint16_t> get_stream_batch_size(const std::string &stream_name);
+ virtual Expected<std::vector<std::string>> get_sorted_output_names() override;
+ virtual Expected<std::vector<std::string>> get_stream_names_from_vstream_name(const std::string &vstream_name) override;
+ virtual Expected<std::vector<std::string>> get_vstream_names_from_stream_name(const std::string &stream_name) override;
+
virtual Expected<std::vector<InputVStream>> create_input_vstreams(const std::map<std::string, hailo_vstream_params_t> &inputs_params) override;
virtual Expected<std::vector<OutputVStream>> create_output_vstreams(const std::map<std::string, hailo_vstream_params_t> &outputs_params) override;
private:
ConfiguredNetworkGroupBase(const ConfigureNetworkParams &config_params,
- std::vector<std::shared_ptr<CoreOp>> &&core_ops, std::vector<std::shared_ptr<NetFlowElement>> &&net_flow_ops);
+ std::vector<std::shared_ptr<CoreOp>> &&core_ops, NetworkGroupMetadata &&metadata);
static uint16_t get_smallest_configured_batch_size(const ConfigureNetworkParams &config_params);
hailo_status create_vdma_input_stream(Device &device, const std::string &stream_name,
const ConfigureNetworkParams m_config_params;
std::vector<std::shared_ptr<CoreOp>> m_core_ops;
- std::vector<std::shared_ptr<NetFlowElement>> m_net_flow_ops;
+ NetworkGroupMetadata m_network_group_metadata;
friend class VDeviceCoreOp;
friend class VDeviceActivatedCoreOp;
virtual bool is_multi_context() const override;
virtual const ConfigureNetworkParams get_config_params() const override;
+ virtual Expected<std::vector<std::string>> get_sorted_output_names() override;
+ virtual Expected<std::vector<std::string>> get_stream_names_from_vstream_name(const std::string &vstream_name) override;
+ virtual Expected<std::vector<std::string>> get_vstream_names_from_stream_name(const std::string &stream_name) override;
+
+ virtual Expected<HwInferResults> run_hw_infer_estimator() override;
+
virtual Expected<std::vector<InputVStream>> create_input_vstreams(const std::map<std::string, hailo_vstream_params_t> &inputs_params);
virtual Expected<std::vector<OutputVStream>> create_output_vstreams(const std::map<std::string, hailo_vstream_params_t> &outputs_params);
virtual hailo_status after_fork_in_parent() override;
virtual hailo_status after_fork_in_child() override;
+ virtual Expected<uint32_t> get_client_handle() const override
+ {
+ auto val = m_handle;
+ return val;
+ };
+
+ static Expected<std::shared_ptr<ConfiguredNetworkGroupClient>> duplicate_network_group_client(uint32_t handle, const std::string &network_group_name);
+
private:
+ ConfiguredNetworkGroupClient(uint32_t handle, const std::string &network_group_name);
hailo_status create_client();
std::unique_ptr<HailoRtRpcClient> m_client;
if (CMAKE_SYSTEM_NAME STREQUAL QNX)
set(HAILO_FULL_OS_DIR ${HAILO_OS_DIR}/qnx)
else()
- set(HAILO_FULL_OS_DIR ${HAILO_OS_DIR}/unix)
+ set(HAILO_FULL_OS_DIR ${HAILO_OS_DIR}/linux)
endif()
else()
message(FATAL_ERROR "Unexpeced platform target, stopping build")
set(HAILO_FULL_OS_DIR ${HAILO_FULL_OS_DIR} PARENT_SCOPE)
-set(files
- ${HAILO_OS_DIR}/microsec_timer.cpp
- ${HAILO_OS_DIR}/file_descriptor.cpp
- ${HAILO_OS_DIR}/mmap_buffer.cpp
- ${HAILO_OS_DIR}/hailort_driver.cpp
- ${HAILO_FULL_OS_DIR}/event.cpp
- ${HAILO_FULL_OS_DIR}/driver_scan.cpp
-)
+if(WIN32)
+ add_subdirectory(windows)
+elseif(UNIX)
+ add_subdirectory(posix)
+else()
+ message(FATAL_ERROR "Unexpeced platform target, stopping build")
+endif()
-set(HAILORT_CPP_OS_SOURCES ${files} PARENT_SCOPE)
+set(HAILORT_CPP_SOURCES ${HAILORT_CPP_SOURCES} ${SRC_FILES} PARENT_SCOPE)
\ No newline at end of file
constexpr uint8_t MIN_D2H_CHANNEL_INDEX = MAX_H2D_CHANNEL_INDEX + 1;
constexpr uint8_t MAX_D2H_CHANNEL_INDEX = 31;
+constexpr size_t SIZE_OF_SINGLE_DESCRIPTOR = 0x10;
// NOTE: don't change members from this struct without updating all code using it (platform specific)
struct ChannelInterruptTimestamp {
using ChannelsBitmap = std::array<uint32_t, MAX_VDMA_ENGINES_COUNT>;
#if defined(__linux__) || defined(_MSC_VER)
+// Unique handle returned from the driver.
using vdma_mapped_buffer_driver_identifier = uintptr_t;
#elif defined(__QNX__)
-struct vdma_mapped_buffer_driver_identifier {
- shm_handle_t shm_handle;
- int shm_fd;
-};
+// Identifier is the shared memory file descriptor.
+using vdma_mapped_buffer_driver_identifier = int;
#else
#error "unsupported platform!"
#endif // defined(__linux__) || defined(_MSC_VER)
+struct DescriptorsListInfo {
+ uintptr_t handle; // Unique identifier for the driver.
+ uint64_t dma_address;
+ size_t desc_count;
+ void *user_address;
+};
+
class HailoRTDriver final
{
public:
BOTH
};
+ enum class DmaSyncDirection {
+ TO_HOST = 0,
+ TO_DEVICE
+ };
+
enum class DmaType {
PCIE,
DRAM
using VdmaBufferHandle = size_t;
- static Expected<HailoRTDriver> create(const std::string &dev_path);
+ static Expected<HailoRTDriver> create(const DeviceInfo &device_info);
// TODO: HRT-7309 add implementation for Windows
#if defined(__linux__) || defined(__QNX__)
hailo_status write_vdma_channel_register(vdma::ChannelId channel_id, DmaDirection data_direction, size_t offset,
size_t reg_size, uint32_t data);
- hailo_status vdma_buffer_sync(VdmaBufferHandle buffer, DmaDirection sync_direction, size_t offset, size_t count);
+ hailo_status vdma_buffer_sync(VdmaBufferHandle buffer, DmaSyncDirection sync_direction, size_t offset, size_t count);
hailo_status vdma_interrupts_enable(const ChannelsBitmap &channels_bitmap, bool enable_timestamps_measure);
hailo_status vdma_interrupts_disable(const ChannelsBitmap &channel_id);
hailo_status vdma_buffer_unmap(VdmaBufferHandle handle);
/**
- * Allocate vdma descriptors buffer that is accessable via kernel mode, user mode and the given board (using DMA).
- *
+ * Allocate a vdma descriptors list object that can be bound to a buffer. Used for scatter-gather vdma.
+ *
* @param[in] desc_count - number of descriptors to allocate. The descriptor max size is DESC_MAX_SIZE.
- * @return Upon success, returns Expected of a pair <desc_handle, dma_address>.
- * Otherwise, returns Unexpected of ::hailo_status error.
+ * @param[in] is_circular - if true, the descriptors list can be used in a circular manner (and desc_count must
+ * be a power of 2)
*/
- Expected<std::pair<uintptr_t, uint64_t>> descriptors_list_create(size_t desc_count);
-
+ Expected<DescriptorsListInfo> descriptors_list_create(size_t desc_count, bool is_circular);
+
/**
- * Frees a vdma descriptors buffer allocated by 'create_descriptors_buffer'.
+ * Frees a vdma descriptors buffer allocated by 'descriptors_list_create'.
*/
- hailo_status descriptors_list_release(uintptr_t desc_handle);
+ hailo_status descriptors_list_release(const DescriptorsListInfo &descriptors_list_info);
/**
* Configure vdma channel descriptors to point to the given user address.
hailo_status vdma_continuous_buffer_free(uintptr_t buffer_handle);
/**
- * The actual desc page size might be smaller than the once requested, depends on the host capabilities.
+ * Marks the device as used for vDMA operations. Only one open FD can be marked at once.
+ * The device is "unmarked" only on FD close.
*/
- uint16_t calc_desc_page_size(uint16_t requested_size) const
+ hailo_status mark_as_used();
+
+ const std::string &device_id() const
{
- if (m_desc_max_page_size < requested_size) {
- LOGGER__WARNING("Requested desc page size ({}) is bigger than max on this host ({}).",
- requested_size, m_desc_max_page_size);
- }
- return static_cast<uint16_t>(std::min(static_cast<uint32_t>(requested_size), static_cast<uint32_t>(m_desc_max_page_size)));
+ return m_device_info.device_id;
}
inline DmaType dma_type() const
FileDescriptor& fd() {return m_fd;}
- const std::string &dev_path() const
+ inline bool allocate_driver_buffer() const
{
- return m_dev_path;
- }
-
- hailo_status mark_as_used();
-
-#ifdef __QNX__
- inline pid_t resource_manager_pid() const
- {
- return m_resource_manager_pid;
- }
-#endif // __QNX__
-
- inline bool allocate_driver_buffer() const {
return m_allocate_driver_buffer;
}
hailo_status read_memory_ioctl(MemoryType memory_type, uint64_t address, void *buf, size_t size);
hailo_status write_memory_ioctl(MemoryType memory_type, uint64_t address, const void *buf, size_t size);
- HailoRTDriver(const std::string &dev_path, FileDescriptor &&fd, hailo_status &status);
+ Expected<std::pair<uintptr_t, uint64_t>> descriptors_list_create_ioctl(size_t desc_count, bool is_circular);
+ hailo_status descriptors_list_release_ioctl(uintptr_t desc_handle);
+ Expected<void *> descriptors_list_create_mmap(uintptr_t desc_handle, size_t desc_count);
+ hailo_status descriptors_list_create_munmap(void *address, size_t desc_count);
+
+ HailoRTDriver(const DeviceInfo &device_info, FileDescriptor &&fd, hailo_status &status);
bool is_valid_channel_id(const vdma::ChannelId &channel_id);
bool is_valid_channels_bitmap(const ChannelsBitmap &bitmap)
}
FileDescriptor m_fd;
- std::string m_dev_path;
+ DeviceInfo m_device_info;
uint16_t m_desc_max_page_size;
DmaType m_dma_type;
bool m_allocate_driver_buffer;
static Expected<MmapBufferImpl> create_shared_memory(size_t length);
static Expected<MmapBufferImpl> create_file_map(size_t length, FileDescriptor &file, uintptr_t offset);
+#if defined(__QNX__)
+ static Expected<MmapBufferImpl> create_file_map_nocache(size_t length, FileDescriptor &file, uintptr_t offset);
+#endif /* defined(__QNX__) */
+
MmapBufferImpl() : m_address(INVALID_ADDR), m_length(0), m_unmappable(false) {}
~MmapBufferImpl()
return m_address;
}
+ size_t size() const { return m_length; }
+
bool is_mapped() const
{
return (INVALID_ADDR != m_address);
return MmapBuffer<T>(std::move(mmap.release()));
}
+#if defined(__QNX__)
+ static Expected<MmapBuffer<T>> create_file_map_nocache(size_t length, FileDescriptor &file, uintptr_t offset)
+ {
+ auto mmap = MmapBufferImpl::create_file_map_nocache(length, file, offset);
+ CHECK_EXPECTED(mmap);
+ return MmapBuffer<T>(mmap.release());
+ }
+#endif /* defined(__QNX__) */
+
MmapBuffer() = default;
~MmapBuffer() = default;
return reinterpret_cast<T*>(m_mmap.address());
}
+ size_t size() const { return m_mmap.size(); }
+
template<typename U=T>
std::enable_if_t<!std::is_void<U>::value, U&> operator*()
{
--- /dev/null
+cmake_minimum_required(VERSION 3.0.0)
+
+if (CMAKE_SYSTEM_NAME STREQUAL QNX)
+ add_subdirectory(qnx)
+else()
+ add_subdirectory(linux)
+endif()
+
+set(files
+ ${CMAKE_CURRENT_SOURCE_DIR}/microsec_timer.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/file_descriptor.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/mmap_buffer.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/hailort_driver.cpp
+)
+
+set(HAILORT_CPP_SOURCES ${HAILORT_CPP_SOURCES} ${files} PARENT_SCOPE)
const size_t HailoRTDriver::INVALID_DRIVER_VDMA_MAPPING_HANDLE_VALUE = INVALID_DRIVER_HANDLE_VALUE;
const uint8_t HailoRTDriver::INVALID_VDMA_CHANNEL_INDEX = INVALID_VDMA_CHANNEL;
-Expected<HailoRTDriver> HailoRTDriver::create(const std::string &dev_path)
+Expected<HailoRTDriver> HailoRTDriver::create(const DeviceInfo &device_info)
{
- hailo_status status = HAILO_UNINITIALIZED;
+ auto fd = FileDescriptor(open(device_info.dev_path.c_str(), O_RDWR));
+ CHECK_AS_EXPECTED(fd >= 0, HAILO_DRIVER_FAIL,
+ "Failed to open device file {} with error {}", device_info.dev_path, errno);
- auto fd = FileDescriptor(open(dev_path.c_str(), O_RDWR));
- if (0 > fd) {
- LOGGER__ERROR("Failed to open board {}", dev_path);
- return make_unexpected(HAILO_OPEN_FILE_FAILURE);
- }
+ hailo_status status = HAILO_UNINITIALIZED;
+ HailoRTDriver object(device_info, std::move(fd), status);
+ CHECK_SUCCESS_AS_EXPECTED(status);
- HailoRTDriver object(dev_path, std::move(fd), status);
- if (HAILO_SUCCESS != status) {
- return make_unexpected(status);
- }
return object;
}
return HAILO_SUCCESS;
}
-HailoRTDriver::HailoRTDriver(const std::string &dev_path, FileDescriptor &&fd, hailo_status &status) :
+HailoRTDriver::HailoRTDriver(const DeviceInfo &device_info, FileDescriptor &&fd, hailo_status &status) :
m_fd(std::move(fd)),
- m_dev_path(dev_path),
+ m_device_info(device_info),
m_allocate_driver_buffer(false)
{
hailo_driver_info driver_info = {};
return HAILO_SUCCESS;
}
-hailo_status HailoRTDriver::vdma_buffer_sync(VdmaBufferHandle handle, DmaDirection sync_direction, size_t offset, size_t count)
+hailo_status HailoRTDriver::vdma_buffer_sync(VdmaBufferHandle handle, DmaSyncDirection sync_direction,
+ size_t offset, size_t count)
{
#if defined(__linux__)
- CHECK(sync_direction != DmaDirection::BOTH, HAILO_INVALID_ARGUMENT, "Can't sync vdma data both host and device");
hailo_vdma_buffer_sync_params sync_info{
.handle = handle,
- .sync_type = (sync_direction == DmaDirection::H2D) ? HAILO_SYNC_FOR_DEVICE : HAILO_SYNC_FOR_HOST,
+ .sync_type = (sync_direction == DmaSyncDirection::TO_HOST) ? HAILO_SYNC_FOR_CPU : HAILO_SYNC_FOR_DEVICE,
.offset = offset,
.count = count
};
return HAILO_SUCCESS;
}
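
// Usage note (editorial): sync with TO_HOST before the CPU reads data the device wrote
// (maps to HAILO_SYNC_FOR_CPU above), and with TO_DEVICE after the CPU writes and before
// the device reads, e.g.:
//
//   driver.vdma_buffer_sync(handle, HailoRTDriver::DmaSyncDirection::TO_DEVICE, 0, size);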
-
+
+#if defined(__linux__)
Expected<HailoRTDriver::VdmaBufferHandle> HailoRTDriver::vdma_buffer_map(void *user_address, size_t required_size,
DmaDirection data_direction, const vdma_mapped_buffer_driver_identifier &driver_buff_handle)
{
-
-#if defined(__linux__)
hailo_vdma_buffer_map_params map_user_buffer_info {
.user_address = user_address,
.size = required_size,
.allocated_buffer_handle = driver_buff_handle,
.mapped_handle = 0
};
+
+ int err = 0;
+ auto status = hailo_ioctl(this->m_fd, HAILO_VDMA_BUFFER_MAP, &map_user_buffer_info, err);
+ if (HAILO_SUCCESS != status) {
+ LOGGER__ERROR("Failed to map user buffer with errno:{}", err);
+ return make_unexpected(HAILO_DRIVER_FAIL);
+ }
+
+ return VdmaBufferHandle(map_user_buffer_info.mapped_handle);
+}
#elif defined( __QNX__)
+Expected<HailoRTDriver::VdmaBufferHandle> HailoRTDriver::vdma_buffer_map(void *user_address, size_t required_size,
+ DmaDirection data_direction, const vdma_mapped_buffer_driver_identifier &driver_buff_handle)
+{
+ // Mapping is done by the driver_buff_handle (shm file descriptor), and not by address.
+ (void)user_address;
+
+ // Create shared memory handle to send to driver
+ shm_handle_t shm_handle;
+ int err = shm_create_handle(driver_buff_handle, m_resource_manager_pid, O_RDWR,
+ &shm_handle, 0);
+ if (0 != err) {
+ LOGGER__ERROR("Error creating shm object handle, errno is: {}", errno);
+ return make_unexpected(HAILO_INTERNAL_FAILURE);
+ }
+
hailo_vdma_buffer_map_params map_user_buffer_info {
- .shared_memory_handle = driver_buff_handle.shm_handle,
+ .shared_memory_handle = shm_handle,
.size = required_size,
.data_direction = direction_to_dma_data_direction(data_direction),
.allocated_buffer_handle = INVALID_DRIVER_HANDLE_VALUE,
.mapped_handle = 0
};
- (void)user_address;
-#else
-#error "unsupported platform!"
-#endif // __linux__
-
- int err = 0;
+ // Note: The driver will accept the shm_handle and mmap it into its own address space. After the driver maps
+ // the shm, calling shm_delete_handle is not needed (but doesn't harm on the other hand).
+ // If the ioctl fails, we can't tell whether the shm was mapped or not, so we delete the handle ourselves.
auto status = hailo_ioctl(this->m_fd, HAILO_VDMA_BUFFER_MAP, &map_user_buffer_info, err);
if (HAILO_SUCCESS != status) {
LOGGER__ERROR("Failed to map user buffer with errno:{}", err);
+ shm_delete_handle(shm_handle);
return make_unexpected(HAILO_DRIVER_FAIL);
}
return VdmaBufferHandle(map_user_buffer_info.mapped_handle);
}
+#else
+#error "unsupported platform!"
+#endif // __linux__
hailo_status HailoRTDriver::vdma_buffer_unmap(VdmaBufferHandle handle)
{
return HAILO_SUCCESS;
}
-Expected<std::pair<uintptr_t, uint64_t>> HailoRTDriver::descriptors_list_create(size_t desc_count)
+Expected<DescriptorsListInfo> HailoRTDriver::descriptors_list_create(size_t desc_count, bool is_circular)
+{
+ auto handle_to_dma_address_pair = descriptors_list_create_ioctl(desc_count, is_circular);
+ CHECK_EXPECTED(handle_to_dma_address_pair);
+
+ const auto desc_handle = handle_to_dma_address_pair->first;
+ const auto dma_address = handle_to_dma_address_pair->second;
+
+ auto user_address = descriptors_list_create_mmap(desc_handle, desc_count);
+ if (!user_address) {
+ auto status = descriptors_list_release_ioctl(desc_handle);
+ if (HAILO_SUCCESS != status) {
+ LOGGER__ERROR("Failed releasing descriptors list, status {}", status);
+ // continue
+ }
+ return make_unexpected(user_address.status());
+ }
+
+ return DescriptorsListInfo{desc_handle, dma_address, desc_count, user_address.release()};
+}
+
+hailo_status HailoRTDriver::descriptors_list_release(const DescriptorsListInfo &descriptors_list_info)
{
- hailo_desc_list_create_params create_desc_info {.desc_count = desc_count, .desc_handle = 0, .dma_address = 0 };
+ hailo_status status = HAILO_SUCCESS;
+
+ auto unmap_status = descriptors_list_create_munmap(descriptors_list_info.user_address, descriptors_list_info.desc_count);
+ if (HAILO_SUCCESS != unmap_status) {
+ LOGGER__ERROR("Descriptors list unmap failed with {}", unmap_status);
+ status = unmap_status;
+ // continue
+ }
+
+ auto release_status = descriptors_list_release_ioctl(descriptors_list_info.handle);
+ if (HAILO_SUCCESS != release_status) {
+ LOGGER__ERROR("Descriptors list release status failed with {}", release_status);
+ status = release_status;
+ // continue
+ }
+
+ return status;
+}
+
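+// Lifetime sketch (editorial): a descriptors list is created once, bound to a vdma buffer,
+// and released with the full DescriptorsListInfo - both the driver handle and the mmapped
+// user address are needed for cleanup:
+//
+// auto desc_list = driver.descriptors_list_create(desc_count, /*is_circular=*/true);
+// CHECK_EXPECTED(desc_list);
+// ... bind via descriptors_list_bind_vdma_buffer() and use desc_list->user_address ...
+// auto status = driver.descriptors_list_release(desc_list.value());
+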
+Expected<std::pair<uintptr_t, uint64_t>> HailoRTDriver::descriptors_list_create_ioctl(size_t desc_count, bool is_circular)
+{
+ hailo_desc_list_create_params create_desc_info{};
+ create_desc_info.desc_count = desc_count;
+ create_desc_info.is_circular = is_circular;
int err = 0;
auto status = hailo_ioctl(this->m_fd, HAILO_DESC_LIST_CREATE, &create_desc_info, err);
return std::make_pair(create_desc_info.desc_handle, create_desc_info.dma_address);
}
-hailo_status HailoRTDriver::descriptors_list_release(uintptr_t desc_handle)
+hailo_status HailoRTDriver::descriptors_list_release_ioctl(uintptr_t desc_handle)
{
int err = 0;
auto status = hailo_ioctl(this->m_fd, HAILO_DESC_LIST_RELEASE, &desc_handle, err);
return HAILO_DRIVER_FAIL;
}
- return HAILO_SUCCESS;
+ return HAILO_SUCCESS;
+}
+
+#if defined(__linux__)
+Expected<void *> HailoRTDriver::descriptors_list_create_mmap(uintptr_t desc_handle, size_t desc_count)
+{
+ const size_t buffer_size = desc_count * SIZE_OF_SINGLE_DESCRIPTOR;
+ void *address = mmap(nullptr, buffer_size, PROT_WRITE | PROT_READ, MAP_SHARED, m_fd, (off_t)desc_handle);
+ if (MAP_FAILED == address) {
+ LOGGER__ERROR("Failed to map descriptors list buffer with errno: {}", errno);
+ return make_unexpected(HAILO_DRIVER_FAIL);
+ }
+ return address;
+}
+
+hailo_status HailoRTDriver::descriptors_list_create_munmap(void *address, size_t desc_count)
+{
+ const size_t buffer_size = desc_count * SIZE_OF_SINGLE_DESCRIPTOR;
+ if (0 != munmap(address, buffer_size)) {
+ LOGGER__ERROR("munmap of address {}, length: {} failed with errno: {}", address, buffer_size, errno);
+ return HAILO_DRIVER_FAIL;
+ }
+ return HAILO_SUCCESS;
}
+#elif defined(__QNX__)
+
+Expected<void *> HailoRTDriver::descriptors_list_create_mmap(uintptr_t desc_handle, size_t desc_count)
+{
+ const size_t buffer_size = desc_count * SIZE_OF_SINGLE_DESCRIPTOR;
+ struct hailo_non_linux_desc_list_mmap_params map_vdma_list_params {
+ .desc_handle = desc_handle,
+ .size = buffer_size,
+ .user_address = nullptr,
+ };
+
+ int err = 0;
+ auto status = HailoRTDriver::hailo_ioctl(m_fd, HAILO_NON_LINUX_DESC_LIST_MMAP, &map_vdma_list_params, err);
+ if (HAILO_SUCCESS != status) {
+ LOGGER__ERROR("Mmap descriptors list ioctl failed with errno:{}", err);
+ return make_unexpected(HAILO_DRIVER_FAIL);
+ }
+
+ void *address = mmap(nullptr, buffer_size, PROT_WRITE | PROT_READ | PROT_NOCACHE, MAP_SHARED | MAP_PHYS, NOFD,
+ (off_t)map_vdma_list_params.user_address);
+ CHECK_AS_EXPECTED(MAP_FAILED != address, HAILO_INTERNAL_FAILURE, "Failed to mmap buffer fd with errno:{}", errno);
+
+ return address;
+}
+
+hailo_status HailoRTDriver::descriptors_list_create_munmap(void *address, size_t desc_count)
+{
+ const size_t buffer_size = desc_count * SIZE_OF_SINGLE_DESCRIPTOR;
+ if (0 != munmap(address, buffer_size)) {
+ LOGGER__ERROR("munmap of address {}, length: {} failed with errno: {}", address, buffer_size, errno);
+ return HAILO_DRIVER_FAIL;
+ }
+ return HAILO_SUCCESS;
+}
+
+#else
+#error "unsupported platform!"
+#endif
+
hailo_status HailoRTDriver::descriptors_list_bind_vdma_buffer(uintptr_t desc_handle, VdmaBufferHandle buffer_handle,
uint16_t desc_page_size, uint8_t channel_index, uint32_t starting_desc)
{
int err = 0;
auto status = hailo_ioctl(this->m_fd, HAILO_VDMA_CONTINUOUS_BUFFER_ALLOC, ¶ms, err);
if (HAILO_SUCCESS != status) {
+ if (ENOMEM == err) {
+ LOGGER__WARN("Failed to allocate continuous buffer, size 0x{:x}. This failure means there is not a sufficient amount of CMA memory",
+ size);
+ return make_unexpected(HAILO_OUT_OF_HOST_CMA_MEMORY);
+ }
LOGGER__ERROR("Failed allocate continuous buffer with errno:{}", err);
return make_unexpected(HAILO_DRIVER_FAIL);
}
--- /dev/null
+cmake_minimum_required(VERSION 3.0.0)
+
+set(files
+ ${CMAKE_CURRENT_SOURCE_DIR}/driver_scan.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/event.cpp
+)
+
+set(HAILORT_CPP_SOURCES ${HAILORT_CPP_SOURCES} ${files} PARENT_SCOPE)
--- /dev/null
+/**
+ * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+ **/
+/**
+ * @file driver_scan.cpp
+ * @brief Parse pcie driver sysfs
+ **/
+
+#include "os/driver_scan.hpp"
+#include <stdarg.h>
+#include <dirent.h>
+#include <fstream>
+
+namespace hailort
+{
+
+#define HAILO_CLASS_PATH ("/sys/class/hailo_chardev")
+#define HAILO_BOARD_LOCATION_FILENAME ("board_location")
+
+
+Expected<std::vector<std::string>> list_devices()
+{
+ DIR *dir_iter = opendir(HAILO_CLASS_PATH);
+ if (!dir_iter) {
+ if (ENOENT == errno) {
+ LOGGER__ERROR("Can't find hailo pcie class, this may happen if the driver is not installed (this may happen"
+ " if the kernel was updated), or if there is no connected Hailo board");
+ return make_unexpected(HAILO_PCIE_DRIVER_NOT_INSTALLED);
+ }
+ else {
+ LOGGER__ERROR("Failed to open hailo pcie class ({}), errno {}", HAILO_CLASS_PATH, errno);
+ return make_unexpected(HAILO_DRIVER_FAIL);
+ }
+ }
+
+ std::vector<std::string> devices;
+ struct dirent *dir = nullptr;
+ while ((dir = readdir(dir_iter)) != nullptr) {
+ std::string device_name(dir->d_name);
+ if (device_name == "." || device_name == "..") {
+ continue;
+ }
+ devices.push_back(device_name);
+ }
+
+ closedir(dir_iter);
+ return devices;
+}
+
+Expected<HailoRTDriver::DeviceInfo> query_device_info(const std::string &device_name)
+{
+ const std::string device_id_path = std::string(HAILO_CLASS_PATH) + "/" +
+ device_name + "/" + HAILO_BOARD_LOCATION_FILENAME;
+ std::ifstream device_id_file(device_id_path);
+ CHECK_AS_EXPECTED(device_id_file.good(), HAILO_DRIVER_FAIL, "Failed to open {}", device_id_path);
+
+ std::string device_id;
+ std::getline(device_id_file, device_id);
+ CHECK_AS_EXPECTED(device_id_file.eof(), HAILO_DRIVER_FAIL, "Failed to read {}", device_id_path);
+
+ HailoRTDriver::DeviceInfo device_info = {};
+ device_info.dev_path = std::string("/dev/") + device_name;
+ device_info.device_id = device_id;
+
+ return device_info;
+}
+
+} /* namespace hailort */
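+
+// Usage sketch (editorial): composing the scan API above with HailoRTDriver::create()
+// to open the first device found:
+//
+// auto devices = list_devices();
+// CHECK_EXPECTED(devices);
+// CHECK_AS_EXPECTED(!devices->empty(), HAILO_INVALID_OPERATION, "No Hailo devices found");
+// auto device_info = query_device_info(devices->at(0));
+// CHECK_EXPECTED(device_info);
+// auto driver = HailoRTDriver::create(device_info.value());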
--- /dev/null
+/**
+ * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+ **/
+/**
+ * @file event.cpp
+ * @brief Event & Semaphore wrapper for Unix
+ **/
+
+#include "hailo/hailort.h"
+#include "hailo/event.hpp"
+
+#include "common/utils.hpp"
+#include "common/event_internal.hpp"
+
+#include <sys/eventfd.h>
+#include <poll.h>
+#include <utility>
+
+
+namespace hailort
+{
+
+Waitable::~Waitable()
+{
+ if (-1 != m_handle) {
+ (void) close(m_handle);
+ }
+}
+
+Waitable::Waitable(Waitable&& other) :
+ m_handle(std::exchange(other.m_handle, -1))
+{}
+
+hailo_status Waitable::wait_for_single_object(underlying_waitable_handle_t handle, std::chrono::milliseconds timeout)
+{
+ return eventfd_poll(handle, timeout);
+}
+
+hailo_status Waitable::eventfd_poll(underlying_waitable_handle_t fd, std::chrono::milliseconds timeout)
+{
+ hailo_status status = HAILO_UNINITIALIZED;
+ struct pollfd pfd{};
+ int poll_ret = -1;
+
+ assert(-1 != fd);
+
+ if (UINT32_MAX < timeout.count()) {
+ status = HAILO_INVALID_ARGUMENT;
+ LOGGER__ERROR("Invalid timeout value: {}", timeout.count());
+ goto l_exit;
+ }
+ if (INT_MAX < timeout.count()) {
+ timeout = std::chrono::milliseconds(INT_MAX);
+ }
+
+ pfd.fd = fd;
+ pfd.events = POLLIN;
+ do {
+ poll_ret = poll(&pfd, 1, static_cast<int>(timeout.count()));
+ } while ((0 > poll_ret) && (EINTR == errno)); // retry if poll() was interrupted by a signal
+
+ if (0 == poll_ret) {
+ LOGGER__TRACE("Timeout");
+ status = HAILO_TIMEOUT;
+ goto l_exit;
+ }
+ if (0 > poll_ret) {
+ LOGGER__ERROR("poll failed with errno={}", errno);
+ status = HAILO_INTERNAL_FAILURE;
+ goto l_exit;
+ }
+ if (0 == (pfd.revents & POLLIN)) {
+ LOGGER__ERROR("pfd not in read state. revents={}", pfd.revents);
+ status = HAILO_INTERNAL_FAILURE;
+ goto l_exit;
+ }
+
+ status = HAILO_SUCCESS;
+l_exit:
+ return status;
+}
+
+hailo_status Waitable::eventfd_read(underlying_waitable_handle_t fd)
+{
+ hailo_status status = HAILO_UNINITIALIZED;
+ ssize_t read_ret = -1;
+ uint64_t dummy;
+
+ assert(-1 != fd);
+
+ read_ret = read(fd, &dummy, sizeof(dummy));
+ if (sizeof(dummy) != read_ret) {
+ LOGGER__ERROR("read failed. bytes_read={}, expected={}, errno={}", read_ret, sizeof(dummy), errno);
+ status = HAILO_INTERNAL_FAILURE;
+ goto l_exit;
+ }
+
+ status = HAILO_SUCCESS;
+l_exit:
+ return status;
+}
+
+hailo_status Waitable::eventfd_write(underlying_waitable_handle_t fd)
+{
+ hailo_status status = HAILO_UNINITIALIZED;
+ ssize_t write_ret = -1;
+ uint64_t buffer = 1;
+
+ assert(-1 != fd);
+
+ write_ret = write(fd, &buffer, sizeof(buffer));
+ if (sizeof(buffer) != write_ret) {
+ LOGGER__ERROR("write failed. bytes_written={}, expected={}, errno={}", write_ret, sizeof(buffer), errno);
+ status = HAILO_INTERNAL_FAILURE;
+ goto l_exit;
+ }
+
+ status = HAILO_SUCCESS;
+l_exit:
+ return status;
+}
+
+Expected<Event> Event::create(const State& initial_state)
+{
+ const auto handle = open_event_handle(initial_state);
+ if (-1 == handle) {
+ return make_unexpected(HAILO_INTERNAL_FAILURE);
+ }
+ return Event(handle);
+}
+
+EventPtr Event::create_shared(const State& initial_state)
+{
+ const auto handle = open_event_handle(initial_state);
+ if (-1 == handle) {
+ return nullptr;
+ }
+
+ return make_shared_nothrow<Event>(handle);
+}
+
+hailo_status Event::signal()
+{
+ return eventfd_write(m_handle);
+}
+
+bool Event::is_auto_reset()
+{
+ return false;
+}
+
+hailo_status Event::reset()
+{
+ if (HAILO_TIMEOUT == wait(std::chrono::seconds(0))) {
+ // Event is not set - nothing to do; otherwise `eventfd_read` would block forever
+ return HAILO_SUCCESS;
+ }
+ return eventfd_read(m_handle);
+}
+
+underlying_waitable_handle_t Event::open_event_handle(const State& initial_state)
+{
+ static const int NO_FLAGS = 0;
+ const int state = initial_state == State::signalled ? 1 : 0;
+ const auto handle = eventfd(state, NO_FLAGS);
+ if (-1 == handle) {
+ LOGGER__ERROR("Call to eventfd failed with errno={}", errno);
+ }
+ return handle;
+}
+
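+// Usage sketch (editorial; `State::not_signalled` is assumed to be the non-signalled
+// enumerator): cross-thread signalling with the eventfd-backed Event.
+//
+// auto event = Event::create_shared(Event::State::not_signalled);
+// // thread A: event->signal();
+// // thread B: auto status = event->wait(std::chrono::milliseconds(100));
+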
+Expected<Semaphore> Semaphore::create(uint32_t initial_count)
+{
+ const auto handle = open_semaphore_handle(initial_count);
+ if (-1 == handle) {
+ return make_unexpected(HAILO_INTERNAL_FAILURE);
+ }
+ return Semaphore(handle);
+}
+
+SemaphorePtr Semaphore::create_shared(uint32_t initial_count)
+{
+ const auto handle = open_semaphore_handle(initial_count);
+ if (-1 == handle) {
+ return nullptr;
+ }
+
+ return make_shared_nothrow<Semaphore>(handle);
+}
+
+hailo_status Semaphore::signal()
+{
+ return eventfd_write(m_handle);
+}
+
+bool Semaphore::is_auto_reset()
+{
+ return true;
+}
+
+hailo_status Semaphore::post_wait()
+{
+ return eventfd_read(m_handle);
+}
+
+underlying_waitable_handle_t Semaphore::open_semaphore_handle(uint32_t initial_count)
+{
+ static const int SEMAPHORE = EFD_SEMAPHORE;
+ const auto handle = eventfd(initial_count, SEMAPHORE);
+ if (-1 == handle) {
+ LOGGER__ERROR("Call to eventfd failed with errno={}", errno);
+ }
+ return handle;
+}
+
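+// Usage sketch (editorial): the EFD_SEMAPHORE-backed Semaphore is a counting semaphore -
+// each signal() increments the count and each successful wait() consumes one unit.
+//
+// auto sem = Semaphore::create_shared(/*initial_count=*/0);
+// // producer: sem->signal();
+// // consumer: auto status = sem->wait(std::chrono::milliseconds(100));
+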
+Expected<size_t> WaitableGroup::wait_any(std::chrono::milliseconds timeout)
+{
+ int poll_ret = -1;
+ do {
+ poll_ret = poll(m_waitable_handles.data(), m_waitable_handles.size(), static_cast<int>(timeout.count()));
+ } while ((0 > poll_ret) && (EINTR == errno)); // retry if poll() was interrupted by a signal
+
+ if (0 == poll_ret) {
+ LOGGER__TRACE("Timeout");
+ return make_unexpected(HAILO_TIMEOUT);
+ }
+ CHECK_AS_EXPECTED(poll_ret > 0, HAILO_INTERNAL_FAILURE, "poll failed with errno={}", errno);
+
+ for (size_t index = 0; index < m_waitable_handles.size(); index++) {
+ if (m_waitable_handles[index].revents & POLLIN) {
+ auto status = m_waitables[index].get().post_wait();
+ CHECK_SUCCESS_AS_EXPECTED(status);
+
+ return index;
+ }
+ }
+
+ LOGGER__ERROR("None of the pollfd are in read state");
+ return make_unexpected(HAILO_INTERNAL_FAILURE);
+}
+
+} /* namespace hailort */
Expected<MmapBufferImpl> MmapBufferImpl::create_file_map(size_t length, FileDescriptor &file, uintptr_t offset)
{
-#ifdef __linux__
void *address = mmap(nullptr, length, PROT_WRITE | PROT_READ, MAP_SHARED, file, (off_t)offset);
CHECK_AS_EXPECTED(INVALID_ADDR != address, HAILO_INTERNAL_FAILURE, "Failed to mmap buffer fd with errno:{}", errno);
-#elif defined(__QNX__)
-
- // TODO change name of struct - using this sturct because itis exact fields we need ro qnx mmap too (where user address is physical addr)
- struct hailo_non_linux_desc_list_mmap_params map_vdma_list_params {
- .desc_handle = offset,
- .size = length,
- .user_address = nullptr,
- };
-
- int err = 0;
- auto status = HailoRTDriver::hailo_ioctl(file, HAILO_NON_LINUX_DESC_LIST_MMAP, &map_vdma_list_params, err);
- if (HAILO_SUCCESS != status) {
- LOGGER__ERROR("HAILO_NON_LINUX_DESC_LIST_MMAP failed with errno:{}", err);
- return make_unexpected(HAILO_DRIVER_FAIL);
- }
+ return MmapBufferImpl(address, length);
+}
- void *address = mmap(nullptr, length, PROT_WRITE | PROT_READ | PROT_NOCACHE, MAP_SHARED | MAP_PHYS, NOFD, (off_t)map_vdma_list_params.user_address);
+#if defined(__QNX__)
+Expected<MmapBufferImpl> MmapBufferImpl::create_file_map_nocache(size_t length, FileDescriptor &file, uintptr_t offset)
+{
+ void *address = mmap(nullptr, length, PROT_WRITE | PROT_READ | PROT_NOCACHE, MAP_SHARED, file, (off_t)offset);
CHECK_AS_EXPECTED(INVALID_ADDR != address, HAILO_INTERNAL_FAILURE, "Failed to mmap buffer fd with errno:{}", errno);
-#else
-#error "unsupported platform!"
-#endif // __linux__
-
return MmapBufferImpl(address, length);
}
+#endif /* defined(__QNX__) */
hailo_status MmapBufferImpl::unmap()
{
--- /dev/null
+cmake_minimum_required(VERSION 3.0.0)
+
+set(files
+ ${CMAKE_CURRENT_SOURCE_DIR}/driver_scan.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/event.cpp
+)
+
+set(HAILORT_CPP_SOURCES ${HAILORT_CPP_SOURCES} ${files} PARENT_SCOPE)
#include "hailo/event.hpp"
#include "common/utils.hpp"
-
-#include "utils/event_internal.hpp"
+#include "common/event_internal.hpp"
#include <poll.h>
#include <utility>
namespace hailort
{
-Waitable::Waitable(underlying_waitable_handle_t handle) :
- m_handle(handle)
-{}
-
Waitable::~Waitable()
{
if (INVALID_EVENT_HANDLE != m_handle) {
m_handle(std::exchange(other.m_handle, INVALID_EVENT_HANDLE))
{}
-underlying_waitable_handle_t Waitable::get_underlying_handle()
-{
- return m_handle;
-}
-
hailo_status Waitable::wait_for_single_object(underlying_waitable_handle_t handle, std::chrono::milliseconds timeout)
{
const size_t timeout_ms = (timeout.count() > INT_MAX) ? INT_MAX : static_cast<size_t>(timeout.count());
return make_shared_nothrow<Event>(handle);
}
-hailo_status Event::wait(std::chrono::milliseconds timeout)
-{
- return wait_for_single_object(m_handle, timeout);
-}
-
hailo_status Event::signal()
{
const auto result = neosmart::SetEvent(m_handle);
return make_shared_nothrow<Semaphore>(handle, initial_count);
}
-hailo_status Semaphore::wait(std::chrono::milliseconds timeout)
-{
- auto wait_result = wait_for_single_object(m_handle, timeout);
- if (HAILO_SUCCESS == wait_result) {
- m_sem_mutex.lock();
- if (0 == m_count.load()) {
- LOGGER__ERROR("Waiting on semaphore with 0 value");
- }
- if (m_count > 0) {
- m_count--;
- }
- // After decrementing the value of the semaphore - check if the new value is bigger than 0 and if it is signal the event
- if (m_count > 0) {
- neosmart::SetEvent(m_handle);
- }
- m_sem_mutex.unlock();
- }
-
- return wait_result;
-}
-
hailo_status Semaphore::signal()
{
m_sem_mutex.lock();
other.m_sem_mutex.unlock();
}
-WaitOrShutdown::WaitOrShutdown(WaitablePtr waitable, EventPtr shutdown_event) :
- m_waitable(waitable),
- m_shutdown_event(shutdown_event),
- m_wait_handle_array(create_wait_handle_array(waitable, shutdown_event))
-{}
+hailo_status Semaphore::post_wait()
+{
+ std::unique_lock<std::mutex> lock(m_sem_mutex);
+ CHECK(m_count.load() > 0, HAILO_INTERNAL_FAILURE, "Wait returned on semaphore with 0 value");
-void Event::post_wait()
-{}
+ m_count--;
-void Semaphore::post_wait(){
- m_sem_mutex.lock();
- if (0 == m_count.load()) {
- LOGGER__ERROR("Wait Returned on semaphore with 0 value");
- }
- if (m_count > 0) {
- m_count--;
- }
// After decrementing the value of the semaphore - check if the new value is bigger than 0 and if it is signal the event
if (m_count > 0) {
neosmart::SetEvent(m_handle);
}
- m_sem_mutex.unlock();
+
+ return HAILO_SUCCESS;
}
-hailo_status WaitOrShutdown::wait(std::chrono::milliseconds timeout)
+Expected<size_t> WaitableGroup::wait_any(std::chrono::milliseconds timeout)
{
int wait_index = -1;
const uint64_t timeout_ms = (timeout.count() > INT_MAX) ? INT_MAX : static_cast<uint64_t>(timeout.count());
- const auto wait_result = neosmart::WaitForMultipleEvents(m_wait_handle_array.data(), static_cast<int>(m_wait_handle_array.size()),
- false, timeout_ms, wait_index);
- // If semaphore need to subtract from counter
+ const bool WAIT_FOR_ANY = false;
+ const auto wait_result = neosmart::WaitForMultipleEvents(m_waitable_handles.data(),
+ static_cast<int>(m_waitable_handles.size()), WAIT_FOR_ANY, timeout_ms, wait_index);
if (0 != wait_result) {
if (ETIMEDOUT == wait_result) {
- return HAILO_TIMEOUT;
+ return make_unexpected(HAILO_TIMEOUT);
} else {
LOGGER__ERROR("WaitForMultipleEvents Failed, error: {}", wait_result);
- return HAILO_INTERNAL_FAILURE;
+ return make_unexpected(HAILO_INTERNAL_FAILURE);
}
}
-
- if (WAITABLE_INDEX == wait_index) {
- // Meaning it can be a semaphore object
- m_waitable->post_wait();
- return HAILO_SUCCESS;
- } else if (SHUTDOWN_INDEX == wait_index) {
- return HAILO_SHUTDOWN_EVENT_SIGNALED;
- } else {
- LOGGER__ERROR("Invalid event index signalled in WaitForMultipleEventsFailed, index: {}", wait_index);
- return HAILO_INTERNAL_FAILURE;
- }
-}
-hailo_status WaitOrShutdown::signal()
-{
- return m_waitable->signal();
-}
+ auto status = m_waitables[wait_index].get().post_wait();
+ CHECK_SUCCESS_AS_EXPECTED(status);
-WaitOrShutdown::WaitHandleArray WaitOrShutdown::create_wait_handle_array(WaitablePtr waitable, EventPtr shutdown_event)
-{
- // Note the order!
- WaitHandleArray handles{
- shutdown_event->get_underlying_handle(),
- waitable->get_underlying_handle()
- };
- return handles;
+ return wait_index;
}
} /* namespace hailort */
+++ /dev/null
-/**
- * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
- * Distributed under the MIT license (https://opensource.org/licenses/MIT)
- **/
-/**
- * @file driver_scan.cpp
- * @brief Parse pcie driver sysfs
- **/
-
-#include "os/driver_scan.hpp"
-#include <stdarg.h>
-#include <dirent.h>
-#include <fstream>
-
-namespace hailort
-{
-
-#define HAILO_CLASS_PATH ("/sys/class/hailo_chardev")
-#define HAILO_BOARD_LOCATION_FILENAME ("board_location")
-
-
-Expected<std::vector<std::string>> list_devices()
-{
- DIR *dir_iter = opendir(HAILO_CLASS_PATH);
- if (!dir_iter) {
- if (ENOENT == errno) {
- LOGGER__ERROR("Can't find hailo pcie class, this may happen if the driver is not installed (this may happen"
- " if the kernel was updated), or if there is no connected Hailo board");
- return make_unexpected(HAILO_PCIE_DRIVER_NOT_INSTALLED);
- }
- else {
- LOGGER__ERROR("Failed to open hailo pcie class ({}), errno {}", HAILO_CLASS_PATH, errno);
- return make_unexpected(HAILO_DRIVER_FAIL);
- }
- }
-
- std::vector<std::string> devices;
- struct dirent *dir = nullptr;
- while ((dir = readdir(dir_iter)) != nullptr) {
- std::string device_name(dir->d_name);
- if (device_name == "." || device_name == "..") {
- continue;
- }
- devices.push_back(device_name);
- }
-
- closedir(dir_iter);
- return devices;
-}
-
-Expected<HailoRTDriver::DeviceInfo> query_device_info(const std::string &device_name)
-{
- const std::string device_id_path = std::string(HAILO_CLASS_PATH) + "/" +
- device_name + "/" + HAILO_BOARD_LOCATION_FILENAME;
- std::ifstream device_id_file(device_id_path);
- CHECK_AS_EXPECTED(device_id_file.good(), HAILO_DRIVER_FAIL, "Failed open {}", device_id_path);
-
- std::string device_id;
- std::getline(device_id_file, device_id);
- CHECK_AS_EXPECTED(device_id_file.eof(), HAILO_DRIVER_FAIL, "Failed read {}", device_id_path);
-
- HailoRTDriver::DeviceInfo device_info = {};
- device_info.dev_path = std::string("/dev/") + device_name;
- device_info.device_id = device_id;
-
- return device_info;
-}
-
-} /* namespace hailort */
+++ /dev/null
-/**
- * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
- * Distributed under the MIT license (https://opensource.org/licenses/MIT)
- **/
-/**
- * @file event.cpp
- * @brief Event & Semaphore wrapper for Unix
- *
- * TODO: doc
- **/
-
-#include "hailo/hailort.h"
-#include "hailo/event.hpp"
-
-#include "common/utils.hpp"
-
-#include "utils/event_internal.hpp"
-
-#include <sys/eventfd.h>
-#include <poll.h>
-#include <utility>
-
-
-namespace hailort
-{
-
-Waitable::Waitable(underlying_waitable_handle_t handle) :
- m_handle(handle)
-{}
-
-Waitable::~Waitable()
-{
- if (-1 != m_handle) {
- (void) close(m_handle);
- }
-}
-
-Waitable::Waitable(Waitable&& other) :
- m_handle(std::exchange(other.m_handle, -1))
-{}
-
-underlying_waitable_handle_t Waitable::get_underlying_handle()
-{
- return m_handle;
-}
-
-hailo_status Waitable::eventfd_poll(underlying_waitable_handle_t fd, std::chrono::milliseconds timeout)
-{
- hailo_status status = HAILO_UNINITIALIZED;
- struct pollfd pfd{};
- int poll_ret = -1;
-
- assert(-1 != fd);
-
- if (UINT32_MAX < timeout.count()) {
- status = HAILO_INVALID_ARGUMENT;
- LOGGER__ERROR("Invalid timeout value: {}", timeout.count());
- goto l_exit;
- }
- if (INT_MAX < timeout.count()) {
- timeout = std::chrono::milliseconds(INT_MAX);
- }
-
- pfd.fd = fd;
- pfd.events = POLLIN;
- do {
- poll_ret = poll(&pfd, 1, static_cast<int>(timeout.count()));
- } while ((0 > poll_ret) && (EINTR == poll_ret));
-
- if (0 == poll_ret) {
- LOGGER__TRACE("Timeout");
- status = HAILO_TIMEOUT;
- goto l_exit;
- }
- if (0 > poll_ret) {
- LOGGER__ERROR("poll failed with errno={}", errno);
- status = HAILO_INTERNAL_FAILURE;
- goto l_exit;
- }
- if (0 == (pfd.revents & POLLIN)) {
- LOGGER__ERROR("pfd not in read state. revents={}", pfd.revents);
- status = HAILO_INTERNAL_FAILURE;
- goto l_exit;
- }
-
- status = HAILO_SUCCESS;
-l_exit:
- return status;
-}
-
-hailo_status Waitable::eventfd_read(underlying_waitable_handle_t fd)
-{
- hailo_status status = HAILO_UNINITIALIZED;
- ssize_t read_ret = -1;
- uint64_t dummy;
-
- assert(-1 != fd);
-
- read_ret = read(fd, &dummy, sizeof(dummy));
- if (sizeof(dummy) != read_ret) {
- LOGGER__ERROR("read failed. bytes_read={}, expected={}, errno={}", read_ret, sizeof(dummy), errno);
- status = HAILO_INTERNAL_FAILURE;
- goto l_exit;
- }
-
- status = HAILO_SUCCESS;
-l_exit:
- return status;
-}
-
-hailo_status Waitable::eventfd_write(underlying_waitable_handle_t fd)
-{
- hailo_status status = HAILO_UNINITIALIZED;
- ssize_t write_ret = -1;
- uint64_t buffer = 1;
-
- assert(-1 != fd);
-
- write_ret = write(fd, &buffer, sizeof(buffer));
- if (sizeof(buffer) != write_ret) {
- LOGGER__ERROR("write failed. bytes_written={}, expected={}, errno={}", write_ret, sizeof(buffer), errno);
- status = HAILO_INTERNAL_FAILURE;
- goto l_exit;
- }
-
- status = HAILO_SUCCESS;
-l_exit:
- return status;
-}
-
-Expected<Event> Event::create(const State& initial_state)
-{
- const auto handle = open_event_handle(initial_state);
- if (-1 == handle) {
- return make_unexpected(HAILO_INTERNAL_FAILURE);
- }
- return Event(handle);
-}
-
-EventPtr Event::create_shared(const State& initial_state)
-{
- const auto handle = open_event_handle(initial_state);
- if (-1 == handle) {
- return nullptr;
- }
-
- return make_shared_nothrow<Event>(handle);
-}
-
-hailo_status Event::wait(std::chrono::milliseconds timeout)
-{
- return eventfd_poll(m_handle, timeout);
-}
-
-hailo_status Event::signal()
-{
- return eventfd_write(m_handle);
-}
-
-bool Event::is_auto_reset()
-{
- return false;
-}
-
-hailo_status Event::reset()
-{
- if (HAILO_TIMEOUT == wait(std::chrono::seconds(0))) {
- // Event is not set nothing to do, otherwise `eventfd_read` would block forever
- return HAILO_SUCCESS;
- }
- return eventfd_read(m_handle);
-}
-
-underlying_waitable_handle_t Event::open_event_handle(const State& initial_state)
-{
- static const int NO_FLAGS = 0;
- const int state = initial_state == State::signalled ? 1 : 0;
- const auto handle = eventfd(state, NO_FLAGS);
- if (-1 == handle) {
- LOGGER__ERROR("Call to eventfd failed with errno={}", errno);
- }
- return handle;
-}
-
-Expected<Semaphore> Semaphore::create(uint32_t initial_count)
-{
- const auto handle = open_semaphore_handle(initial_count);
- if (-1 == handle) {
- return make_unexpected(HAILO_INTERNAL_FAILURE);
- }
- return Semaphore(handle);
-}
-
-SemaphorePtr Semaphore::create_shared(uint32_t initial_count)
-{
- const auto handle = open_semaphore_handle(initial_count);
- if (-1 == handle) {
- return nullptr;
- }
-
- return make_shared_nothrow<Semaphore>(handle);
-}
-
-hailo_status Semaphore::wait(std::chrono::milliseconds timeout)
-{
- // TODO: See SDK-16568 (might be necessary in the future)
- hailo_status status = eventfd_poll(m_handle, timeout);
- if (HAILO_TIMEOUT == status) {
- LOGGER__INFO("eventfd_poll failed, status = {}", status);
- return status;
- }
- CHECK_SUCCESS(status);
-
- status = eventfd_read(m_handle);
- CHECK_SUCCESS(status);
-
- return HAILO_SUCCESS;
-}
-
-hailo_status Semaphore::signal()
-{
- return eventfd_write(m_handle);
-}
-
-bool Semaphore::is_auto_reset()
-{
- return true;
-}
-
-underlying_waitable_handle_t Semaphore::open_semaphore_handle(uint32_t initial_count)
-{
- static const int SEMAPHORE = EFD_SEMAPHORE;
- const auto handle = eventfd(initial_count, SEMAPHORE);
- if (-1 == handle) {
- LOGGER__ERROR("Call to eventfd failed with errno={}", errno);
- }
- return handle;
-}
-
-WaitOrShutdown::WaitOrShutdown(WaitablePtr waitable, EventPtr shutdown_event) :
- m_waitable(waitable),
- m_shutdown_event(shutdown_event),
- m_wait_handle_array(create_wait_handle_array(waitable, shutdown_event))
-{}
-
-hailo_status WaitOrShutdown::wait(std::chrono::milliseconds timeout)
-{
- int poll_ret = -1;
- do {
- poll_ret = poll(m_wait_handle_array.data(), m_wait_handle_array.size(), static_cast<int>(timeout.count()));
- } while ((0 > poll_ret) && (EINTR == poll_ret));
-
- if (0 == poll_ret) {
- LOGGER__TRACE("Timeout");
- return HAILO_TIMEOUT;
- }
- if (0 > poll_ret) {
- LOGGER__ERROR("poll failed with errno={}", errno);
- return HAILO_INTERNAL_FAILURE;
- }
- if ((0 == (m_wait_handle_array[WAITABLE_INDEX].revents & POLLIN)) &&
- (0 == (m_wait_handle_array[SHUTDOWN_INDEX].revents & POLLIN))) {
- LOGGER__ERROR("Both pfds not in read state: waitable.revents={}, shutdown.revents={}",
- m_wait_handle_array[WAITABLE_INDEX].revents, m_wait_handle_array[SHUTDOWN_INDEX].revents);
- return HAILO_INTERNAL_FAILURE;
- }
-
- if (m_wait_handle_array[SHUTDOWN_INDEX].revents & POLLIN) {
- return HAILO_SHUTDOWN_EVENT_SIGNALED;
- }
-
- if (m_waitable->is_auto_reset() && (m_wait_handle_array[WAITABLE_INDEX].revents & POLLIN)) {
- uint64_t dummy;
- ssize_t read_ret = read(m_wait_handle_array[WAITABLE_INDEX].fd, &dummy, sizeof(dummy));
- if (sizeof(dummy) != read_ret) {
- LOGGER__ERROR("read failed. bytes_read={}, expected={}, errno={}", read_ret, sizeof(dummy), errno);
- return HAILO_INTERNAL_FAILURE;
- }
- }
-
- return HAILO_SUCCESS;
-}
-
-hailo_status WaitOrShutdown::signal()
-{
- return m_waitable->signal();
-}
-
-WaitOrShutdown::WaitHandleArray WaitOrShutdown::create_wait_handle_array(WaitablePtr waitable, EventPtr shutdown_event)
-{
- // Note the order!
- WaitHandleArray pfds{{
- {shutdown_event->get_underlying_handle(), POLLIN, 0},
- {waitable->get_underlying_handle(), POLLIN, 0}
- }};
- return pfds;
-}
-
-} /* namespace hailort */
--- /dev/null
+cmake_minimum_required(VERSION 3.0.0)
+
+set(files
+ ${CMAKE_CURRENT_SOURCE_DIR}/microsec_timer.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/file_descriptor.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/mmap_buffer.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/hailort_driver.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/event.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/driver_scan.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/virtual_alloc_guard.cpp
+)
+
+set(HAILORT_CPP_SOURCES ${HAILORT_CPP_SOURCES} ${files} PARENT_SCOPE)
&guid,
NULL,
CM_GET_DEVICE_INTERFACE_LIST_PRESENT);
- CHECK_AS_EXPECTED(cr == CR_SUCCESS && len >= 2, HAILO_PCIE_DRIVER_NOT_INSTALLED,
+ CHECK_AS_EXPECTED((cr == CR_SUCCESS) && (len > 0), HAILO_PCIE_DRIVER_NOT_INSTALLED,
"Driver interface not found error {}", cr);
std::string names_str;
#include "hailo/event.hpp"
#include "common/utils.hpp"
-
-#include "utils/event_internal.hpp"
+#include "common/event_internal.hpp"
#include <utility>
#include <limits>
namespace hailort
{
-Waitable::Waitable(underlying_waitable_handle_t handle) :
- m_handle(handle)
-{}
-
Waitable::~Waitable()
{
if (nullptr != m_handle) {
m_handle(std::exchange(other.m_handle, nullptr))
{}
-underlying_waitable_handle_t Waitable::get_underlying_handle()
-{
- return m_handle;
-}
-
static DWORD timeout_millies(long long value)
{
DWORD millies = static_cast<DWORD>(value);
return make_shared_nothrow<Event>(handle);
}
-hailo_status Event::wait(std::chrono::milliseconds timeout)
-{
- return wait_for_single_object(m_handle, timeout);
-}
-
hailo_status Event::signal()
{
const auto result = SetEvent(m_handle);
return make_shared_nothrow<Semaphore>(handle);
}
-hailo_status Semaphore::wait(std::chrono::milliseconds timeout)
-{
- return wait_for_single_object(m_handle, timeout);
-}
-
hailo_status Semaphore::signal()
{
static const LONG INCREMENT_BY_ONE = 1;
return true;
}
+hailo_status Semaphore::post_wait()
+{
+ // On Windows, after a wait on a semaphore, the counter is decremented automatically.
+ return HAILO_SUCCESS;
+}
+
underlying_waitable_handle_t Semaphore::open_semaphore_handle(uint32_t initial_count)
{
static const LPSECURITY_ATTRIBUTES NO_INHERITANCE = nullptr;
return handle;
}
-WaitOrShutdown::WaitOrShutdown(WaitablePtr waitable, EventPtr shutdown_event) :
- m_waitable(waitable),
- m_shutdown_event(shutdown_event),
- m_wait_handle_array(create_wait_handle_array(waitable, shutdown_event))
-{}
-
-hailo_status WaitOrShutdown::wait(std::chrono::milliseconds timeout)
+Expected<size_t> WaitableGroup::wait_any(std::chrono::milliseconds timeout)
{
DWORD wait_millies = timeout_millies(timeout.count());
- static const BOOL WAIT_FOR_ANY = false;
- const auto wait_result = WaitForMultipleObjects(static_cast<DWORD>(m_wait_handle_array.size()),
- m_wait_handle_array.data(), WAIT_FOR_ANY, wait_millies);
- switch (wait_result) {
- case WAIT_OBJECT_0 + WAITABLE_INDEX:
- return HAILO_SUCCESS;
- case WAIT_OBJECT_0 + SHUTDOWN_INDEX:
- return HAILO_SHUTDOWN_EVENT_SIGNALED;
- case WAIT_TIMEOUT:
- return HAILO_TIMEOUT;
- default:
- LOGGER__ERROR("WaitForMultipleObjects returned {}, last_error={}", wait_result, GetLastError());
- return HAILO_INTERNAL_FAILURE;
+ const auto WAIT_OBJECT_N = WAIT_OBJECT_0 + m_waitable_handles.size();
+ const bool WAIT_FOR_ANY = false;
+ const auto wait_result = WaitForMultipleObjects(static_cast<DWORD>(m_waitable_handles.size()),
+ m_waitable_handles.data(), WAIT_FOR_ANY, wait_millies);
+ if (wait_result == WAIT_TIMEOUT) {
+ return make_unexpected(HAILO_TIMEOUT);
+ } else if ((wait_result >= WAIT_OBJECT_0) && (wait_result < WAIT_OBJECT_N)) {
+ // Object is signaled.
+ // Note: on Windows there is no need to call post_wait(), because the wait consumes the signal automatically.
+ return wait_result - WAIT_OBJECT_0;
+ } else {
+ LOGGER__ERROR("WaitForMultipleObjects returned {}, last_error={}", wait_result, GetLastError());
+ return make_unexpected(HAILO_INTERNAL_FAILURE);
}
}
-hailo_status WaitOrShutdown::signal()
-{
- return m_waitable->signal();
-}
-
-WaitOrShutdown::WaitHandleArray WaitOrShutdown::create_wait_handle_array(WaitablePtr waitable, EventPtr shutdown_event)
-{
- // Note the order!
- WaitHandleArray handles{
- shutdown_event->get_underlying_handle(),
- waitable->get_underlying_handle()
- };
- return handles;
-}
-
} /* namespace hailort */
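A minimal usage sketch of the new wait_any() API (illustrative only; the group layout
and the helper names here are assumptions, not part of this change):

    // Assumes a WaitableGroup built with a shutdown event at index 0.
    static hailo_status wait_for_work(hailort::WaitableGroup &group)
    {
        static const size_t SHUTDOWN_INDEX = 0;
        auto index = group.wait_any(std::chrono::milliseconds(1000));
        if (!index) {
            return index.status(); // HAILO_TIMEOUT or HAILO_INTERNAL_FAILURE
        }
        if (SHUTDOWN_INDEX == index.value()) {
            return HAILO_SHUTDOWN_EVENT_SIGNALED;
        }
        // Any other index means that waitable fired; post_wait() handling is
        // already done inside wait_any() (and is a no-op on Windows).
        return HAILO_SUCCESS;
    }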
return HAILO_SUCCESS;
}
-HailoRTDriver::HailoRTDriver(const std::string &dev_path, FileDescriptor &&fd, hailo_status &status) :
+HailoRTDriver::HailoRTDriver(const DeviceInfo &device_info, FileDescriptor &&fd, hailo_status &status) :
m_fd(std::move(fd)),
- m_dev_path(dev_path),
+ m_device_info(device_info),
m_allocate_driver_buffer(false)
{
tCompatibleHailoIoctlData data = {};
return devices_info;
}
-Expected<HailoRTDriver> HailoRTDriver::create(const std::string &dev_path)
+Expected<HailoRTDriver> HailoRTDriver::create(const DeviceInfo &device_info)
{
hailo_status status = HAILO_UNINITIALIZED;
- CDeviceFile f(dev_path);
+ CDeviceFile f(device_info.dev_path);
if (!f.Present()) {
- LOGGER__ERROR("Failed to open board {}", dev_path);
+ LOGGER__ERROR("Failed to open board {}", device_info.dev_path);
return make_unexpected(HAILO_OPEN_FILE_FAILURE);
}
FileDescriptor fd(f.Detach());
- HailoRTDriver platform(dev_path, std::move(fd), status);
+ HailoRTDriver platform(device_info, std::move(fd), status);
if (HAILO_SUCCESS != status) {
return make_unexpected(status);
}
return HAILO_SUCCESS;
}
-hailo_status HailoRTDriver::vdma_buffer_sync(VdmaBufferHandle handle, DmaDirection sync_direction, size_t offset, size_t count)
+hailo_status HailoRTDriver::vdma_buffer_sync(VdmaBufferHandle handle, DmaSyncDirection sync_direction,
+ size_t offset, size_t count)
{
- CHECK(sync_direction != DmaDirection::BOTH, HAILO_INVALID_ARGUMENT, "Can't sync vdma data both host and device");
tCompatibleHailoIoctlData data = {};
hailo_vdma_buffer_sync_params& sync_info = data.Buffer.VdmaBufferSync;
sync_info.handle = handle;
- sync_info.sync_type = (sync_direction == DmaDirection::H2D) ? HAILO_SYNC_FOR_DEVICE : HAILO_SYNC_FOR_HOST;
+ sync_info.sync_type = (sync_direction == DmaSyncDirection::TO_HOST) ? HAILO_SYNC_FOR_CPU : HAILO_SYNC_FOR_DEVICE;
sync_info.offset = offset;
sync_info.count = count;
if (0 > ioctl(this->m_fd, HAILO_VDMA_BUFFER_SYNC, &data)) {
return HAILO_SUCCESS;
}
-Expected<std::pair<uintptr_t, uint64_t>> HailoRTDriver::descriptors_list_create(size_t desc_count)
+Expected<DescriptorsListInfo> HailoRTDriver::descriptors_list_create(size_t desc_count, bool is_circular)
+{
+ auto handle_to_dma_address_pair = descriptors_list_create_ioctl(desc_count, is_circular);
+ CHECK_EXPECTED(handle_to_dma_address_pair);
+
+ const auto desc_handle = handle_to_dma_address_pair->first;
+ const auto dma_address = handle_to_dma_address_pair->second;
+
+ auto user_address = descriptors_list_create_mmap(desc_handle, desc_count);
+ if (!user_address) {
+ auto status = descriptors_list_release_ioctl(desc_handle);
+ if (HAILO_SUCCESS != status) {
+ LOGGER__ERROR("Failed releasing descriptors list, status {}", status);
+ // continue
+ }
+ return make_unexpected(user_address.status());
+ }
+
+ return DescriptorsListInfo{desc_handle, dma_address, desc_count, user_address.release()};
+}
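
For context, a sketch of how this create/release pair is intended to be used,
based on the signatures above (illustrative; the surrounding logic is hypothetical):

    static hailo_status use_descriptors_list(hailort::HailoRTDriver &driver)
    {
        const size_t DESC_COUNT = 64; // illustrative value
        auto desc_list_info = driver.descriptors_list_create(DESC_COUNT, true /* is_circular */);
        if (!desc_list_info) {
            return desc_list_info.status();
        }
        // ... program descriptors through desc_list_info->user_address ...
        return driver.descriptors_list_release(desc_list_info.value());
    }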
+
+hailo_status HailoRTDriver::descriptors_list_release(const DescriptorsListInfo &descriptors_list_info)
+{
+ hailo_status status = HAILO_SUCCESS;
+
+ auto unmap_status = descriptors_list_create_munmap(descriptors_list_info.user_address, descriptors_list_info.desc_count);
+ if (HAILO_SUCCESS != unmap_status) {
+ LOGGER__ERROR("Descriptors list unmap failed with {}", unmap_status);
+ status = unmap_status;
+ // continue
+ }
+
+ auto release_status = descriptors_list_release_ioctl(descriptors_list_info.handle);
+ if (HAILO_SUCCESS != release_status) {
+ LOGGER__ERROR("Descriptors list release status failed with {}", release_status);
+ status = release_status;
+ // continue
+ }
+
+ return status;
+}
+
+Expected<std::pair<uintptr_t, uint64_t>> HailoRTDriver::descriptors_list_create_ioctl(size_t desc_count, bool is_circular)
{
tCompatibleHailoIoctlData data = {};
hailo_desc_list_create_params& create_desc_info = data.Buffer.DescListCreate;
create_desc_info.desc_count = desc_count;
- create_desc_info.desc_handle = 0;
- create_desc_info.dma_address = 0;
+ create_desc_info.is_circular = is_circular;
if (0 > ioctl(this->m_fd, HAILO_DESC_LIST_CREATE, &data)) {
LOGGER__ERROR("Failed to create descriptors list with errno: {}", errno);
return std::move(std::make_pair(create_desc_info.desc_handle, create_desc_info.dma_address));
}
-hailo_status HailoRTDriver::descriptors_list_release(uintptr_t desc_handle)
+hailo_status HailoRTDriver::descriptors_list_release_ioctl(uintptr_t desc_handle)
{
tCompatibleHailoIoctlData data = {};
- uintptr_t& release_desc_info = data.Buffer.DescListReleaseParam;
+ uintptr_t& release_desc_info = data.Buffer.DescListReleaseParam;
release_desc_info = desc_handle;
if (0 > ioctl(this->m_fd, HAILO_DESC_LIST_RELEASE, &data)) {
LOGGER__ERROR("Failed to release descriptors list with errno: {}", errno);
return HAILO_SUCCESS;
}
+Expected<void *> HailoRTDriver::descriptors_list_create_mmap(uintptr_t desc_handle, size_t desc_count)
+{
+ tCompatibleHailoIoctlData data = {};
+ data.Buffer.DescListMmap.desc_handle = desc_handle;
+ data.Buffer.DescListMmap.size = desc_count * SIZE_OF_SINGLE_DESCRIPTOR;
+ if (0 > ioctl(m_fd, HAILO_NON_LINUX_DESC_LIST_MMAP, &data)) {
+ LOGGER__ERROR("Failed to map physical memory with errno: {}", errno);
+ return make_unexpected(HAILO_DRIVER_FAIL);
+ }
+
+ void *user_address = data.Buffer.DescListMmap.user_address;
+ return user_address;
+}
+
+hailo_status HailoRTDriver::descriptors_list_create_munmap(void *, size_t)
+{
+ // On Windows, the unmap is done as part of the release ioctl.
+ return HAILO_SUCCESS;
+}
+
hailo_status HailoRTDriver::descriptors_list_bind_vdma_buffer(uintptr_t desc_handle, VdmaBufferHandle buffer_handle,
uint16_t desc_page_size, uint8_t channel_index, uint32_t starting_desc)
{
return HAILO_NOT_IMPLEMENTED;
}
-Expected<MmapBufferImpl> MmapBufferImpl::create_file_map(size_t length, FileDescriptor &file, uintptr_t offset)
-{
- tCompatibleHailoIoctlData data = {};
- data.Buffer.DescListMmap.desc_handle = offset;
- data.Buffer.DescListMmap.size = length;
- if (0 > ioctl(file, HAILO_NON_LINUX_DESC_LIST_MMAP, &data)) {
- LOGGER__ERROR("Failed to map physical memory with errno: {}", errno);
- return make_unexpected(HAILO_DRIVER_FAIL);
- }
- // this mapping will be deleted automatically with the physical allocation
- return MmapBufferImpl(data.Buffer.DescListMmap.user_address, length, false);
-}
-
Expected<uintptr_t> HailoRTDriver::vdma_low_memory_buffer_alloc(size_t size) {
(void) size;
return make_unexpected(HAILO_INVALID_OPERATION);
void * const MmapBufferImpl::INVALID_ADDR = NULL;
-Expected<MmapBufferImpl> MmapBufferImpl::create_shared_memory(size_t length)
+Expected<MmapBufferImpl> MmapBufferImpl::create_shared_memory(size_t)
{
- void *address = VirtualAlloc(NULL, length, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
- CHECK_AS_EXPECTED(INVALID_ADDR != address, HAILO_OUT_OF_HOST_MEMORY, "Failed to mmap buffer with error:{}", GetLastError());
- return MmapBufferImpl(address, length, true);
+ LOGGER__ERROR("Creating shared memory is not implemented on windows");
+ return make_unexpected(HAILO_NOT_IMPLEMENTED);
+}
+
+Expected<MmapBufferImpl> MmapBufferImpl::create_file_map(size_t, FileDescriptor &, uintptr_t)
+{
+ LOGGER__ERROR("Creating file mapping is not implemented on windows");
+ return make_unexpected(HAILO_NOT_IMPLEMENTED);
}
hailo_status MmapBufferImpl::unmap()
{
- if (m_unmappable) {
- VirtualFree(m_address, m_length, MEM_RELEASE);
- }
- return HAILO_SUCCESS;
+ LOGGER__ERROR("Unmapping is not implemented on windows");
+ return HAILO_NOT_IMPLEMENTED;
}
} /* namespace hailort */
--- /dev/null
+/**
+ * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+**/
+/**
+ * @file virtual_alloc_guard.cpp
+ * @brief Guard object for VirtualAlloc and VirtualFree
+ **/
+
+#include "os/windows/virtual_alloc_guard.hpp"
+#include "common/logger_macros.hpp"
+#include "common/utils.hpp"
+
+namespace hailort
+{
+
+Expected<VirtualAllocGuard> VirtualAllocGuard::create(size_t size)
+{
+ hailo_status status = HAILO_UNINITIALIZED;
+ VirtualAllocGuard guard(size, status);
+ CHECK_SUCCESS_AS_EXPECTED(status);
+ return guard;
+}
+
+VirtualAllocGuard::VirtualAllocGuard(size_t size, hailo_status &status) :
+ m_address(VirtualAlloc(NULL, size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE)),
+ m_size(size)
+{
+ if (nullptr == m_address) {
+ status = HAILO_OUT_OF_HOST_MEMORY;
+ return;
+ }
+
+ status = HAILO_SUCCESS;
+}
+
+VirtualAllocGuard::~VirtualAllocGuard()
+{
+ if (nullptr != m_address) {
+ // Per MSDN, when passing MEM_RELEASE to VirtualFree, the size argument must be 0.
+ static constexpr size_t ZERO_SIZE = 0;
+ if (!VirtualFree(m_address, ZERO_SIZE, MEM_RELEASE)) {
+ LOGGER__ERROR("VirtualFree failed with error {}", GetLastError());
+ }
+ }
+}
+
+} /* namespace hailort */
--- /dev/null
+/**
+ * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+**/
+/**
+ * @file virtual_alloc_guard.hpp
+ * @brief Guard object for VirtualAlloc and VirtualFree (Windows only).
+ **/
+
+#ifndef _HAILO_VIRTUAL_ALLOC_GUARD_HPP_
+#define _HAILO_VIRTUAL_ALLOC_GUARD_HPP_
+
+#include "hailo/expected.hpp"
+
+#include <utility>
+
+namespace hailort
+{
+
+class VirtualAllocGuard final {
+public:
+ static Expected<VirtualAllocGuard> create(size_t size);
+ ~VirtualAllocGuard();
+
+ VirtualAllocGuard(const VirtualAllocGuard &other) = delete;
+ VirtualAllocGuard &operator=(const VirtualAllocGuard &other) = delete;
+ VirtualAllocGuard(VirtualAllocGuard &&other) :
+ m_address(std::exchange(other.m_address, nullptr)),
+ m_size(other.m_size)
+ {}
+ VirtualAllocGuard &operator=(VirtualAllocGuard &&other) = delete;
+
+ void *address() { return m_address; }
+ size_t size() const { return m_size; }
+
+private:
+ VirtualAllocGuard(size_t size, hailo_status &status);
+
+ void *m_address;
+ const size_t m_size;
+};
+
+} /* namespace hailort */
+
+#endif /* _HAILO_VIRTUAL_ALLOC_GUARD_HPP_ */
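
A minimal usage sketch for the guard (illustrative; the zero-fill is just an example
of touching the allocation):

    #include "os/windows/virtual_alloc_guard.hpp"
    #include <cstring>

    hailo_status zero_filled_allocation_example(size_t size)
    {
        auto guard = hailort::VirtualAllocGuard::create(size);
        if (!guard) {
            return guard.status(); // HAILO_OUT_OF_HOST_MEMORY on allocation failure
        }
        memset(guard->address(), 0, guard->size());
        // VirtualFree(..., MEM_RELEASE) runs automatically when the guard is destroyed.
        return HAILO_SUCCESS;
    }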
keepalive_Request request;
request.set_pid(pid);
empty reply;
- grpc::ClientContext context;
+ ClientContextWithTimeout context;
grpc::Status status = m_stub->client_keep_alive(&context, request, &reply);
CHECK_GRPC_STATUS(status);
return HAILO_SUCCESS;
{
get_service_version_Request request;
get_service_version_Reply reply;
- grpc::ClientContext context;
+ ClientContextWithTimeout context;
grpc::Status status = m_stub->get_service_version(&context, request, &reply);
CHECK_GRPC_STATUS_AS_EXPECTED(status);
assert(reply.status() < HAILO_STATUS_COUNT);
proto_vdevice_params->set_group_id(params.group_id == nullptr ? "" : std::string(params.group_id));
VDevice_create_Reply reply;
- grpc::ClientContext context;
+ ClientContextWithTimeout context;
grpc::Status status = m_stub->VDevice_create(&context, request, &reply);
CHECK_GRPC_STATUS_AS_EXPECTED(status);
assert(reply.status() < HAILO_STATUS_COUNT);
request.set_pid(pid);
request.set_handle(handle);
dup_handle_Reply reply;
- grpc::ClientContext context;
+ ClientContextWithTimeout context;
grpc::Status status = m_stub->VDevice_dup_handle(&context, request, &reply);
CHECK_GRPC_STATUS_AS_EXPECTED(status);
return reply.handle();
}
-hailo_status HailoRtRpcClient::VDevice_release(uint32_t handle)
+hailo_status HailoRtRpcClient::VDevice_release(uint32_t handle, uint32_t pid)
{
Release_Request request;
request.set_handle(handle);
+ request.set_pid(pid);
Release_Reply reply;
- grpc::ClientContext context;
+ ClientContextWithTimeout context;
grpc::Status status = m_stub->VDevice_release(&context, request, &reply);
CHECK_GRPC_STATUS(status);
assert(reply.status() < HAILO_STATUS_COUNT);
}
VStreams_create_Reply reply;
- grpc::ClientContext context;
+ ClientContextWithTimeout context;
grpc::Status status = m_stub->InputVStreams_create(&context, request, &reply);
CHECK_GRPC_STATUS_AS_EXPECTED(status);
assert(reply.status() < HAILO_STATUS_COUNT);
return input_vstreams_handles;
}
-hailo_status HailoRtRpcClient::InputVStream_release(uint32_t handle)
+hailo_status HailoRtRpcClient::InputVStream_release(uint32_t handle, uint32_t pid)
{
Release_Request request;
request.set_handle(handle);
+ request.set_pid(pid);
Release_Reply reply;
- grpc::ClientContext context;
+ ClientContextWithTimeout context;
grpc::Status status = m_stub->InputVStream_release(&context, request, &reply);
CHECK_GRPC_STATUS(status);
assert(reply.status() < HAILO_STATUS_COUNT);
}
VStreams_create_Reply reply;
- grpc::ClientContext context;
+ ClientContextWithTimeout context;
grpc::Status status = m_stub->OutputVStreams_create(&context, request, &reply);
CHECK_GRPC_STATUS_AS_EXPECTED(status);
assert(reply.status() < HAILO_STATUS_COUNT);
return output_vstreams_handles;
}
-hailo_status HailoRtRpcClient::OutputVStream_release(uint32_t handle)
+hailo_status HailoRtRpcClient::OutputVStream_release(uint32_t handle, uint32_t pid)
{
Release_Request request;
request.set_handle(handle);
+ request.set_pid(pid);
Release_Reply reply;
- grpc::ClientContext context;
+ ClientContextWithTimeout context;
grpc::Status status = m_stub->OutputVStream_release(&context, request, &reply);
CHECK_GRPC_STATUS(status);
assert(reply.status() < HAILO_STATUS_COUNT);
request.set_pid(pid);
request.set_handle(handle);
dup_handle_Reply reply;
- grpc::ClientContext context;
+ ClientContextWithTimeout context;
grpc::Status status = m_stub->InputVStream_dup_handle(&context, request, &reply);
CHECK_GRPC_STATUS_AS_EXPECTED(status);
return reply.handle();
request.set_pid(pid);
request.set_handle(handle);
dup_handle_Reply reply;
- grpc::ClientContext context;
+ ClientContextWithTimeout context;
grpc::Status status = m_stub->OutputVStream_dup_handle(&context, request, &reply);
CHECK_GRPC_STATUS_AS_EXPECTED(status);
return reply.handle();
}
VDevice_configure_Reply reply;
- grpc::ClientContext context;
+ ClientContextWithTimeout context;
grpc::Status status = m_stub->VDevice_configure(&context, request, &reply);
CHECK_GRPC_STATUS_AS_EXPECTED(status);
assert(reply.status() < HAILO_STATUS_COUNT);
request.set_handle(handle);
VDevice_get_physical_devices_ids_Reply reply;
- grpc::ClientContext context;
+ ClientContextWithTimeout context;
grpc::Status status = m_stub->VDevice_get_physical_devices_ids(&context, request, &reply);
CHECK_GRPC_STATUS_AS_EXPECTED(status);
assert(reply.status() < HAILO_STATUS_COUNT);
return result;
}
+Expected<std::vector<std::unique_ptr<Device>>> HailoRtRpcClient::VDevice_get_physical_devices(uint32_t handle)
+{
+ std::vector<std::unique_ptr<Device>> devices;
+
+ auto device_ids = VDevice_get_physical_devices_ids(handle);
+ CHECK_EXPECTED(device_ids);
+ devices.reserve(device_ids->size());
+
+ for (const auto &device_id : device_ids.value()) {
+ auto device = Device::create(device_id);
+ CHECK_EXPECTED(device);
+        devices.push_back(device.release());
+ }
+
+ return devices;
+}
+
Expected<hailo_stream_interface_t> HailoRtRpcClient::VDevice_get_default_streams_interface(uint32_t handle)
{
VDevice_get_default_streams_interface_Request request;
request.set_handle(handle);
VDevice_get_default_streams_interface_Reply reply;
- grpc::ClientContext context;
+ ClientContextWithTimeout context;
grpc::Status status = m_stub->VDevice_get_default_streams_interface(&context, request, &reply);
CHECK_GRPC_STATUS_AS_EXPECTED(status);
assert(reply.status() < HAILO_STATUS_COUNT);
request.set_pid(pid);
request.set_handle(handle);
dup_handle_Reply reply;
- grpc::ClientContext context;
+ ClientContextWithTimeout context;
grpc::Status status = m_stub->ConfiguredNetworkGroup_dup_handle(&context, request, &reply);
CHECK_GRPC_STATUS_AS_EXPECTED(status);
return reply.handle();
}
-hailo_status HailoRtRpcClient::ConfiguredNetworkGroup_release(uint32_t handle)
+hailo_status HailoRtRpcClient::ConfiguredNetworkGroup_release(uint32_t handle, uint32_t pid)
{
Release_Request request;
request.set_handle(handle);
+ request.set_pid(pid);
Release_Reply reply;
- grpc::ClientContext context;
+ ClientContextWithTimeout context;
grpc::Status status = m_stub->ConfiguredNetworkGroup_release(&context, request, &reply);
CHECK_GRPC_STATUS(status);
assert(reply.status() < HAILO_STATUS_COUNT);
request.set_network_name(network_name);
ConfiguredNetworkGroup_make_input_vstream_params_Reply reply;
- grpc::ClientContext context;
+ ClientContextWithTimeout context;
grpc::Status status = m_stub->ConfiguredNetworkGroup_make_input_vstream_params(&context, request, &reply);
CHECK_GRPC_STATUS_AS_EXPECTED(status);
assert(reply.status() < HAILO_STATUS_COUNT);
request.set_queue_size(queue_size);
ConfiguredNetworkGroup_make_output_vstream_params_groups_Reply reply;
- grpc::ClientContext context;
+ ClientContextWithTimeout context;
grpc::Status status = m_stub->ConfiguredNetworkGroup_make_output_vstream_params_groups(&context, request, &reply);
CHECK_GRPC_STATUS_AS_EXPECTED(status);
assert(reply.status() < HAILO_STATUS_COUNT);
request.set_network_name(network_name);
ConfiguredNetworkGroup_make_output_vstream_params_Reply reply;
- grpc::ClientContext context;
+ ClientContextWithTimeout context;
grpc::Status status = m_stub->ConfiguredNetworkGroup_make_output_vstream_params(&context, request, &reply);
CHECK_GRPC_STATUS_AS_EXPECTED(status);
assert(reply.status() < HAILO_STATUS_COUNT);
request.set_handle(handle);
ConfiguredNetworkGroup_name_Reply reply;
- grpc::ClientContext context;
+ ClientContextWithTimeout context;
grpc::Status status = m_stub->ConfiguredNetworkGroup_name(&context, request, &reply);
CHECK_GRPC_STATUS_AS_EXPECTED(status);
assert(reply.status() < HAILO_STATUS_COUNT);
request.set_handle(handle);
ConfiguredNetworkGroup_get_network_infos_Reply reply;
- grpc::ClientContext context;
+ ClientContextWithTimeout context;
grpc::Status status = m_stub->ConfiguredNetworkGroup_get_network_infos(&context, request, &reply);
CHECK_GRPC_STATUS_AS_EXPECTED(status);
assert(reply.status() < HAILO_STATUS_COUNT);
request.set_network_name(network_name);
ConfiguredNetworkGroup_get_all_stream_infos_Reply reply;
- grpc::ClientContext context;
+ ClientContextWithTimeout context;
grpc::Status status = m_stub->ConfiguredNetworkGroup_get_all_stream_infos(&context, request, &reply);
CHECK_GRPC_STATUS_AS_EXPECTED(status);
assert(reply.status() < HAILO_STATUS_COUNT);
proto_stream_info.nms_info().chunks_per_frame(),
proto_stream_info.nms_info().is_defused(),
nms_defuse_info,
+ proto_stream_info.nms_info().burst_size(),
+ static_cast<hailo_nms_burst_type_t>(proto_stream_info.nms_info().burst_type()),
};
hailo_format_t format{
static_cast<hailo_format_type_t>(proto_stream_info.format().type()),
request.set_handle(handle);
ConfiguredNetworkGroup_get_default_stream_interface_Reply reply;
- grpc::ClientContext context;
+ ClientContextWithTimeout context;
grpc::Status status = m_stub->ConfiguredNetworkGroup_get_default_stream_interface(&context, request, &reply);
CHECK_GRPC_STATUS_AS_EXPECTED(status);
assert(reply.status() < HAILO_STATUS_COUNT);
request.set_handle(handle);
ConfiguredNetworkGroup_get_output_vstream_groups_Reply reply;
- grpc::ClientContext context;
+ ClientContextWithTimeout context;
grpc::Status status = m_stub->ConfiguredNetworkGroup_get_output_vstream_groups(&context, request, &reply);
CHECK_GRPC_STATUS_AS_EXPECTED(status);
assert(reply.status() < HAILO_STATUS_COUNT);
request.set_network_name(network_name);
ConfiguredNetworkGroup_get_vstream_infos_Reply reply;
- grpc::ClientContext context;
+ ClientContextWithTimeout context;
grpc::Status status = m_stub->ConfiguredNetworkGroup_get_input_vstream_infos(&context, request, &reply);
CHECK_GRPC_STATUS_AS_EXPECTED(status);
assert(reply.status() < HAILO_STATUS_COUNT);
request.set_network_name(network_name);
ConfiguredNetworkGroup_get_vstream_infos_Reply reply;
- grpc::ClientContext context;
+ ClientContextWithTimeout context;
grpc::Status status = m_stub->ConfiguredNetworkGroup_get_output_vstream_infos(&context, request, &reply);
CHECK_GRPC_STATUS_AS_EXPECTED(status);
assert(reply.status() < HAILO_STATUS_COUNT);
request.set_network_name(network_name);
ConfiguredNetworkGroup_get_vstream_infos_Reply reply;
- grpc::ClientContext context;
+ ClientContextWithTimeout context;
grpc::Status status = m_stub->ConfiguredNetworkGroup_get_all_vstream_infos(&context, request, &reply);
CHECK_GRPC_STATUS_AS_EXPECTED(status);
assert(reply.status() < HAILO_STATUS_COUNT);
ConfiguredNetworkGroup_is_scheduled_Request request;
ConfiguredNetworkGroup_is_scheduled_Reply reply;
request.set_handle(handle);
- grpc::ClientContext context;
+ ClientContextWithTimeout context;
grpc::Status status = m_stub->ConfiguredNetworkGroup_is_scheduled(&context, request, &reply);
CHECK_GRPC_STATUS_AS_EXPECTED(status);
assert(reply.status() < HAILO_STATUS_COUNT);
request.set_network_name(network_name);
ConfiguredNetworkGroup_set_scheduler_timeout_Reply reply;
- grpc::ClientContext context;
+ ClientContextWithTimeout context;
grpc::Status status = m_stub->ConfiguredNetworkGroup_set_scheduler_timeout(&context, request, &reply);
CHECK_GRPC_STATUS(status);
assert(reply.status() < HAILO_STATUS_COUNT);
request.set_network_name(network_name);
ConfiguredNetworkGroup_set_scheduler_threshold_Reply reply;
- grpc::ClientContext context;
+ ClientContextWithTimeout context;
grpc::Status status = m_stub->ConfiguredNetworkGroup_set_scheduler_threshold(&context, request, &reply);
CHECK_GRPC_STATUS(status);
assert(reply.status() < HAILO_STATUS_COUNT);
request.set_network_name(network_name);
ConfiguredNetworkGroup_set_scheduler_priority_Reply reply;
- grpc::ClientContext context;
+ ClientContextWithTimeout context;
grpc::Status status = m_stub->ConfiguredNetworkGroup_set_scheduler_priority(&context, request, &reply);
CHECK_GRPC_STATUS(status);
assert(reply.status() < HAILO_STATUS_COUNT);
ConfiguredNetworkGroup_get_latency_measurement_Reply reply;
request.set_handle(handle);
request.set_network_name(network_name);
- grpc::ClientContext context;
+ ClientContextWithTimeout context;
grpc::Status status = m_stub->ConfiguredNetworkGroup_get_latency_measurement(&context, request, &reply);
CHECK_GRPC_STATUS_AS_EXPECTED(status);
assert(reply.status() < HAILO_STATUS_COUNT);
+ if (HAILO_NOT_AVAILABLE == reply.status()) {
+ return make_unexpected(HAILO_NOT_AVAILABLE);
+ }
CHECK_SUCCESS_AS_EXPECTED(static_cast<hailo_status>(reply.status()));
LatencyMeasurementResult result{
std::chrono::nanoseconds(reply.avg_hw_latency())
ConfiguredNetworkGroup_is_multi_context_Request request;
ConfiguredNetworkGroup_is_multi_context_Reply reply;
request.set_handle(handle);
- grpc::ClientContext context;
+ ClientContextWithTimeout context;
grpc::Status status = m_stub->ConfiguredNetworkGroup_is_multi_context(&context, request, &reply);
CHECK_GRPC_STATUS_AS_EXPECTED(status);
assert(reply.status() < HAILO_STATUS_COUNT);
ConfiguredNetworkGroup_get_config_params_Request request;
ConfiguredNetworkGroup_get_config_params_Reply reply;
request.set_handle(handle);
- grpc::ClientContext context;
+ ClientContextWithTimeout context;
grpc::Status status = m_stub->ConfiguredNetworkGroup_get_config_params(&context, request, &reply);
CHECK_GRPC_STATUS_AS_EXPECTED(status);
assert(reply.status() < HAILO_STATUS_COUNT);
return network_configure_params;
}
+Expected<std::vector<std::string>> HailoRtRpcClient::ConfiguredNetworkGroup_get_sorted_output_names(uint32_t handle)
+{
+ ConfiguredNetworkGroup_get_sorted_output_names_Request request;
+ ConfiguredNetworkGroup_get_sorted_output_names_Reply reply;
+ request.set_handle(handle);
+ ClientContextWithTimeout context;
+ grpc::Status status = m_stub->ConfiguredNetworkGroup_get_sorted_output_names(&context, request, &reply);
+ CHECK_GRPC_STATUS_AS_EXPECTED(status);
+ assert(reply.status() < HAILO_STATUS_COUNT);
+ CHECK_SUCCESS_AS_EXPECTED(static_cast<hailo_status>(reply.status()));
+ std::vector<std::string> result;
+ for (auto &name : reply.sorted_output_names()) {
+ result.push_back(name);
+ }
+ return result;
+}
+
+Expected<std::vector<std::string>> HailoRtRpcClient::ConfiguredNetworkGroup_get_stream_names_from_vstream_name(uint32_t handle, const std::string &vstream_name)
+{
+ ConfiguredNetworkGroup_get_stream_names_from_vstream_name_Request request;
+ ConfiguredNetworkGroup_get_stream_names_from_vstream_name_Reply reply;
+ request.set_handle(handle);
+ request.set_vstream_name(vstream_name);
+ ClientContextWithTimeout context;
+ grpc::Status status = m_stub->ConfiguredNetworkGroup_get_stream_names_from_vstream_name(&context, request, &reply);
+ CHECK_GRPC_STATUS_AS_EXPECTED(status);
+ assert(reply.status() < HAILO_STATUS_COUNT);
+ CHECK_SUCCESS_AS_EXPECTED(static_cast<hailo_status>(reply.status()));
+ std::vector<std::string> result;
+ for (auto &name : reply.streams_names()) {
+ result.push_back(name);
+ }
+ return result;
+}
+
+Expected<std::vector<std::string>> HailoRtRpcClient::ConfiguredNetworkGroup_get_vstream_names_from_stream_name(uint32_t handle, const std::string &stream_name)
+{
+ ConfiguredNetworkGroup_get_vstream_names_from_stream_name_Request request;
+ ConfiguredNetworkGroup_get_vstream_names_from_stream_name_Reply reply;
+ request.set_handle(handle);
+ request.set_stream_name(stream_name);
+ ClientContextWithTimeout context;
+ grpc::Status status = m_stub->ConfiguredNetworkGroup_get_vstream_names_from_stream_name(&context, request, &reply);
+ CHECK_GRPC_STATUS_AS_EXPECTED(status);
+ assert(reply.status() < HAILO_STATUS_COUNT);
+ CHECK_SUCCESS_AS_EXPECTED(static_cast<hailo_status>(reply.status()));
+ std::vector<std::string> result;
+ for (auto &name : reply.vstreams_names()) {
+ result.push_back(name);
+ }
+ return result;
+}
+
hailo_status HailoRtRpcClient::InputVStream_write(uint32_t handle, const MemoryView &buffer)
{
InputVStream_write_Request request;
request.set_handle(handle);
request.set_data(buffer.data(), buffer.size());
- grpc::ClientContext context;
+ ClientContextWithTimeout context;
InputVStream_write_Reply reply;
grpc::Status status = m_stub->InputVStream_write(&context, request, &reply);
CHECK_GRPC_STATUS(status);
OutputVStream_read_Request request;
request.set_handle(handle);
request.set_size(static_cast<uint32_t>(buffer.size()));
- grpc::ClientContext context;
+ ClientContextWithTimeout context;
OutputVStream_read_Reply reply;
grpc::Status status = m_stub->OutputVStream_read(&context, request, &reply);
CHECK_GRPC_STATUS(status);
{
VStream_get_frame_size_Request request;
request.set_handle(handle);
- grpc::ClientContext context;
+ ClientContextWithTimeout context;
VStream_get_frame_size_Reply reply;
grpc::Status status = m_stub->InputVStream_get_frame_size(&context, request, &reply);
CHECK_GRPC_STATUS_AS_EXPECTED(status);
{
VStream_get_frame_size_Request request;
request.set_handle(handle);
- grpc::ClientContext context;
+ ClientContextWithTimeout context;
VStream_get_frame_size_Reply reply;
grpc::Status status = m_stub->OutputVStream_get_frame_size(&context, request, &reply);
CHECK_GRPC_STATUS_AS_EXPECTED(status);
{
InputVStream_flush_Request request;
request.set_handle(handle);
- grpc::ClientContext context;
+ ClientContextWithTimeout context;
InputVStream_flush_Reply reply;
grpc::Status status = m_stub->InputVStream_flush(&context, request, &reply);
CHECK_GRPC_STATUS(status);
{
VStream_name_Request request;
request.set_handle(handle);
- grpc::ClientContext context;
+ ClientContextWithTimeout context;
VStream_name_Reply reply;
grpc::Status status = m_stub->InputVStream_name(&context, request, &reply);
CHECK_GRPC_STATUS_AS_EXPECTED(status);
{
VStream_name_Request request;
request.set_handle(handle);
- grpc::ClientContext context;
+ ClientContextWithTimeout context;
VStream_name_Reply reply;
grpc::Status status = m_stub->OutputVStream_name(&context, request, &reply);
CHECK_GRPC_STATUS_AS_EXPECTED(status);
{
VStream_network_name_Request request;
request.set_handle(handle);
- grpc::ClientContext context;
+ ClientContextWithTimeout context;
VStream_network_name_Reply reply;
grpc::Status status = m_stub->InputVStream_network_name(&context, request, &reply);
CHECK_GRPC_STATUS_AS_EXPECTED(status);
{
VStream_network_name_Request request;
request.set_handle(handle);
- grpc::ClientContext context;
+ ClientContextWithTimeout context;
VStream_network_name_Reply reply;
grpc::Status status = m_stub->OutputVStream_network_name(&context, request, &reply);
CHECK_GRPC_STATUS_AS_EXPECTED(status);
{
VStream_abort_Request request;
request.set_handle(handle);
- grpc::ClientContext context;
+ ClientContextWithTimeout context;
VStream_abort_Reply reply;
grpc::Status status = m_stub->InputVStream_abort(&context, request, &reply);
CHECK_GRPC_STATUS(status);
{
VStream_abort_Request request;
request.set_handle(handle);
- grpc::ClientContext context;
+ ClientContextWithTimeout context;
VStream_abort_Reply reply;
grpc::Status status = m_stub->OutputVStream_abort(&context, request, &reply);
CHECK_GRPC_STATUS(status);
{
VStream_resume_Request request;
request.set_handle(handle);
- grpc::ClientContext context;
+ ClientContextWithTimeout context;
VStream_resume_Reply reply;
grpc::Status status = m_stub->InputVStream_resume(&context, request, &reply);
CHECK_GRPC_STATUS(status);
{
VStream_resume_Request request;
request.set_handle(handle);
- grpc::ClientContext context;
+ ClientContextWithTimeout context;
VStream_resume_Reply reply;
grpc::Status status = m_stub->OutputVStream_resume(&context, request, &reply);
CHECK_GRPC_STATUS(status);
return static_cast<hailo_status>(reply.status());
}
+hailo_status HailoRtRpcClient::InputVStream_stop_and_clear(uint32_t handle)
+{
+ VStream_stop_and_clear_Request request;
+ request.set_handle(handle);
+ ClientContextWithTimeout context;
+ VStream_stop_and_clear_Reply reply;
+ grpc::Status status = m_stub->InputVStream_stop_and_clear(&context, request, &reply);
+ CHECK_GRPC_STATUS(status);
+ assert(reply.status() < HAILO_STATUS_COUNT);
+ return static_cast<hailo_status>(reply.status());
+}
+
+hailo_status HailoRtRpcClient::OutputVStream_stop_and_clear(uint32_t handle)
+{
+ VStream_stop_and_clear_Request request;
+ request.set_handle(handle);
+ ClientContextWithTimeout context;
+ VStream_stop_and_clear_Reply reply;
+ grpc::Status status = m_stub->OutputVStream_stop_and_clear(&context, request, &reply);
+ CHECK_GRPC_STATUS(status);
+ assert(reply.status() < HAILO_STATUS_COUNT);
+ return static_cast<hailo_status>(reply.status());
+}
+
+hailo_status HailoRtRpcClient::InputVStream_start_vstream(uint32_t handle)
+{
+ VStream_start_vstream_Request request;
+ request.set_handle(handle);
+ ClientContextWithTimeout context;
+ VStream_start_vstream_Reply reply;
+ grpc::Status status = m_stub->InputVStream_start_vstream(&context, request, &reply);
+ CHECK_GRPC_STATUS(status);
+ assert(reply.status() < HAILO_STATUS_COUNT);
+ return static_cast<hailo_status>(reply.status());
+}
+
+hailo_status HailoRtRpcClient::OutputVStream_start_vstream(uint32_t handle)
+{
+ VStream_start_vstream_Request request;
+ request.set_handle(handle);
+ ClientContextWithTimeout context;
+ VStream_start_vstream_Reply reply;
+ grpc::Status status = m_stub->OutputVStream_start_vstream(&context, request, &reply);
+ CHECK_GRPC_STATUS(status);
+ assert(reply.status() < HAILO_STATUS_COUNT);
+ return static_cast<hailo_status>(reply.status());
+}
+
Expected<hailo_format_t> HailoRtRpcClient::InputVStream_get_user_buffer_format(uint32_t handle)
{
VStream_get_user_buffer_format_Request request;
request.set_handle(handle);
- grpc::ClientContext context;
+ ClientContextWithTimeout context;
VStream_get_user_buffer_format_Reply reply;
grpc::Status status = m_stub->InputVStream_get_user_buffer_format(&context, request, &reply);
CHECK_GRPC_STATUS_AS_EXPECTED(status);
{
VStream_get_user_buffer_format_Request request;
request.set_handle(handle);
- grpc::ClientContext context;
+ ClientContextWithTimeout context;
VStream_get_user_buffer_format_Reply reply;
grpc::Status status = m_stub->OutputVStream_get_user_buffer_format(&context, request, &reply);
CHECK_GRPC_STATUS_AS_EXPECTED(status);
{
VStream_get_info_Request request;
request.set_handle(handle);
- grpc::ClientContext context;
+ ClientContextWithTimeout context;
VStream_get_info_Reply reply;
grpc::Status status = m_stub->InputVStream_get_info(&context, request, &reply);
CHECK_GRPC_STATUS_AS_EXPECTED(status);
{
VStream_get_info_Request request;
request.set_handle(handle);
- grpc::ClientContext context;
+ ClientContextWithTimeout context;
VStream_get_info_Reply reply;
grpc::Status status = m_stub->OutputVStream_get_info(&context, request, &reply);
CHECK_GRPC_STATUS_AS_EXPECTED(status);
return deserialize_vstream_info(info_proto);
}
+Expected<bool> HailoRtRpcClient::InputVStream_is_aborted(uint32_t handle)
+{
+ VStream_is_aborted_Request request;
+ request.set_handle(handle);
+ ClientContextWithTimeout context;
+ VStream_is_aborted_Reply reply;
+ grpc::Status status = m_stub->InputVStream_is_aborted(&context, request, &reply);
+ CHECK_GRPC_STATUS_AS_EXPECTED(status);
+ assert(reply.status() < HAILO_STATUS_COUNT);
+ CHECK_SUCCESS_AS_EXPECTED(static_cast<hailo_status>(reply.status()));
+ auto is_aborted = reply.is_aborted();
+ return is_aborted;
+}
+
+Expected<bool> HailoRtRpcClient::OutputVStream_is_aborted(uint32_t handle)
+{
+ VStream_is_aborted_Request request;
+ request.set_handle(handle);
+ ClientContextWithTimeout context;
+ VStream_is_aborted_Reply reply;
+ grpc::Status status = m_stub->OutputVStream_is_aborted(&context, request, &reply);
+ CHECK_GRPC_STATUS_AS_EXPECTED(status);
+ assert(reply.status() < HAILO_STATUS_COUNT);
+ CHECK_SUCCESS_AS_EXPECTED(static_cast<hailo_status>(reply.status()));
+ auto is_aborted = reply.is_aborted();
+ return is_aborted;
+}
+
}
\ No newline at end of file
#include "hailo/hailort.h"
#include "hailo/expected.hpp"
+#include "hailo/device.hpp"
#if defined(_MSC_VER)
#pragma warning(push)
namespace hailort
{
+// Higher than the default HRT timeout so we can differentiate errors
+static const std::chrono::milliseconds CONTEXT_TIMEOUT(HAILO_DEFAULT_VSTREAM_TIMEOUT_MS + 500);
+
+class ClientContextWithTimeout : public grpc::ClientContext {
+public:
+ ClientContextWithTimeout()
+ {
+ set_deadline(std::chrono::system_clock::now() + CONTEXT_TIMEOUT);
+ }
+};
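+
+// Note: with this deadline set, an RPC that would otherwise block indefinitely fails
+// once CONTEXT_TIMEOUT elapses. gRPC reports grpc::StatusCode::DEADLINE_EXCEEDED,
+// which makes grpc::Status::ok() false, so the CHECK_GRPC_STATUS checks used
+// throughout this client turn a stuck service call into an error instead of a hang.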
+
class HailoRtRpcClient final {
public:
HailoRtRpcClient(std::shared_ptr<grpc::Channel> channel)
Expected<uint32_t> VDevice_create(const hailo_vdevice_params_t ¶ms, uint32_t pid);
Expected<uint32_t> VDevice_dup_handle(uint32_t pid, uint32_t handle);
- hailo_status VDevice_release(uint32_t handle);
+ hailo_status VDevice_release(uint32_t handle, uint32_t pid);
Expected<std::vector<std::string>> VDevice_get_physical_devices_ids(uint32_t handle);
+ Expected<std::vector<std::unique_ptr<Device>>> VDevice_get_physical_devices(uint32_t handle);
Expected<hailo_stream_interface_t> VDevice_get_default_streams_interface(uint32_t handle);
Expected<std::vector<uint32_t>> VDevice_configure(uint32_t vdevice_handle, const Hef &hef, uint32_t pid, const NetworkGroupsParamsMap &configure_params={});
Expected<uint32_t> ConfiguredNetworkGroup_dup_handle(uint32_t pid, uint32_t handle);
- hailo_status ConfiguredNetworkGroup_release(uint32_t handle);
+ hailo_status ConfiguredNetworkGroup_release(uint32_t handle, uint32_t pid);
Expected<std::map<std::string, hailo_vstream_params_t>> ConfiguredNetworkGroup_make_input_vstream_params(uint32_t handle,
bool quantized, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size,
const std::string &network_name);
Expected<LatencyMeasurementResult> ConfiguredNetworkGroup_get_latency_measurement(uint32_t handle, const std::string &network_name);
Expected<bool> ConfiguredNetworkGroup_is_multi_context(uint32_t handle);
Expected<ConfigureNetworkParams> ConfiguredNetworkGroup_get_config_params(uint32_t handle);
+ Expected<std::vector<std::string>> ConfiguredNetworkGroup_get_sorted_output_names(uint32_t handle);
+ Expected<std::vector<std::string>> ConfiguredNetworkGroup_get_stream_names_from_vstream_name(uint32_t handle, const std::string &vstream_name);
+ Expected<std::vector<std::string>> ConfiguredNetworkGroup_get_vstream_names_from_stream_name(uint32_t handle, const std::string &stream_name);
Expected<std::vector<uint32_t>> InputVStreams_create(uint32_t net_group_handle,
const std::map<std::string, hailo_vstream_params_t> &inputs_params, uint32_t pid);
Expected<uint32_t> InputVStream_dup_handle(uint32_t pid, uint32_t handle);
Expected<uint32_t> OutputVStream_dup_handle(uint32_t pid, uint32_t handle);
- hailo_status InputVStream_release(uint32_t handle);
+ hailo_status InputVStream_release(uint32_t handle, uint32_t pid);
Expected<std::vector<uint32_t>> OutputVStreams_create(uint32_t net_group_handle,
const std::map<std::string, hailo_vstream_params_t> &output_params, uint32_t pid);
- hailo_status OutputVStream_release(uint32_t handle);
+ hailo_status OutputVStream_release(uint32_t handle, uint32_t pid);
hailo_status InputVStream_write(uint32_t handle, const MemoryView &buffer);
hailo_status OutputVStream_read(uint32_t handle, MemoryView buffer);
Expected<size_t> InputVStream_get_frame_size(uint32_t handle);
hailo_status OutputVStream_abort(uint32_t handle);
hailo_status InputVStream_resume(uint32_t handle);
hailo_status OutputVStream_resume(uint32_t handle);
+ hailo_status InputVStream_stop_and_clear(uint32_t handle);
+ hailo_status OutputVStream_stop_and_clear(uint32_t handle);
+ hailo_status InputVStream_start_vstream(uint32_t handle);
+ hailo_status OutputVStream_start_vstream(uint32_t handle);
Expected<hailo_format_t> InputVStream_get_user_buffer_format(uint32_t handle);
Expected<hailo_format_t> OutputVStream_get_user_buffer_format(uint32_t handle);
Expected<hailo_vstream_info_t> InputVStream_get_info(uint32_t handle);
Expected<hailo_vstream_info_t> OutputVStream_get_info(uint32_t handle);
+ Expected<bool> InputVStream_is_aborted(uint32_t handle);
+ Expected<bool> OutputVStream_is_aborted(uint32_t handle);
+
private:
std::unique_ptr<ProtoHailoRtRpc::Stub> m_stub;
};
m_network_group_name = reply.value();
}
+ConfiguredNetworkGroupClient::ConfiguredNetworkGroupClient(uint32_t handle, const std::string &network_group_name) :
+ m_handle(handle),
+ m_network_group_name(network_group_name)
+{}
+
+Expected<std::shared_ptr<ConfiguredNetworkGroupClient>> ConfiguredNetworkGroupClient::duplicate_network_group_client(uint32_t handle,
+ const std::string &network_group_name)
+{
+ auto duplicated_net_group = std::shared_ptr<ConfiguredNetworkGroupClient>(new (std::nothrow) ConfiguredNetworkGroupClient(handle, network_group_name));
+ CHECK_ARG_NOT_NULL_AS_EXPECTED(duplicated_net_group);
+ auto status = duplicated_net_group->after_fork_in_child();
+ CHECK_SUCCESS_AS_EXPECTED(status);
+
+ return duplicated_net_group;
+}
+
ConfiguredNetworkGroupClient::~ConfiguredNetworkGroupClient()
{
- auto reply = m_client->ConfiguredNetworkGroup_release(m_handle);
+ auto reply = m_client->ConfiguredNetworkGroup_release(m_handle, OsUtils::get_curr_pid());
if (reply != HAILO_SUCCESS) {
LOGGER__CRITICAL("ConfiguredNetworkGroup_release failed with status: {}", reply);
}
{
auto status = create_client();
CHECK_SUCCESS(status);
+
auto expected_dup_handle = m_client->ConfiguredNetworkGroup_dup_handle(OsUtils::get_curr_pid(), m_handle);
CHECK_EXPECTED_AS_STATUS(expected_dup_handle);
m_handle = expected_dup_handle.value();
+
return HAILO_SUCCESS;
}
const hailo_activate_network_group_params_t &/* network_group_params */)
{
LOGGER__WARNING("ConfiguredNetworkGroup::activate function is not supported when using multi-process service or HailoRT Scheduler.");
- return make_unexpected(HAILO_NOT_IMPLEMENTED);
+ return make_unexpected(HAILO_INVALID_OPERATION);
}
/* Network group base functions */
hailo_status ConfiguredNetworkGroupClient::wait_for_activation(const std::chrono::milliseconds&)
{
LOGGER__WARNING("ConfiguredNetworkGroup::wait_for_activation function is not supported when using multi-process service or HailoRT Scheduler.");
- return HAILO_NOT_IMPLEMENTED;
+ return HAILO_INVALID_OPERATION;
}
Expected<std::vector<std::vector<std::string>>> ConfiguredNetworkGroupClient::get_output_vstream_groups()
return reply.value();
}
+Expected<HwInferResults> ConfiguredNetworkGroupClient::run_hw_infer_estimator()
+{
+ LOGGER__ERROR("ConfiguredNetworkGroupClient::run_hw_infer_estimator function is not supported when using multi-process service.");
+ return make_unexpected(HAILO_NOT_IMPLEMENTED);
+}
+
const ConfigureNetworkParams ConfiguredNetworkGroupClient::get_config_params() const
{
auto reply = m_client->ConfiguredNetworkGroup_get_config_params(m_handle);
return reply.value();
}
+Expected<std::vector<std::string>> ConfiguredNetworkGroupClient::get_sorted_output_names()
+{
+ return m_client->ConfiguredNetworkGroup_get_sorted_output_names(m_handle);
+}
+
+Expected<std::vector<std::string>> ConfiguredNetworkGroupClient::get_stream_names_from_vstream_name(const std::string &vstream_name)
+{
+ return m_client->ConfiguredNetworkGroup_get_stream_names_from_vstream_name(m_handle, vstream_name);
+}
+
+Expected<std::vector<std::string>> ConfiguredNetworkGroupClient::get_vstream_names_from_stream_name(const std::string &stream_name)
+{
+ return m_client->ConfiguredNetworkGroup_get_vstream_names_from_stream_name(m_handle, stream_name);
+}
+
Expected<std::vector<InputVStream>> ConfiguredNetworkGroupClient::create_input_vstreams(const std::map<std::string, hailo_vstream_params_t> &inputs_params)
{
auto reply = m_client->InputVStreams_create(m_handle, inputs_params, OsUtils::get_curr_pid());
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
/**
- * @file hailort_common.hpp
+ * @file rpc_client_utils.hpp
* @brief Utility functions for rpc client communication
**/
return instance;
}
- HailoRtRpcClientUtils()
- : m_mutex(std::make_shared<std::mutex>())
- , m_forking(false)
- {}
+ HailoRtRpcClientUtils() :
+ m_mutex(std::make_shared<std::mutex>())
+ {
+ auto status = init_keep_alive_shutdown_event();
+ if (HAILO_SUCCESS != status) {
+ LOGGER__ERROR("Failed to initialize RPC Client's keep-alive shutdown event with status {}", status);
+ }
+ }
static Expected<std::unique_ptr<HailoRtRpcClient>> create_client()
{
// Create client
auto channel = grpc::CreateChannel(hailort::HAILORT_SERVICE_DEFAULT_ADDR, grpc::InsecureChannelCredentials());
auto client = make_unique_nothrow<HailoRtRpcClient>(channel);
- CHECK(client != nullptr, HAILO_OUT_OF_HOST_MEMORY);
+ CHECK_NOT_NULL(client, HAILO_OUT_OF_HOST_MEMORY);
// Check service version
auto reply = client->get_service_version();
m_pid = OsUtils::get_curr_pid();
// Trigger client keep-alive
- m_keep_alive_thread = make_unique_nothrow<AsyncThread<hailo_status>>([this] () {
- return this->keep_alive();
- });
- CHECK(nullptr != m_keep_alive_thread, HAILO_OUT_OF_HOST_MEMORY);
+ status = start_keep_alive_thread();
+ CHECK_SUCCESS(status);
+
m_initialized = true;
}
return HAILO_SUCCESS;
}
- hailo_status before_fork()
+ void before_fork()
{
- m_forking = true;
- return m_keep_alive_thread->get();
+ stop_keep_alive_thread();
}
hailo_status after_fork_in_parent()
{
- m_forking = false;
+ m_keep_alive_shutdown_event->reset();
std::unique_lock<std::mutex> lock(*m_mutex);
if (m_initialized) {
- // Trigger client keep-alive
- m_keep_alive_thread = make_unique_nothrow<AsyncThread<hailo_status>>([this] () {
- return this->keep_alive();
- });
+ return start_keep_alive_thread();
}
return HAILO_SUCCESS;
}
hailo_status after_fork_in_child()
{
- m_forking = false;
m_mutex = std::make_shared<std::mutex>();
+ auto status = init_keep_alive_shutdown_event();
+ CHECK_SUCCESS(status);
+
std::unique_lock<std::mutex> lock(*m_mutex);
if (m_initialized) {
m_pid = OsUtils::get_curr_pid();
- // Trigger client keep-alive
- m_keep_alive_thread = make_unique_nothrow<AsyncThread<hailo_status>>([this] () {
- return this->keep_alive();
- });
+ return start_keep_alive_thread();
}
return HAILO_SUCCESS;
}
private:
~HailoRtRpcClientUtils()
{
- m_keep_alive_thread.release();
+ stop_keep_alive_thread();
+ }
+
+ void stop_keep_alive_thread()
+ {
+ if (m_keep_alive_shutdown_event) {
+ (void)m_keep_alive_shutdown_event->signal();
+ }
+
+ m_keep_alive_thread.reset();
+ }
+
+ hailo_status start_keep_alive_thread()
+ {
+ m_keep_alive_thread = make_unique_nothrow<AsyncThread<hailo_status>>("SVC_KEEPALIVE", [this] () {
+ return this->keep_alive();
+ });
+ CHECK_NOT_NULL(m_keep_alive_thread, HAILO_OUT_OF_HOST_MEMORY);
+ return HAILO_SUCCESS;
}
hailo_status keep_alive()
{
auto channel = grpc::CreateChannel(hailort::HAILORT_SERVICE_DEFAULT_ADDR, grpc::InsecureChannelCredentials());
auto client = make_unique_nothrow<HailoRtRpcClient>(channel);
- CHECK(client != nullptr, HAILO_OUT_OF_HOST_MEMORY);
- while (!m_forking) {
+ CHECK_NOT_NULL(client, HAILO_OUT_OF_HOST_MEMORY);
+
+ while (true) {
+ auto shutdown_status = m_keep_alive_shutdown_event->wait(hailort::HAILO_KEEPALIVE_INTERVAL / 2);
+ if (HAILO_TIMEOUT != shutdown_status) {
+ // shutdown event was signaled (or another error occurred)
+ return shutdown_status;
+ }
+
+ // keep-alive interval elapsed - send a ping
auto status = client->client_keep_alive(m_pid);
CHECK_SUCCESS(status);
- std::this_thread::sleep_for(hailort::HAILO_KEEPALIVE_INTERVAL / 2);
}
+ }
+
+ hailo_status init_keep_alive_shutdown_event()
+ {
+ m_keep_alive_shutdown_event = Event::create_shared(Event::State::not_signalled);
+ CHECK(nullptr != m_keep_alive_shutdown_event, HAILO_OUT_OF_HOST_MEMORY);
+
return HAILO_SUCCESS;
}
std::shared_ptr<std::mutex> m_mutex;
AsyncThreadPtr<hailo_status> m_keep_alive_thread;
bool m_initialized = false;
- std::atomic<bool> m_forking;
uint32_t m_pid;
+ EventPtr m_keep_alive_shutdown_event;
};
} /* namespace hailort */
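
The keep-alive rework above replaces a sleep-plus-atomic-flag loop with a waitable shutdown event: waiting on the event with a timeout doubles as the keep-alive interval, so a shutdown request is observed within a single wait instead of after a full sleep. Below is a minimal standalone sketch of the same pattern, using std::condition_variable in place of hailort's internal Event class (ShutdownEvent, run_keep_alive and send_keep_alive_to_service are illustrative names, not library APIs):

#include <chrono>
#include <condition_variable>
#include <mutex>

class ShutdownEvent {
public:
    void signal()
    {
        std::lock_guard<std::mutex> lock(m_mutex);
        m_signalled = true;
        m_cv.notify_all();
    }

    // Returns true if the event was signalled, false on timeout
    // (analogous to Event::wait() returning a status other than HAILO_TIMEOUT).
    bool wait_for(std::chrono::milliseconds timeout)
    {
        std::unique_lock<std::mutex> lock(m_mutex);
        return m_cv.wait_for(lock, timeout, [this] () { return m_signalled; });
    }

private:
    std::mutex m_mutex;
    std::condition_variable m_cv;
    bool m_signalled = false;
};

void send_keep_alive_to_service(); // hypothetical stand-in for client->client_keep_alive(m_pid)

void run_keep_alive(ShutdownEvent &shutdown, std::chrono::milliseconds interval)
{
    // A timeout means "interval elapsed, no shutdown requested" - send the next keep-alive.
    while (!shutdown.wait_for(interval)) {
        send_keep_alive_to_service();
    }
}
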
set(SRC_FILES
${CMAKE_CURRENT_SOURCE_DIR}/stream.cpp
${CMAKE_CURRENT_SOURCE_DIR}/stream_internal.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/nms_stream_reader.cpp
)
set(HAILORT_CPP_SOURCES ${HAILORT_CPP_SOURCES} ${SRC_FILES} PARENT_SCOPE)
--- /dev/null
+/**
+ * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+**/
+/**
+ * @file async_common.hpp
+ * @brief Common types/functions for async api
+ **/
+
+#ifndef _HAILO_ASYNC_COMMON_HPP_
+#define _HAILO_ASYNC_COMMON_HPP_
+
+#include "hailo/stream.hpp"
+
+namespace hailort
+{
+
+// Internal callback type - a wrapper over the user callback that accepts the transfer status as an argument.
+using InternalTransferDoneCallback = std::function<void(hailo_status)>;
+
+struct TransferRequest {
+ MemoryView buffer;
+ InternalTransferDoneCallback callback;
+
+ // Optional pre-mapped user buffer. If set, mapped_buffer must reference the same memory as "buffer".
+ BufferPtr mapped_buffer = nullptr;
+};
+
+} /* namespace hailort */
+
+#endif /* _HAILO_ASYNC_COMMON_HPP_ */
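
The TransferRequest above is the async API's internal unit of work: the raw view to transfer, a status-only completion callback, and an optional BufferPtr that both pins the memory and lets lower layers reuse an existing DMA mapping. A minimal sketch of wrapping a user-facing callback into an InternalTransferDoneCallback (`buffer` and `user_callback` are assumed to exist; the stream classes later in this change do essentially the same wrapping):

// Assumed context:
//   BufferPtr buffer;
//   std::function<void(hailo_status, void *, size_t)> user_callback;
auto wrapped_callback = [buffer, user_callback](hailo_status status) {
    // Capturing `buffer` by value keeps the underlying memory alive until completion.
    user_callback(status, buffer->data(), buffer->size());
};
TransferRequest request{MemoryView(*buffer), wrapped_callback, buffer};
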
--- /dev/null
+/**
+ * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+ **/
+/**
+ * @file nms_stream_reader.cpp
+ * @brief Static class that helps receive and read the nms output stream according to the different burst modes, types and sizes.
+ *
+ * Explanation of state machine and logic:
+ * This class supports the following 5 nms cases:
+ * 1) Hailo-8 bbox mode (non burst mode)
+ * 2) Hailo-15 bbox mode
+ * 3) Hailo-8 Burst mode
+ * 4) Hailo-15 Burst per class mode
+ * 5) Hailo-15 Burst per frame mode
+ *
+ * Let's explain each mode and its state machine:
+ * 1)-2) Hailo-8 bbox mode / Hailo-15 bbox mode - both work the same - they read bbox after bbox from the nms core until a delimiter arrives,
+ * and expect to read the same number of delimiters as the number of classes (times num chunks if there is more than one chunk per frame).
+ *
+ * 3) Hailo-8 Burst mode - Hailo-8 burst mode reads bursts of burst-size and expects each burst to be made of x bboxes followed by
+ * a delimiter and padding until the end of the burst - essentially what the state machine does here is read until the first delimiter
+ * and then expect padding until the end of the burst (in release mode we don't check that the rest of the burst is padding and
+ * just go on to the next burst, but in debug we validate that the rest of the burst is padding). NOTE: in Hailo-8 the delimiter value and
+ * the padding value are both 0xFFFFFFFFFFFFFFFF, so essentially we read until the first delimiter - and every following delimiter
+ * in the burst is padding. This mode also supports interrupt per frame - assuming the burst size received from the SDK is larger than max bboxes + 1 (for the delimiter),
+ * we know there will be one burst per class, hence the output size will be num classes * burst size, and we enable one interrupt per frame.
+ *
+ * 4) Hailo-15 Burst per class mode - Hailo-15 Burst per class mode reads bursts of burst size and expects the following order:
+ * x bboxes, followed by a delimiter, followed by an image delimiter, followed by padding until the end of the burst. The bboxes, delimiter
+ * and image delimiter can all be in different bursts - so essentially the state machine works as follows: we read burst by burst,
+ * and in each burst we iterate over the bboxes until we find a delimiter - at that point we know how many bboxes there were for that class,
+ * and then we expect to see an image delimiter following the delimiter; once we read the image delimiter we expect padding until the end of the
+ * burst (which we verify in debug but not in release). NOTE: if a burst ends on a delimiter we need to read the next burst to get the image delimiter,
+ * even in the case where the number of delimiters we read equals the number of classes - otherwise there is still data in the core
+ * that was not emptied and it would be read as part of the next frame. This mode also supports interrupt per frame - assuming the burst size received from the SDK
+ * is larger than max bboxes + 2 (for the image delimiter and the delimiter), we know there will be one burst per class, hence the output size will be
+ * num classes * burst size, and we enable one interrupt per frame.
+ *
+ * 5) Hailo-15 Burst per frame mode - Hailo-15 Burst per frame mode reads bursts of burst size and expects the following order:
+ * x bboxes, followed by a delimiter, for all the classes, until the last class where the last delimiter should be followed by an image delimiter
+ * and padding until the end of the burst. The state machine works as follows: we read burst by burst, and each time we reach a delimiter
+ * we save the number of bboxes that were read for that class and keep reading the burst. NOTE: this is the only mode where there can be multiple
+ * delimiters per burst. Once we read the last delimiter (which we know from the number of classes) - we ensure there is a following image delimiter (which again
+ * can be in the following burst) and then assume the rest of the burst is padding (which we verify in debug). NOTE: currently this mode is not
+ * supported in the SDK.
+ *
+ **/
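+// Illustrative burst layouts (a sketch of the description above, with an example burst size of
+// 8 bboxes; B = bbox, D = delimiter, I = image delimiter, P = padding):
+//   Hailo-8 burst:       [B][B][B][D][P][P][P][P]  (D and P share the value 0xFFFFFFFFFFFFFFFF)
+//   Hailo-15 per class:  [B][B][B][D][I][P][P][P]  (one D and one I per class)
+//   Hailo-15 per frame:  [B][D][B][B][D][B][D][I]  (multiple Ds per burst, one I after the last class)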
+
+#include "hailo/hailort.h"
+#include "hailo/expected.hpp"
+#include "stream_common/nms_stream_reader.hpp"
+#include "src/hef/layer_info.hpp"
+
+namespace hailort
+{
+
+static void finish_reading_burst_update_state(NMSBurstState *burst_state, bool *can_stop_reading_burst, size_t *burst_index)
+{
+ *burst_state = NMSBurstState::NMS_BURST_STATE_WAITING_FOR_DELIMETER;
+ *burst_index = (*burst_index + 1);
+ *can_stop_reading_burst = true;
+}
+
+// Function that implements the state machine of the 3 different nms burst modes based on the value of the current bbox and the current state.
+hailo_status NMSStreamReader::advance_state_machine(NMSBurstState *burst_state, const uint64_t current_bbox,
+ const hailo_nms_burst_type_t burst_type, const uint32_t num_classes, size_t *num_delimeters_received,
+ bool *can_stop_reading_burst, const size_t burst_offset, const size_t burst_size, size_t *burst_index)
+{
+ switch(current_bbox) {
+ // This is also the case for Hailo-8 padding - seeing as they share the same value
+ case NMS_DELIMITER:
+ {
+ // In hailo8 per class mode - if we are in state waiting for delimiter - we received a delimiter,
+ // otherwise we must be in state waiting for padding - in which case we received padding.
+ if (HAILO_BURST_TYPE_H8_PER_CLASS == burst_type) {
+ CHECK_IN_DEBUG((NMSBurstState::NMS_BURST_STATE_WAITING_FOR_DELIMETER == (*burst_state)) ||
+ (NMSBurstState::NMS_BURST_STATE_WAITING_FOR_PADDING == (*burst_state)), HAILO_NMS_BURST_INVALID_DATA,
+ "Invalid state, H8 NMS burst cannot receive delimeter while in state {}", (*burst_state));
+ // To differentiate from H8 padding - where we should not increment the number of delimiters found
+ if ((*burst_state) == NMSBurstState::NMS_BURST_STATE_WAITING_FOR_DELIMETER) {
+ (*num_delimeters_received)++;
+ }
+#ifdef NDEBUG
+ // In hailo8 burst mode - if we are in state waiting for delimiter and got a delimiter - the rest will be padding and we can skip it
+ if ((*burst_state) == NMSBurstState::NMS_BURST_STATE_WAITING_FOR_DELIMETER) {
+ finish_reading_burst_update_state(burst_state, can_stop_reading_burst, burst_index);
+ break;
+ }
+#endif
+ // In hailo8 mode, after a delimiter we expect padding until the end of the burst - seeing as h8 padding has the same value.
+ // Whether we were in state wait for delimiter or state wait for padding - we always go to wait for padding until the end of the burst
+ *burst_state = NMSBurstState::NMS_BURST_STATE_WAITING_FOR_PADDING;
+ if (burst_offset == (burst_size - sizeof(current_bbox))) {
+ finish_reading_burst_update_state(burst_state, can_stop_reading_burst, burst_index);
+ }
+ break;
+
+ } else if (HAILO_BURST_TYPE_H15_PER_CLASS == burst_type) {
+ CHECK_IN_DEBUG(NMSBurstState::NMS_BURST_STATE_WAITING_FOR_DELIMETER == (*burst_state), HAILO_NMS_BURST_INVALID_DATA,
+ "Invalid state, H15 Per class NMS burst cannot receive delimeter while in state {}", (*burst_state));
+ (*num_delimeters_received)++;
+ *burst_state = NMSBurstState::NMS_BURST_STATE_WAITING_FOR_IMAGE_DELIMETER;
+ } else {
+ CHECK_IN_DEBUG(NMSBurstState::NMS_BURST_STATE_WAITING_FOR_DELIMETER == (*burst_state), HAILO_NMS_BURST_INVALID_DATA,
+ "Invalid state, H15 Per Frame NMS burst cannot receive delimeter while in state {}", (*burst_state));
+ // in hailo15 per frame - if number of delimeter is same as num classes - we expect image delimeter next
+ // otherwise expect another delimeter
+ (*num_delimeters_received)++;
+ if (num_classes == (*num_delimeters_received)) {
+ *burst_state = NMSBurstState::NMS_BURST_STATE_WAITING_FOR_IMAGE_DELIMETER;
+ }
+ }
+ break;
+ }
+
+ case NMS_IMAGE_DELIMITER:
+ {
+ CHECK_IN_DEBUG(HAILO_BURST_TYPE_H8_PER_CLASS != burst_type, HAILO_NMS_BURST_INVALID_DATA,
+ "Invalid state, H8 NMS burst cannot receive image delimeter");
+
+ CHECK_IN_DEBUG(NMSBurstState::NMS_BURST_STATE_WAITING_FOR_IMAGE_DELIMETER == (*burst_state), HAILO_NMS_BURST_INVALID_DATA,
+ "Invalid state, H15 NMS burst cannot receive image delimeter in state {}", (*burst_state));
+
+ // in both hailo15 per class and per frame - when receiving an image delimiter we move to expecting padding
+ *burst_state = NMSBurstState::NMS_BURST_STATE_WAITING_FOR_PADDING;
+
+#ifdef NDEBUG
+ finish_reading_burst_update_state(burst_state, can_stop_reading_burst, burst_index);
+#endif // NDEBUG
+ break;
+ }
+
+ case NMS_H15_PADDING:
+ {
+ if ((HAILO_BURST_TYPE_H15_PER_CLASS == burst_type) || (HAILO_BURST_TYPE_H15_PER_FRAME == burst_type)) {
+ CHECK_IN_DEBUG(NMSBurstState::NMS_BURST_STATE_WAITING_FOR_PADDING == (*burst_state), HAILO_NMS_BURST_INVALID_DATA,
+ "Invalid state, H15 NMS burst cannot receive padding in state {}", (*burst_state));
+ }
+ // In case of padding the next state is wait for padding, unless it is the last padding of the burst - then the next state will be
+ // wait for delimiter - we will only get to this stage in debug - in release, once the image delimiter is read we ignore the rest of the
+ // burst seeing as it must be padding
+ if (burst_offset == (burst_size - sizeof(current_bbox))) {
+ finish_reading_burst_update_state(burst_state, can_stop_reading_burst, burst_index);
+ }
+ break;
+ }
+ }
+
+ return HAILO_SUCCESS;
+}
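+// Example trace (a sketch) for a single Hailo-15 per-class burst [B][B][D][I][P][P][P][P]:
+//   start:    NMS_BURST_STATE_WAITING_FOR_DELIMETER
+//   B, B:     bboxes are copied by the caller, state unchanged
+//   D:        (*num_delimeters_received)++, state -> NMS_BURST_STATE_WAITING_FOR_IMAGE_DELIMETER
+//   I:        state -> NMS_BURST_STATE_WAITING_FOR_PADDING (in release builds the burst read stops here)
+//   P ... P:  state unchanged until the last word of the burst, where finish_reading_burst_update_state()
+//             resets the state to NMS_BURST_STATE_WAITING_FOR_DELIMETER and advances burst_index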
+
+hailo_status NMSStreamReader::read_nms_bbox_mode(OutputStream &stream, void *buffer, size_t offset)
+{
+ const uint32_t num_classes = stream.get_info().nms_info.number_of_classes;
+ const uint32_t chunks_per_frame = stream.get_info().nms_info.chunks_per_frame;
+ const size_t bbox_size = stream.get_info().nms_info.bbox_size;
+
+ for (size_t delimeters_found = 0; delimeters_found < (num_classes * chunks_per_frame); delimeters_found++) {
+ nms_bbox_counter_t class_bboxes_count = 0;
+ nms_bbox_counter_t* class_bboxes_count_ptr = (nms_bbox_counter_t*)(reinterpret_cast<uint8_t*>(buffer) + offset);
+ offset += sizeof(*class_bboxes_count_ptr);
+
+ while (true) {
+ MemoryView buffer_view(static_cast<uint8_t*>(buffer) + offset, bbox_size);
+ auto status = stream.read_impl(buffer_view);
+ if ((HAILO_STREAM_ABORTED_BY_USER == status) ||
+ ((HAILO_STREAM_NOT_ACTIVATED == status))) {
+ return status;
+ }
+ CHECK_SUCCESS(status, "Failed reading nms bbox");
+ const uint64_t current_bbox = *(uint64_t*)((uint8_t*)buffer + offset);
+
+ if (NMS_IMAGE_DELIMITER == current_bbox) {
+ continue;
+ }
+
+ if (NMS_DELIMITER == current_bbox) {
+ break;
+ }
+
+ class_bboxes_count++;
+ CHECK_IN_DEBUG(class_bboxes_count <= stream.get_info().nms_info.max_bboxes_per_class, HAILO_INTERNAL_FAILURE,
+ "Data read from the device for the current class was size {}, max size is {}", class_bboxes_count,
+ stream.get_info().nms_info.max_bboxes_per_class);
+ offset += bbox_size;
+ }
+
+ *class_bboxes_count_ptr = class_bboxes_count;
+ }
+
+ return HAILO_SUCCESS;
+}
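+// Resulting host buffer layout for bbox mode (a sketch): one iteration per class (times chunks),
+// each writing a bbox counter followed by that class' bboxes:
+//   [count_0][bbox]...[bbox][count_1][bbox]...[bbox]...[count_n-1][bbox]...[bbox]
+// where each count_i (a nms_bbox_counter_t) is filled in once that class' delimiter is read.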
+
+hailo_status NMSStreamReader::read_nms_burst_mode(OutputStream &stream, void *buffer, size_t offset, size_t buffer_size)
+{
+ NMSBurstState burst_state = NMSBurstState::NMS_BURST_STATE_WAITING_FOR_DELIMETER;
+ const uint32_t bbox_size = stream.get_info().nms_info.bbox_size;
+ const size_t burst_size = stream.get_layer_info().nms_info.burst_size * bbox_size;
+ const hailo_nms_burst_type_t burst_type = stream.get_layer_info().nms_info.burst_type;
+ const auto num_expected_delimeters = stream.get_info().nms_info.chunks_per_frame * stream.get_info().nms_info.number_of_classes;
+ // Transfer size is affected by whether we are working in interrupt per burst or interrupt per frame
+ const size_t transfer_size = LayerInfoUtils::get_nms_layer_transfer_size(stream.get_layer_info());
+ const bool is_interrupt_per_frame = (transfer_size > burst_size);
+
+ CHECK(bbox_size == sizeof(uint64_t), HAILO_INTERNAL_FAILURE,
+ "Invalid Bbox size, must be 8 bytes received {}", bbox_size);
+
+ CHECK(transfer_size <= buffer_size, HAILO_INTERNAL_FAILURE, "Invalid transfer size {}, cannot be larger than buffer size {}",
+ transfer_size, buffer_size);
+
+ // Start writing bboxes at offset sizeof(nms_bbox_counter_t) - because the first sizeof(nms_bbox_counter_t) will be
+ // used to write the number of bboxes found for class 0, etc.
+ nms_bbox_counter_t class_bboxes_count = 0;
+ nms_bbox_counter_t* class_bboxes_count_ptr = (nms_bbox_counter_t*)(reinterpret_cast<uint8_t*>(buffer) + offset);
+ offset += sizeof(nms_bbox_counter_t);
+
+ // Counts the number of delimiters found in the frame
+ size_t delimeters_found = 0;
+ size_t burst_index = 0;
+ uint8_t *start_index_of_burst_in_buffer = nullptr;
+ while ((delimeters_found < num_expected_delimeters) || (NMSBurstState::NMS_BURST_STATE_WAITING_FOR_IMAGE_DELIMETER == burst_state)) {
+ // In interrupt per frame we read the whole frame once (in the first iteration) - and don't read again in the following loop iterations.
+ // delimeters_found will always be 0 in the first iteration - and in interrupt per frame mode will always be larger in the following iterations
+ if (!is_interrupt_per_frame || (0 == delimeters_found)) {
+ assert(offset + transfer_size <= buffer_size);
+ start_index_of_burst_in_buffer = static_cast<uint8_t*>(buffer) + offset;
+ MemoryView buffer_view(start_index_of_burst_in_buffer, transfer_size);
+ auto status = stream.read_impl(buffer_view);
+ if ((HAILO_STREAM_ABORTED_BY_USER == status) || ((HAILO_STREAM_NOT_ACTIVATED == status))) {
+ return status;
+ }
+ CHECK_SUCCESS(status, "Failed reading nms burst");
+ }
+
+ // Flag that marks whether we can stop reading this burst and continue to the next one
+ bool can_stop_reading_burst = false;
+ // Iterate through burst and copy relevant data to user buffer
+ for (size_t burst_offset = 0; burst_offset < burst_size; burst_offset += bbox_size) {
+ uint64_t current_bbox = 0;
+ if (is_interrupt_per_frame) {
+ assert((burst_index * burst_size) + burst_offset < transfer_size);
+ current_bbox = *(uint64_t*)((uint8_t*)start_index_of_burst_in_buffer + (burst_index * burst_size) + burst_offset);
+ } else {
+ current_bbox = *(uint64_t*)((uint8_t*)start_index_of_burst_in_buffer + burst_offset);
+ }
+
+ // If we read a delimiter - fill in the number of bboxes found for the class (we also make sure that
+ // we are in state NMS_BURST_STATE_WAITING_FOR_DELIMETER, because in hailo8 padding has the same value)
+ if ((NMS_DELIMITER == current_bbox) && (NMSBurstState::NMS_BURST_STATE_WAITING_FOR_DELIMETER == burst_state)) {
+ *class_bboxes_count_ptr = class_bboxes_count;
+ class_bboxes_count_ptr = (nms_bbox_counter_t*)(reinterpret_cast<uint8_t*>(buffer) + offset);
+ class_bboxes_count = 0;
+ offset += sizeof(nms_bbox_counter_t);
+ }
+
+ // On a received delimiter we may stop reading the burst, because the rest of the burst is an image delimiter and then padding
+ if ((NMS_DELIMITER == current_bbox) || (NMS_IMAGE_DELIMITER == current_bbox) || (NMS_H15_PADDING == current_bbox)) {
+ auto status = advance_state_machine(&burst_state, current_bbox, burst_type, stream.get_info().nms_info.number_of_classes,
+ &delimeters_found, &can_stop_reading_burst, burst_offset, burst_size, &burst_index);
+ CHECK_SUCCESS(status);
+
+ if (can_stop_reading_burst) {
+ break;
+ }
+ continue;
+ }
+
+ class_bboxes_count++;
+ CHECK_IN_DEBUG(class_bboxes_count <= stream.get_info().nms_info.max_bboxes_per_class, HAILO_INTERNAL_FAILURE,
+ "Data read from the device for the current class was size {}, max size is {}", class_bboxes_count,
+ stream.get_info().nms_info.max_bboxes_per_class);
+
+ // Copy bbox to correct location in buffer
+ memcpy((static_cast<uint8_t*>(buffer) + offset), &current_bbox, sizeof(current_bbox));
+ offset += bbox_size;
+ }
+ }
+
+ return HAILO_SUCCESS;
+}
+
+hailo_status NMSStreamReader::read_nms(OutputStream &stream, void *buffer, size_t offset, size_t size)
+{
+ hailo_status status = HAILO_UNINITIALIZED;
+ const bool burst_mode = (HAILO_BURST_TYPE_NO_BURST != stream.get_layer_info().nms_info.burst_type);
+ if (burst_mode) {
+ status = NMSStreamReader::read_nms_burst_mode(stream, buffer, offset, size);
+ } else {
+ status = NMSStreamReader::read_nms_bbox_mode(stream, buffer, offset);
+ }
+ if ((HAILO_STREAM_ABORTED_BY_USER == status) || (HAILO_STREAM_NOT_ACTIVATED == status)) {
+ return status;
+ }
+ CHECK_SUCCESS(status, "Failed reading nms");
+
+ return HAILO_SUCCESS;
+}
+
+} /* namespace hailort */
\ No newline at end of file
--- /dev/null
+/**
+ * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+ **/
+/**
+ * @file nms_stream_reader.hpp
+ * @brief Static class that helps receive and read the nms output stream according to the different burst modes, types and sizes.
+ *
+ * For an explanation of the different burst modes and types, and of the class' state machine and logic, please check out the cpp file.
+ *
+ **/
+
+#ifndef _NMS_STREAM_READER_HPP_
+#define _NMS_STREAM_READER_HPP_
+
+#include "hailo/stream.hpp"
+#include "common/utils.hpp"
+#include "hailo/hailort_common.hpp"
+
+namespace hailort
+{
+
+static constexpr uint32_t MAX_NMS_BURST_SIZE = 65536;
+static const uint64_t NMS_DELIMITER = 0xFFFFFFFFFFFFFFFF;
+static const uint64_t NMS_IMAGE_DELIMITER = 0xFFFFFFFFFFFFFFFE;
+static const uint64_t NMS_H15_PADDING = 0xFFFFFFFFFFFFFFFD;
+
+enum class NMSBurstState {
+ NMS_BURST_STATE_WAITING_FOR_DELIMETER = 0,
+ NMS_BURST_STATE_WAITING_FOR_IMAGE_DELIMETER = 1,
+ NMS_BURST_STATE_WAITING_FOR_PADDING = 2,
+};
+
+class NMSStreamReader {
+public:
+ static hailo_status read_nms(OutputStream &stream, void *buffer, size_t offset, size_t size);
+private:
+ static hailo_status read_nms_bbox_mode(OutputStream &stream, void *buffer, size_t offset);
+ static hailo_status read_nms_burst_mode(OutputStream &stream, void *buffer, size_t offset, size_t buffer_size);
+ static hailo_status advance_state_machine(NMSBurstState *burst_state, const uint64_t current_bbox,
+ const hailo_nms_burst_type_t burst_type, const uint32_t num_classes, size_t *num_delimeters_received,
+ bool *can_stop_reading_burst, const size_t burst_offset, const size_t burst_size, size_t *burst_index);
+};
+
+} /* namespace hailort */
+
+#endif /* _NMS_STREAM_READER_HPP_ */
\ No newline at end of file
#include "hailo/hailort_common.hpp"
#include "hailo/transform.hpp"
#include "common/utils.hpp"
+#include "stream_common/nms_stream_reader.hpp"
#include <sstream>
hailo_status InputStream::write(const MemoryView &buffer)
{
- CHECK((buffer.size() % get_info().hw_frame_size) == 0, HAILO_INVALID_ARGUMENT,
- "write size {} must be a multiple of hw size {}", buffer.size(), get_info().hw_frame_size);
+ CHECK(buffer.size() == get_frame_size(), HAILO_INVALID_ARGUMENT,
+ "write size {} must be {}", buffer.size(), get_frame_size());
CHECK(((buffer.size() % HailoRTCommon::HW_DATA_ALIGNMENT) == 0), HAILO_INVALID_ARGUMENT,
"Input must be aligned to {} (got {})", HailoRTCommon::HW_DATA_ALIGNMENT, buffer.size());
-
- return sync_write_all_raw_buffer_no_transform_impl(const_cast<uint8_t*>(buffer.data()), 0, buffer.size());
+
+ return write_impl(buffer);
}
-hailo_status InputStream::wait_for_ready(size_t /* transfer_size */, std::chrono::milliseconds /* timeout */)
+hailo_status InputStream::write(const void *buffer, size_t size)
{
- return HAILO_NOT_IMPLEMENTED;
+ return write(MemoryView::create_const(buffer, size));
}
-hailo_status InputStream::write_async(std::shared_ptr<DmaMappedBuffer> /* buffer */, const TransferDoneCallback &/* user_callback */, void */* opaque */)
+hailo_status InputStream::wait_for_async_ready(size_t /* transfer_size */, std::chrono::milliseconds /* timeout */)
{
+ LOGGER__ERROR("wait_for_async_ready not implemented for sync API");
return HAILO_NOT_IMPLEMENTED;
}
+Expected<size_t> InputStream::get_async_max_queue_size() const
+{
+ LOGGER__ERROR("get_async_max_queue_size not implemented for sync API");
+ return make_unexpected(HAILO_NOT_IMPLEMENTED);
+}
+
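+// Illustrative usage of the synchronous write API (a sketch; `stream` is an assumed InputStream
+// reference, and the buffer size must match get_frame_size() as checked above):
+//   std::vector<uint8_t> frame(stream.get_frame_size());
+//   hailo_status status = stream.write(MemoryView(frame.data(), frame.size()));
+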
std::string InputStream::to_string() const
{
std::stringstream string_stream;
hailo_status OutputStream::read_nms(void *buffer, size_t offset, size_t size)
{
- uint32_t num_of_classes = get_info().nms_info.number_of_classes;
- uint32_t max_bboxes_per_class = get_info().nms_info.max_bboxes_per_class;
- uint32_t chunks_per_frame = get_info().nms_info.chunks_per_frame;
- size_t bbox_size = get_info().nms_info.bbox_size;
- size_t transfer_size = bbox_size;
-
CHECK(size == get_info().hw_frame_size, HAILO_INSUFFICIENT_BUFFER,
"On nms stream buffer size should be {} (given size {})", get_info().hw_frame_size, size);
- for (uint32_t chunk_index = 0; chunk_index < chunks_per_frame; chunk_index++) {
- for (uint32_t class_index = 0; class_index < num_of_classes; class_index++) {
- nms_bbox_counter_t class_bboxes_count = 0;
- nms_bbox_counter_t* class_bboxes_count_ptr = (nms_bbox_counter_t*)(reinterpret_cast<uint8_t*>(buffer) + offset);
- offset += sizeof(*class_bboxes_count_ptr);
-
- // Read bboxes until reaching delimiter
- for (;;) {
- MemoryView buffer_view(static_cast<uint8_t*>(buffer) + offset, transfer_size);
- auto expected_bytes_read = sync_read_raw_buffer(buffer_view);
- if ((HAILO_STREAM_ABORTED_BY_USER == expected_bytes_read.status()) ||
- ((HAILO_STREAM_NOT_ACTIVATED == expected_bytes_read.status()))) {
- return expected_bytes_read.status();
- }
- CHECK_EXPECTED_AS_STATUS(expected_bytes_read, "Failed reading nms bbox");
- transfer_size = expected_bytes_read.release();
- CHECK(transfer_size == bbox_size, HAILO_INTERNAL_FAILURE,
- "Data read from the device was size {}, should be bbox size {}", transfer_size, bbox_size);
-
- if (HailoRTCommon::NMS_DUMMY_DELIMITER == *(uint64_t*)((uint8_t*)buffer + offset)) {
- continue;
- }
-
- if (HailoRTCommon::NMS_DELIMITER == *(uint64_t*)((uint8_t*)buffer + offset)) {
- break;
- }
-
- class_bboxes_count++;
- CHECK(class_bboxes_count <= max_bboxes_per_class, HAILO_INTERNAL_FAILURE,
- "Data read from the device for the current class was size {}, max size is {}", class_bboxes_count, max_bboxes_per_class);
- offset += bbox_size;
- }
-
- *class_bboxes_count_ptr = class_bboxes_count;
- }
- }
- return HAILO_SUCCESS;
+ return NMSStreamReader::read_nms((*this), buffer, offset, size);
}
hailo_status OutputStream::read(MemoryView buffer)
{
- CHECK((buffer.size() % get_info().hw_frame_size) == 0, HAILO_INVALID_ARGUMENT,
- "Read size {} must be a multiple of hw size {}", buffer.size(), get_info().hw_frame_size);
+ CHECK(buffer.size() == get_frame_size(), HAILO_INVALID_ARGUMENT, "Read size {} must be {}", buffer.size(),
+ get_frame_size());
if (get_info().format.order == HAILO_FORMAT_ORDER_HAILO_NMS){
return read_nms(buffer.data(), 0, buffer.size());
} else {
- return this->read_all(buffer);
+ return read_impl(buffer);
}
}
-hailo_status OutputStream::wait_for_ready(size_t /* transfer_size */, std::chrono::milliseconds /* timeout */)
+hailo_status OutputStream::read(void *buffer, size_t size)
{
- return HAILO_NOT_IMPLEMENTED;
+ return read(MemoryView(buffer, size));
}
-hailo_status OutputStream::read_async(std::shared_ptr<DmaMappedBuffer> /* buffer */, const TransferDoneCallback &/* user_callback */, void */* opaque */)
+hailo_status OutputStream::wait_for_async_ready(size_t /* transfer_size */, std::chrono::milliseconds /* timeout */)
{
+ LOGGER__ERROR("wait_for_async_ready not implemented for sync API");
return HAILO_NOT_IMPLEMENTED;
}
+Expected<size_t> OutputStream::get_async_max_queue_size() const
+{
+ LOGGER__ERROR("get_async_max_queue_size not implemented for sync API");
+ return make_unexpected(HAILO_NOT_IMPLEMENTED);
+}
std::string OutputStream::to_string() const
{
m_stream_info = stream_info;
}
+hailo_status InputStreamBase::write_async(BufferPtr buffer, const TransferDoneCallback &user_callback)
+{
+ CHECK_ARG_NOT_NULL(buffer);
+ CHECK_ARG_NOT_NULL(buffer->data());
+ CHECK(buffer->size() == get_frame_size(), HAILO_INVALID_ARGUMENT, "Write size {} must be frame size {}", buffer->size(),
+ get_frame_size());
+
+ auto wrapped_callback = [buffer, user_callback](hailo_status status) {
+ user_callback(CompletionInfo{status, buffer->data(), buffer->size()});
+ };
+ return write_async(TransferRequest{MemoryView(*buffer), wrapped_callback, buffer});
+}
+
+hailo_status InputStreamBase::write_async(const MemoryView &buffer, const TransferDoneCallback &user_callback)
+{
+ CHECK_ARG_NOT_NULL(buffer.data());
+ CHECK(buffer.size() == get_frame_size(), HAILO_INVALID_ARGUMENT, "Write size {} must be frame size {}", buffer.size(),
+ get_frame_size());
+
+ auto wrapped_callback = [buffer, user_callback](hailo_status status) {
+ user_callback(CompletionInfo{status, const_cast<uint8_t*>(buffer.data()), buffer.size()});
+ };
+ return write_async(TransferRequest{buffer, wrapped_callback});
+}
+
+hailo_status InputStreamBase::write_async(const void *buffer, size_t size, const TransferDoneCallback &user_callback)
+{
+ return write_async(MemoryView::create_const(buffer, size), user_callback);
+}
+
+hailo_status InputStreamBase::write_async(TransferRequest &&)
+{
+ LOGGER__ERROR("write_async not implemented for sync API");
+ return HAILO_NOT_IMPLEMENTED;
+}
+
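+// Illustrative usage of write_async (a sketch; `stream` and `buffer` are assumed to exist):
+//   auto status = stream.write_async(buffer, [](const InputStream::CompletionInfo &info) {
+//       // Invoked once the transfer completes; info.status reports success or failure.
+//   });
+// Note that the BufferPtr overload above captures `buffer` in the wrapped callback, which keeps
+// the memory (and any existing DMA mapping) alive until the transfer completes.
+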
EventPtr &InputStreamBase::get_core_op_activated_event()
{
return m_core_op_activated_event;
m_stream_info = stream_info;
}
+hailo_status OutputStreamBase::read_async(BufferPtr buffer, const TransferDoneCallback &user_callback)
+{
+ CHECK_ARG_NOT_NULL(buffer);
+ CHECK_ARG_NOT_NULL(buffer->data());
+ CHECK(buffer->size() == get_frame_size(), HAILO_INVALID_ARGUMENT, "Read size {} must be frame size {}", buffer->size(),
+ get_frame_size());
+
+ auto wrapped_callback = [buffer, user_callback](hailo_status status) {
+ user_callback(CompletionInfo{status, const_cast<uint8_t*>(buffer->data()), buffer->size()});
+ };
+ return read_async(TransferRequest{MemoryView(*buffer), wrapped_callback, buffer});
+}
+
+hailo_status OutputStreamBase::read_async(MemoryView buffer, const TransferDoneCallback &user_callback)
+{
+ CHECK_ARG_NOT_NULL(buffer.data());
+ CHECK(buffer.size() == get_frame_size(), HAILO_INVALID_ARGUMENT, "Read size {} must be frame size {}", buffer.size(),
+ get_frame_size());
+
+ auto wrapped_callback = [buffer, user_callback](hailo_status status) {
+ user_callback(CompletionInfo{status, const_cast<uint8_t*>(buffer.data()), buffer.size()});
+ };
+ return read_async(TransferRequest{buffer, wrapped_callback});
+}
+
+hailo_status OutputStreamBase::read_async(void *buffer, size_t size, const TransferDoneCallback &user_callback)
+{
+ return read_async(MemoryView(buffer, size), user_callback);
+}
+
+hailo_status OutputStreamBase::read_async(TransferRequest &&)
+{
+ LOGGER__ERROR("read_async not implemented for sync API");
+ return HAILO_NOT_IMPLEMENTED;
+}
+
EventPtr &OutputStreamBase::get_core_op_activated_event()
{
return m_core_op_activated_event;
*
* InputStream (External "interface")
* |-- InputStreamBase (Base class)
- * |-- VdmaInputStream
+ * |-- VdmaInputStreamBase
+ * |-- VdmaInputStream
+ * |-- VdmaAsyncInputStream
* |-- EthernetInputStream
* |-- MipiInputStream
+ * |-- VDeviceInputStreamBase
+ * |-- See vdevice_stream.hpp for subclasses
*
*
* OutputStream (External "interface")
* |-- OutputStreamBase (Base class)
- * |-- VdmaOutputStream
+ * |-- VdmaOutputStreamBase
+ * |-- VdmaOutputStream
+ * |-- VdmaAsyncOutputStream
* |-- EthernetOutputStream
- *
+ * |-- VDeviceOutputStreamBase
+ * |-- See vdevice_stream.hpp for subclasses
**/
#ifndef _STREAM_INTERNAL_HPP_
#include "hailo/event.hpp"
#include "hailo/hailort_common.hpp"
+#include "stream_common/async_common.hpp"
#include "hef/hef_internal.hpp"
#include "device_common/control_protocol.hpp"
#include "hef/layer_info.hpp"
#include "vdma/channel/boundary_channel.hpp"
+using device_id_t = std::string;
+
namespace hailort
{
return m_nn_stream_config;
};
- virtual hailo_status send_pending_buffer(size_t device_index = 0)
+ virtual hailo_status send_pending_buffer(const device_id_t &device_id)
{
- (void)device_index;
+ (void)device_id;
return HAILO_INVALID_OPERATION;
}
{
return make_unexpected(HAILO_INVALID_OPERATION);
}
-
+
virtual Expected<size_t> get_pending_frames_count() const
{
return make_unexpected(HAILO_INVALID_OPERATION);
}
- virtual hailo_status register_interrupt_callback(const vdma::ProcessingCompleteCallback &/*callback*/)
- {
- return HAILO_INVALID_OPERATION;
- }
+ virtual hailo_status write_async(BufferPtr buffer, const TransferDoneCallback &user_callback) override final;
+ virtual hailo_status write_async(const MemoryView &buffer, const TransferDoneCallback &user_callback) override final;
+ virtual hailo_status write_async(const void *buffer, size_t size, const TransferDoneCallback &user_callback) override final;
+
+ virtual hailo_status write_async(TransferRequest &&transfer_request);
CONTROL_PROTOCOL__nn_stream_config_t m_nn_stream_config;
{
m_stream_info = LayerInfoUtils::get_stream_info_from_layer_info(layer_info);
- const bool hw_padding_supported = HefConfigurator::is_hw_padding_supported(layer_info);
+ auto max_periph_bytes_from_hef = HefConfigurator::max_periph_bytes_value(stream_interface);
+ if (HAILO_SUCCESS != max_periph_bytes_from_hef.status()) {
+ status = max_periph_bytes_from_hef.status();
+ return;
+ }
+ const auto max_periph_bytes = MIN(max_periph_bytes_from_hef.value(), layer_info.max_shmifo_size);
+ const bool hw_padding_supported = HefConfigurator::is_hw_padding_supported(layer_info, max_periph_bytes);
+
auto nn_stream_config = HefConfigurator::parse_nn_stream_config(layer_info,
hw_padding_supported && (HAILO_STREAM_INTERFACE_MIPI != stream_interface)); // On MIPI networks, we don't want to use hw padding nn stream config.
if(!nn_stream_config) {
{
return make_unexpected(HAILO_INVALID_OPERATION);
}
-
+
virtual Expected<size_t> get_pending_frames_count() const
{
return make_unexpected(HAILO_INVALID_OPERATION);
}
- virtual hailo_status register_interrupt_callback(const vdma::ProcessingCompleteCallback &/*callback*/)
+ virtual hailo_status set_next_device_to_read(const device_id_t &device_id)
{
+ (void)device_id;
return HAILO_INVALID_OPERATION;
}
+ virtual hailo_status read_async(BufferPtr buffer, const TransferDoneCallback &user_callback) override final;
+ virtual hailo_status read_async(MemoryView buffer, const TransferDoneCallback &user_callback) override final;
+ virtual hailo_status read_async(void *buffer, size_t size, const TransferDoneCallback &user_callback) override final;
+
+ virtual hailo_status read_async(TransferRequest &&transfer_request);
+
CONTROL_PROTOCOL__nn_stream_config_t m_nn_stream_config;
protected:
- explicit OutputStreamBase(const LayerInfo &layer_info,
+ explicit OutputStreamBase(const LayerInfo &layer_info, hailo_stream_interface_t stream_interface,
EventPtr &&core_op_activated_event, hailo_status &status) :
m_layer_info(layer_info), m_core_op_activated_event(std::move(core_op_activated_event))
{
m_stream_info = LayerInfoUtils::get_stream_info_from_layer_info(m_layer_info);
- const bool hw_padding_supported = HefConfigurator::is_hw_padding_supported(m_layer_info);
+ auto max_periph_bytes_from_hef = HefConfigurator::max_periph_bytes_value(stream_interface);
+ if (HAILO_SUCCESS != max_periph_bytes_from_hef.status()) {
+ status = max_periph_bytes_from_hef.status();
+ return;
+ }
+ const auto max_periph_bytes = MIN(max_periph_bytes_from_hef.value(), layer_info.max_shmifo_size);
+ const bool hw_padding_supported = HefConfigurator::is_hw_padding_supported(layer_info, max_periph_bytes);
+
auto nn_stream_config = HefConfigurator::parse_nn_stream_config(m_layer_info, hw_padding_supported);
if(!nn_stream_config) {
LOGGER__ERROR("Failed parse nn stream config");
switch (format.order)
{
case HAILO_FORMAT_ORDER_NHWC:
+ case HAILO_FORMAT_ORDER_RGB4:
case HAILO_FORMAT_ORDER_NHW:
case HAILO_FORMAT_ORDER_BAYER_RGB:
case HAILO_FORMAT_ORDER_12_BIT_BAYER_RGB:
void transform__d2h_NMS(const uint8_t *src_ptr, uint8_t *dst_ptr, const hailo_nms_info_t &nms_info, std::vector<size_t> &chunk_offsets)
{
/* Validate arguments */
- ASSERT(NULL != src_ptr);
- ASSERT(NULL != dst_ptr);
+ assert(NULL != src_ptr);
+ assert(NULL != dst_ptr);
uint32_t num_of_classes = nms_info.number_of_classes;
uint32_t bbox_size = nms_info.bbox_size;
// Add bbox from all chunks of current class
src_offset = chunk_offsets[chunk_index];
class_bboxes_count = *((nms_bbox_counter_t*)((uint8_t*)src_ptr + src_offset));
+ assert(class_bboxes_count <= nms_info.max_bboxes_per_class);
*dst_bbox_counter = static_cast<nms_bbox_counter_t>(*dst_bbox_counter + class_bboxes_count);
src_offset += sizeof(nms_bbox_counter_t);
CHECK(dst_image_shape.features == 1, HAILO_INVALID_OPERATION,
"NHCW_to_NHW argmax Transform is supported only when dst features ({}) is 1",
dst_image_shape.features);
- CHECK(src_image_shape.features < std::numeric_limits<T>::max(), HAILO_INVALID_OPERATION,
- "NHCW_to_NHW argmax Transform is supported only when src features ({}) is smaller than {}",
+ CHECK(src_image_shape.features <= std::numeric_limits<T>::max(), HAILO_INVALID_OPERATION,
+ "NHCW_to_NHW argmax Transform is supported only when src features ({}) is equal/smaller than {}",
src_image_shape.features, std::numeric_limits<T>::max());
const auto src_row_size = src_image_shape.width * src_image_shape.features;
const auto src_row_size = HailoRTCommon::align_to(row_size, RGB4_ALIGNMENT);
const auto dst_row_size = dst_image_shape.width * dst_image_shape.features;
- const auto pad_size = (dst_image_shape.width - src_image_shape.width) * dst_image_shape.features;
+ const auto pad_size = dst_image_shape.width - src_image_shape.width;
uint32_t src_offset = 0;
uint32_t dst_offset = 0;
dst_offset = r * dst_row_size + f * dst_image_shape.width + c;
dst_ptr[dst_offset] = src_ptr[src_offset];
}
- /* pad feature to 8 elemnts */
+ /* pad feature to 8 elements */
if (pad_size != 0) {
dst_offset = r * dst_row_size + f * dst_image_shape.width + src_image_shape.width;
std::fill_n(dst_ptr + dst_offset, pad_size, static_cast<T>(0));
switch (m_dst_format.type) {
case HAILO_FORMAT_TYPE_UINT8:
if (HAILO_FORMAT_TYPE_UINT8 == m_src_format.type) {
- Quantization::dequantize_output_buffer_in_place<uint8_t, uint8_t>((uint8_t*)dst_ptr, shape_size, m_dst_quant_info);
+ if (m_are_all_qps_the_same) {
+ Quantization::dequantize_output_buffer_in_place<uint8_t, uint8_t>((uint8_t*)dst_ptr, shape_size, m_dst_quant_info);
+ } else {
+ dequantize_output_by_feature<uint8_t, uint8_t>((uint8_t*)dst_ptr, shape_size, m_quant_info_per_feature, m_quant_infos_rep_count);
+ }
}
else {
return HAILO_INVALID_OPERATION;
break;
case HAILO_FORMAT_TYPE_UINT16:
if (HAILO_FORMAT_TYPE_UINT8 == m_src_format.type) {
- Quantization::dequantize_output_buffer_in_place<uint16_t, uint8_t>((uint16_t*)dst_ptr, shape_size, m_dst_quant_info);
+ if (m_are_all_qps_the_same) {
+ Quantization::dequantize_output_buffer_in_place<uint16_t, uint8_t>((uint16_t*)dst_ptr, shape_size, m_dst_quant_info);
+ } else {
+ dequantize_output_by_feature<uint16_t, uint8_t>((uint16_t*)dst_ptr, shape_size, m_quant_info_per_feature, m_quant_infos_rep_count);
+ }
}
else if (HAILO_FORMAT_TYPE_UINT16 == m_src_format.type) {
- Quantization::dequantize_output_buffer_in_place<uint16_t, uint16_t>((uint16_t*)dst_ptr, shape_size, m_dst_quant_info);
+ if (m_are_all_qps_the_same) {
+ Quantization::dequantize_output_buffer_in_place<uint16_t, uint16_t>((uint16_t*)dst_ptr, shape_size, m_dst_quant_info);
+ } else {
+ dequantize_output_by_feature<uint16_t, uint16_t>((uint16_t*)dst_ptr, shape_size, m_quant_info_per_feature, m_quant_infos_rep_count);
+ }
}
else {
return HAILO_INVALID_OPERATION;
/* if output layer is argmax - do not rescale */
if (HAILO_FORMAT_ORDER_NHW != m_dst_format.order) {
if (HAILO_FORMAT_TYPE_UINT8 == m_src_format.type) {
- Quantization::dequantize_output_buffer_in_place<float32_t, uint8_t>((float32_t*)dst_ptr, shape_size, m_dst_quant_info);
+ if (m_are_all_qps_the_same) {
+ Quantization::dequantize_output_buffer_in_place<float32_t, uint8_t>((float32_t*)dst_ptr, shape_size, m_dst_quant_info);
+ } else {
+ dequantize_output_by_feature<float32_t, uint8_t>((float32_t*)dst_ptr, shape_size, m_quant_info_per_feature, m_quant_infos_rep_count);
+ }
}
else if (HAILO_FORMAT_TYPE_UINT16 == m_src_format.type) {
- Quantization::dequantize_output_buffer_in_place<float32_t, uint16_t>((float32_t*)dst_ptr, shape_size, m_dst_quant_info);
+ if (m_are_all_qps_the_same) {
+ Quantization::dequantize_output_buffer_in_place<float32_t, uint16_t>((float32_t*)dst_ptr, shape_size, m_dst_quant_info);
+ } else {
+ dequantize_output_by_feature<float32_t, uint16_t>((float32_t*)dst_ptr, shape_size, m_quant_info_per_feature, m_quant_infos_rep_count);
+ }
}
else {
return HAILO_INVALID_OPERATION;
OutputTransformContext(src_frame_size, src_format, dst_frame_size, dst_format, dst_quant_info, should_quantize,
should_transpose, should_reorder), m_src_image_shape(src_image_shape), m_dst_image_shape(dst_image_shape),
m_transpose_buffer(std::move(transpose_buffer))
-{}
+{
+ std::vector<hailo_quant_info_t> dst_quant_infos = { dst_quant_info }; // TODO: Get vector from HEF
+ bool are_all_qps_the_same = true;
+ if (dst_quant_infos.size() > 1) {
+ for (const auto &quant_info : dst_quant_infos) {
+ if (0 != memcmp(&quant_info, &dst_quant_infos[0], sizeof(quant_info))) {
+ are_all_qps_the_same = false;
+ break;
+ }
+ }
+ }
+ m_are_all_qps_the_same = are_all_qps_the_same;
+
+ switch (dst_format.order) {
+ case HAILO_FORMAT_ORDER_NHW:
+ case HAILO_FORMAT_ORDER_BAYER_RGB:
+ case HAILO_FORMAT_ORDER_12_BIT_BAYER_RGB:
+ case HAILO_FORMAT_ORDER_NCHW:
+ for (const auto &quant_info : dst_quant_infos) {
+ m_quant_info_per_feature.emplace_back(quant_info.qp_zp, quant_info.qp_scale);
+ }
+ m_quant_infos_rep_count = static_cast<uint32_t>(dst_frame_size);
+ break;
+ case HAILO_FORMAT_ORDER_NHWC:
+ case HAILO_FORMAT_ORDER_FCR:
+ case HAILO_FORMAT_ORDER_F8CR:
+ case HAILO_FORMAT_ORDER_NC:
+ case HAILO_FORMAT_ORDER_RGB4:
+ for (const auto &quant_info : dst_quant_infos) {
+ m_quant_info_per_feature.emplace_back(quant_info.qp_zp, quant_info.qp_scale);
+ }
+ m_quant_infos_rep_count = 1;
+ break;
+ case HAILO_FORMAT_ORDER_NHCW:
+ for (const auto &quant_info : dst_quant_infos) {
+ m_quant_info_per_feature.emplace_back(quant_info.qp_zp, quant_info.qp_scale);
+ }
+ m_quant_infos_rep_count = dst_image_shape.width;
+ break;
+ default:
+ LOGGER__CRITICAL("Got unknown format order = {}", dst_format.order);
+ break;
+ }
+}
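+// Sketch of the repetition-count semantics chosen above: m_quant_infos_rep_count is the number
+// of consecutive output elements that share one feature's quant info before moving to the next.
+// With illustrative numbers, NHCW (rep count == width) with width == 4 and two features applies
+// qp[0] qp[0] qp[0] qp[0] qp[1] qp[1] qp[1] qp[1] across a row, while NHWC (rep count == 1)
+// interleaves qp[0] qp[1] qp[0] qp[1] ...
+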
Expected<std::unique_ptr<OutputTransformContext>> FrameOutputTransformContext::create(const hailo_3d_image_shape_t &src_image_shape,
const hailo_format_t &src_format, const hailo_3d_image_shape_t &dst_image_shape,
#include "hailo/buffer.hpp"
#include "hailo/hef.hpp"
#include "hailo/transform.hpp"
+#include "hailo/quantization.hpp"
#include "stream_common/stream_internal.hpp"
#include "hef/layer_info.hpp"
std::vector<hailo_mux_info_t> m_mux_infos;
};
+struct QuantInfoForDequantize
+{
+ float32_t m_qp_zp;
+ float32_t m_qp_scale;
+ QuantInfoForDequantize(float32_t qp_zp, float32_t qp_scale) : m_qp_zp(qp_zp), m_qp_scale(qp_scale)
+ {}
+};
+
class HAILORTAPI FrameOutputTransformContext final : public OutputTransformContext
{
public:
virtual std::string description() const override;
private:
+ template <typename T, typename Q>
+ static inline void dequantize_output_by_feature(T *dst_ptr, uint32_t buffer_elements_count,
+ const std::vector<QuantInfoForDequantize> &quant_infos, uint32_t repetition_count)
+ {
+ uint32_t elements_dequantized = 0;
+ while (elements_dequantized < buffer_elements_count) {
+ for (int32_t i = static_cast<int32_t>(quant_infos.size()) - 1; i >= 0; i--) {
+ Quantization::dequantize_output_buffer_in_place<T, Q>(dst_ptr, buffer_elements_count - repetition_count - elements_dequantized,
+ repetition_count, quant_infos[i].m_qp_zp, quant_infos[i].m_qp_scale);
+ elements_dequantized += repetition_count;
+ }
+ }
+ }
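+ // Worked example (illustrative numbers): with qp_zp = 128 and qp_scale = 0.5, a quantized
+ // value of 200 dequantizes to (200 - 128) * 0.5 = 36.0 - this per-element transform is what
+ // dequantize_output_buffer_in_place applies to each repetition_count-sized run.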
+
const hailo_3d_image_shape_t m_src_image_shape;
const hailo_3d_image_shape_t m_dst_image_shape;
Buffer m_transpose_buffer;
+ bool m_are_all_qps_the_same;
+ std::vector<QuantInfoForDequantize> m_quant_info_per_feature;
+ uint32_t m_quant_infos_rep_count;
};
class HAILORTAPI NMSOutputTransformContext final : public OutputTransformContext
${CMAKE_CURRENT_SOURCE_DIR}/hailort_common.cpp
${CMAKE_CURRENT_SOURCE_DIR}/hailort_logger.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/buffer_storage.cpp
${CMAKE_CURRENT_SOURCE_DIR}/buffer.cpp
${CMAKE_CURRENT_SOURCE_DIR}/sensor_config_utils.cpp
)
{
assert(nullptr != buffer);
+ stream << "[addr = " << static_cast<const void *>(buffer) << ", size = " << size << "]" << std::endl;
+
static const bool UPPERCASE = true;
static const size_t BYTES_PER_LINE = 32;
static const char *BYTE_DELIM = " ";
stream << fmt::format("0x{:08X}", offset) << BYTE_DELIM; // 32 bit offset into a buffer should be enough
stream << StringUtils::to_hex_string(buffer + offset, line_size, UPPERCASE, BYTE_DELIM) << std::endl;
}
- stream << "[size = " << std::dec << size << "]";
}
Buffer::Buffer() :
+ m_storage(),
m_data(nullptr),
m_size(0)
{}
+Buffer::Buffer(BufferStoragePtr storage) :
+ m_storage(storage),
+ m_data(static_cast<uint8_t *>(m_storage->user_address())),
+ m_size(m_storage->size())
+{}
+
Buffer::Buffer(Buffer&& other) :
- m_data(std::move(other.m_data)),
+ m_storage(std::move(other.m_storage)),
+ m_data(std::exchange(other.m_data, nullptr)),
m_size(std::exchange(other.m_size, 0))
{}
-Expected<Buffer> Buffer::create(size_t size)
+Expected<Buffer> Buffer::create(size_t size, const BufferStorageParams &params)
{
- std::unique_ptr<uint8_t[]> data(new (std::nothrow) uint8_t[size]);
- if (data == nullptr) {
- LOGGER__ERROR("Failed allocating {} bytes", size);
- return make_unexpected(HAILO_OUT_OF_HOST_MEMORY);
- }
+ auto storage = BufferStorage::create(size, params);
+ CHECK_EXPECTED(storage);
- return Buffer(std::move(data), size);
+ return Buffer(storage.release());
}
-Expected<Buffer> Buffer::create(size_t size, uint8_t default_value)
+Expected<Buffer> Buffer::create(size_t size, uint8_t default_value, const BufferStorageParams &params)
{
- auto buffer = create(size);
+ auto buffer = create(size, params);
CHECK_EXPECTED(buffer);
- std::memset(static_cast<void*>(buffer->m_data.get()), default_value, size);
+ std::memset(static_cast<void*>(buffer->m_data), default_value, size);
return buffer;
}
-Expected<BufferPtr> Buffer::create_shared(size_t size)
+Expected<BufferPtr> Buffer::create_shared(size_t size, const BufferStorageParams &params)
{
- auto buffer = Buffer::create(size);
+ auto buffer = Buffer::create(size, params);
CHECK_EXPECTED(buffer);
auto buffer_ptr = make_shared_nothrow<Buffer>(buffer.release());
CHECK_NOT_NULL_AS_EXPECTED(buffer_ptr, HAILO_OUT_OF_HOST_MEMORY);
return buffer_ptr;
}
-Expected<BufferPtr> Buffer::create_shared(size_t size, uint8_t default_value)
+Expected<BufferPtr> Buffer::create_shared(size_t size, uint8_t default_value, const BufferStorageParams &params)
{
- auto buffer = Buffer::create(size, default_value);
+ auto buffer = Buffer::create(size, default_value, params);
CHECK_EXPECTED(buffer);
auto buffer_ptr = make_shared_nothrow<Buffer>(buffer.release());
CHECK_NOT_NULL_AS_EXPECTED(buffer_ptr, HAILO_OUT_OF_HOST_MEMORY);
return buffer_ptr;
}
-Expected<Buffer> Buffer::create(const uint8_t *src, size_t size)
+Expected<BufferPtr> Buffer::create_shared(const uint8_t *src, size_t size, const BufferStorageParams &params)
{
- auto buffer = create(size);
+ auto buffer = Buffer::create(src, size, params);
CHECK_EXPECTED(buffer);
- std::memcpy(static_cast<void*>(buffer->m_data.get()), static_cast<const void*>(src), size);
+ auto buffer_ptr = make_shared_nothrow<Buffer>(buffer.release());
+ CHECK_NOT_NULL_AS_EXPECTED(buffer_ptr, HAILO_OUT_OF_HOST_MEMORY);
+ return buffer_ptr;
+}
+
+Expected<Buffer> Buffer::create(const uint8_t *src, size_t size, const BufferStorageParams &params)
+{
+ auto buffer = create(size, params);
+ CHECK_EXPECTED(buffer);
+ std::memcpy(static_cast<void*>(buffer->m_data), static_cast<const void*>(src), size);
return buffer;
}
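+// Illustrative usage (a sketch): the default params keep the previous heap behavior, while
+// BufferStorageParams::create_dma() requests DMA-able storage (see buffer_storage.cpp below):
+//   auto heap_buffer = Buffer::create(1024);                                    // heap storage
+//   auto dma_buffer  = Buffer::create(1024, BufferStorageParams::create_dma()); // DMA-able storage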
-Expected<Buffer> Buffer::create(std::initializer_list<uint8_t> init)
+Expected<Buffer> Buffer::create(std::initializer_list<uint8_t> init, const BufferStorageParams &params)
{
- auto buffer = create(init.size());
+ auto buffer = create(init.size(), params);
CHECK_EXPECTED(buffer);
size_t index = 0;
for (const auto& n : init) {
Expected<Buffer> Buffer::copy() const
{
- return Buffer::create(m_data.get(), m_size);
+ return Buffer::create(m_data, m_size);
}
Buffer& Buffer::operator=(Buffer&& other)
{
- m_data = std::move(other.m_data);
+ m_storage = std::move(other.m_storage);
+ m_data = std::exchange(other.m_data, nullptr);
m_size = std::exchange(other.m_size, 0);
return *this;
}
if (m_size != rhs.m_size) {
return false;
}
- return (0 == std::memcmp(data(), rhs.data(), m_size));
+ return (0 == std::memcmp(m_data, rhs.m_data, m_size));
}
bool Buffer::operator!=(const Buffer& rhs) const
if (m_size != rhs.m_size) {
return true;
}
- return (0 != std::memcmp(data(), rhs.data(), m_size));
+ return (0 != std::memcmp(m_data, rhs.m_data, m_size));
}
uint8_t& Buffer::operator[](size_t pos)
return iterator(data() + m_size);
}
+BufferStorage &Buffer::storage()
+{
+ return *m_storage;
+}
+
uint8_t* Buffer::data() noexcept
{
- return m_data.get();
+ return m_data;
}
const uint8_t* Buffer::data() const noexcept
{
- return m_data.get();
+ return m_data;
}
size_t Buffer::size() const noexcept
return m_size;
}
-uint8_t* Buffer::release() noexcept
-{
- m_size = 0;
- return m_data.release();
-}
-
std::string Buffer::to_string() const
{
for (size_t i = 0; i < m_size; i++) {
if (m_data[i] == 0) {
// We'll return a string that ends at the first null in the buffer
- return std::string(reinterpret_cast<const char*>(m_data.get()));
+ return std::string(reinterpret_cast<const char*>(m_data));
}
}
- return std::string(reinterpret_cast<const char*>(m_data.get()), m_size);
+ return std::string(reinterpret_cast<const char*>(m_data), m_size);
}
// Note: This is a friend function
return as_type<uint64_t>();
}
-Buffer::Buffer(std::unique_ptr<uint8_t[]> data, size_t size) :
- m_data(std::move(data)),
- m_size(size)
- {}
-
MemoryView::MemoryView() :
m_data(nullptr),
m_size(0)
--- /dev/null
+/**\r
+ * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved.\r
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)\r
+**/\r
+/**\r
+ * @file buffer_storage.cpp\r
+ * @brief TODO: fill me (HRT-10026)\r
+ **/\r
+\r
+#include "hailo/buffer_storage.hpp"\r
+#include "hailo/hailort.h"\r
+#include "hailo/vdevice.hpp"\r
+#include "vdma/vdma_device.hpp"\r
+#include "vdma/memory/dma_able_buffer.hpp"\r
+#include "vdma/memory/mapped_buffer.hpp"\r
+#include "common/utils.hpp"\r
+\r
+namespace hailort\r
+{\r
+\r
+// Checking ABI of hailo_dma_buffer_direction_t vs HailoRTDriver::DmaDirection\r
+static_assert(HAILO_DMA_BUFFER_DIRECTION_H2D == (int)HailoRTDriver::DmaDirection::H2D,\r
+ "hailo_dma_buffer_direction_t must match HailoRTDriver::DmaDirection");\r
+static_assert(HAILO_DMA_BUFFER_DIRECTION_D2H == (int)HailoRTDriver::DmaDirection::D2H,\r
+ "hailo_dma_buffer_direction_t must match HailoRTDriver::DmaDirection");\r
+static_assert(HAILO_DMA_BUFFER_DIRECTION_BOTH == (int)HailoRTDriver::DmaDirection::BOTH,\r
+ "hailo_dma_buffer_direction_t must match HailoRTDriver::DmaDirection");\r
+\r
+BufferStorageParams::HeapParams::HeapParams()\r
+{}\r
+\r
+Expected<BufferStorageParams::DmaMappingParams> BufferStorageParams::DmaMappingParams::create(\r
+ const hailo_buffer_dma_mapping_params_t ¶ms)\r
+{\r
+ CHECK_AS_EXPECTED((params.device == nullptr) || (params.vdevice == nullptr), HAILO_INVALID_ARGUMENT,\r
+ "Can't set both device and vdevice fields");\r
+ return DmaMappingParams(params);\r
+}\r
+\r
+BufferStorageParams::DmaMappingParams::DmaMappingParams(const hailo_buffer_dma_mapping_params_t ¶ms) :\r
+ device(reinterpret_cast<Device*>(params.device)),\r
+ vdevice(reinterpret_cast<VDevice*>(params.vdevice)),\r
+ data_direction(params.direction)\r
+{}\r
+\r
+BufferStorageParams::DmaMappingParams::DmaMappingParams(Device &device, hailo_dma_buffer_direction_t data_direction) :\r
+ device(&device),\r
+ vdevice(nullptr),\r
+ data_direction(data_direction)\r
+{}\r
+\r
+BufferStorageParams::DmaMappingParams::DmaMappingParams(VDevice &vdevice, hailo_dma_buffer_direction_t data_direction) :\r
+ device(nullptr),\r
+ vdevice(&vdevice),\r
+ data_direction(data_direction)\r
+{}\r
+\r
+BufferStorageParams::DmaMappingParams::DmaMappingParams() :\r
+ device(nullptr),\r
+ vdevice(nullptr),\r
+ data_direction(HAILO_DMA_BUFFER_DIRECTION_MAX_ENUM)\r
+{}\r
+\r
+Expected<BufferStorageParams> BufferStorageParams::create(const hailo_buffer_parameters_t ¶ms)\r
+{\r
+ BufferStorageParams result{};\r
+ result.flags = params.flags;\r
+\r
+ if (params.flags == HAILO_BUFFER_FLAGS_NONE) {\r
+ result.heap_params = HeapParams();\r
+ } else if ((params.flags & HAILO_BUFFER_FLAGS_DMA) != 0) {\r
+ auto dma_mapping_params = DmaMappingParams::create(params.dma_mapping_params);\r
+ CHECK_EXPECTED(dma_mapping_params);\r
+ result.dma_mapping_params = dma_mapping_params.release();\r
+ } else {\r
+ // TODO: HRT-10903\r
+ LOGGER__ERROR("Buffer storage flags not currently supported {}", params.flags);\r
+ return make_unexpected(HAILO_NOT_IMPLEMENTED);\r
+ }\r
+\r
+ return result;\r
+}\r
+\r
+BufferStorageParams BufferStorageParams::create_dma()\r
+{\r
+ BufferStorageParams result{};\r
+ result.flags = HAILO_BUFFER_FLAGS_DMA;\r
+ result.dma_mapping_params = DmaMappingParams();\r
+ return result;\r
+}\r
+\r
+BufferStorageParams BufferStorageParams::create_dma(Device &device, hailo_dma_buffer_direction_t data_direction)\r
+{\r
+ BufferStorageParams result{};\r
+ result.flags = HAILO_BUFFER_FLAGS_DMA;\r
+ result.dma_mapping_params = DmaMappingParams(device, data_direction);\r
+ return result;\r
+}\r
+\r
+BufferStorageParams BufferStorageParams::create_dma(VDevice &vdevice, hailo_dma_buffer_direction_t data_direction)\r
+{\r
+ BufferStorageParams result{};\r
+ result.flags = HAILO_BUFFER_FLAGS_DMA;\r
+ result.dma_mapping_params = DmaMappingParams(vdevice, data_direction);\r
+ return result;\r
+}\r
+\r
+BufferStorageParams::BufferStorageParams() :\r
+ flags(HAILO_BUFFER_FLAGS_NONE),\r
+ heap_params()\r
+{}\r
+\r
+Expected<BufferStoragePtr> BufferStorage::create(size_t size, const BufferStorageParams ¶ms)\r
+{\r
+ if (params.flags == HAILO_BUFFER_FLAGS_NONE) {\r
+ auto result = HeapStorage::create(size);\r
+ CHECK_EXPECTED(result);\r
+ return std::static_pointer_cast<BufferStorage>(result.release());\r
+ } else if (0 != (params.flags & HAILO_BUFFER_FLAGS_DMA)) {\r
+ // TODO: check other flags here (HRT-10903)\r
+ auto &dma_mapping_params = params.dma_mapping_params;\r
+\r
+ DmaStoragePtr storage = nullptr;\r
+ if ((dma_mapping_params.device != nullptr) && (dma_mapping_params.vdevice != nullptr)) {\r
+ LOGGER__ERROR("Can't map a buffer to both vdevice and device");\r
+ return make_unexpected(HAILO_INVALID_ARGUMENT);\r
+ } else if (dma_mapping_params.device != nullptr) {\r
+ auto result = DmaStorage::create(size, dma_mapping_params.data_direction,\r
+ *dma_mapping_params.device);\r
+ CHECK_EXPECTED(result);\r
+ storage = result.release();\r
+ } else if (dma_mapping_params.vdevice != nullptr) {\r
+ auto result = DmaStorage::create(size, dma_mapping_params.data_direction,\r
+ *dma_mapping_params.vdevice);\r
+ CHECK_EXPECTED(result);\r
+ storage = result.release();\r
+ } else {\r
+ auto result = DmaStorage::create(size);\r
+ CHECK_EXPECTED(result);\r
+ storage = result.release();\r
+ }\r
+ return std::static_pointer_cast<BufferStorage>(storage);\r
+ }\r
+\r
+ // TODO: HRT-10903\r
+ LOGGER__ERROR("Buffer storage flags not currently supported {}", params.flags);\r
+ return make_unexpected(HAILO_NOT_IMPLEMENTED);\r
+}\r
+\r
+BufferStorage::BufferStorage(Type type) :\r
+ m_type(type)\r
+{}\r
+\r
+BufferStorage::Type BufferStorage::type() const\r
+{\r
+ return m_type;\r
+}\r
+\r
+Expected<HeapStoragePtr> HeapStorage::create(size_t size)\r
+{\r
+ std::unique_ptr<uint8_t[]> data(new (std::nothrow) uint8_t[size]);\r
+ CHECK_NOT_NULL_AS_EXPECTED(data, HAILO_OUT_OF_HOST_MEMORY);\r
+\r
+ auto result = make_shared_nothrow<HeapStorage>(std::move(data), size);\r
+ CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY);\r
+\r
+ return result;\r
+}\r
+\r
+HeapStorage::HeapStorage(std::unique_ptr<uint8_t[]> data, size_t size) :\r
+ BufferStorage(Type::HEAP),\r
+ m_data(std::move(data)),\r
+ m_size(size)\r
+{}\r
+\r
+HeapStorage::HeapStorage(HeapStorage&& other) noexcept :\r
+ BufferStorage(std::move(other)),\r
+ m_data(std::move(other.m_data)),\r
+ m_size(std::exchange(other.m_size, 0))\r
+{}\r
+\r
+size_t HeapStorage::size() const\r
+{\r
+ return m_size;\r
+}\r
+\r
+void *HeapStorage::user_address()\r
+{\r
+ return m_data.get();\r
+}\r
+\r
+Expected<void *> HeapStorage::release() noexcept\r
+{\r
+ m_size = 0;\r
+ return m_data.release();\r
+}\r
+\r
+Expected<bool> HeapStorage::dma_map(Device &, hailo_dma_buffer_direction_t)\r
+{\r
+ LOGGER__ERROR("Heap allocated buffers can't be mapped to DMA");\r
+ return make_unexpected(HAILO_INVALID_OPERATION);\r
+}\r
+\r
+Expected<bool> HeapStorage::dma_map(HailoRTDriver &, hailo_dma_buffer_direction_t)\r
+{\r
+ LOGGER__ERROR("Heap allocated buffers can't be mapped to DMA");\r
+ return make_unexpected(HAILO_INVALID_OPERATION);\r
+}\r
+\r
+Expected<vdma::MappedBufferPtr> HeapStorage::get_dma_mapped_buffer(const std::string &)\r
+{\r
+ LOGGER__ERROR("Mapped buffer is not supported for Heap allocated buffers");\r
+ return make_unexpected(HAILO_INVALID_OPERATION);\r
+}\r
+\r
+Expected<DmaStoragePtr> DmaStorage::create(size_t size)\r
+{\r
+ static const auto ALLOCATE_BUFFER = nullptr;\r
+ return create(ALLOCATE_BUFFER, size);\r
+}\r
+\r
+Expected<DmaStoragePtr> DmaStorage::create(size_t size,\r
+ hailo_dma_buffer_direction_t data_direction, Device &device)\r
+{\r
+ static const auto ALLOCATE_BUFFER = nullptr;\r
+ return create(ALLOCATE_BUFFER, size, data_direction,\r
+ std::vector<std::reference_wrapper<Device>>{std::ref(device)});\r
+}\r
+\r
+Expected<DmaStoragePtr> DmaStorage::create(size_t size,\r
+ hailo_dma_buffer_direction_t data_direction, VDevice &vdevice)\r
+{\r
+ static const auto ALLOCATE_BUFFER = nullptr;\r
+ auto physical_devices = vdevice.get_physical_devices();\r
+ CHECK_EXPECTED(physical_devices);\r
+ return create(ALLOCATE_BUFFER, size, data_direction, physical_devices.release());\r
+}\r
+\r
+Expected<DmaStoragePtr> DmaStorage::create_from_user_address(void *user_address, size_t size)\r
+{\r
+ return create(user_address, size);\r
+}\r
+\r
+Expected<DmaStoragePtr> DmaStorage::create_from_user_address(void *user_address, size_t size,\r
+ hailo_dma_buffer_direction_t data_direction, Device &device)\r
+{\r
+ CHECK_ARG_NOT_NULL_AS_EXPECTED(user_address);\r
+ return create(user_address, size, data_direction,\r
+ std::vector<std::reference_wrapper<Device>>{std::ref(device)});\r
+}\r
+\r
+Expected<DmaStoragePtr> DmaStorage::create_from_user_address(void *user_address, size_t size,\r
+ hailo_dma_buffer_direction_t data_direction, VDevice &vdevice)\r
+{\r
+ CHECK_ARG_NOT_NULL_AS_EXPECTED(user_address);\r
+ auto physical_devices = vdevice.get_physical_devices();\r
+ CHECK_EXPECTED(physical_devices);\r
+ return create(user_address, size, data_direction, physical_devices.release());\r
+}\r
+\r
+Expected<DmaStoragePtr> DmaStorage::create(void *user_address, size_t size,\r
+ hailo_dma_buffer_direction_t data_direction,\r
+ std::vector<std::reference_wrapper<Device>> &&physical_devices)\r
+{\r
+ // TODO: HRT-10283 support sharing low memory buffers for DART and similar systems.\r
+ auto dma_able_buffer = vdma::DmaAbleBuffer::create(size, user_address);\r
+ CHECK_EXPECTED(dma_able_buffer);\r
+\r
+ auto result = make_shared_nothrow<DmaStorage>(dma_able_buffer.release());\r
+ CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY);\r
+\r
+ for (auto &device : physical_devices) {\r
+ auto is_new_mapping = result->dma_map(device, data_direction);\r
+ CHECK_EXPECTED(is_new_mapping);\r
+ CHECK_AS_EXPECTED(is_new_mapping.value(), HAILO_INTERNAL_FAILURE);\r
+ }\r
+\r
+ return result;\r
+}\r
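+\r
+// Usage sketch (illustrative only - `device` stands for an already-opened Device, `frame_size`\r
+// for a buffer size in bytes; error handling is elided):\r
+//   auto storage = DmaStorage::create(frame_size, HAILO_DMA_BUFFER_DIRECTION_BOTH, device);\r
+//   if (storage) {\r
+//       void *ptr = storage.value()->user_address(); // fill/read the DMA-able memory\r
+//   }\r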
+\r
+DmaStorage::DmaStorage(vdma::DmaAbleBufferPtr &&dma_able_buffer) :\r
+ BufferStorage(Type::DMA),\r
+ m_dma_able_buffer(std::move(dma_able_buffer)),\r
+ m_mappings()\r
+{}\r
+\r
+size_t DmaStorage::size() const\r
+{\r
+ return m_dma_able_buffer->size();\r
+}\r
+\r
+void *DmaStorage::user_address()\r
+{\r
+ return m_dma_able_buffer->user_address();\r
+}\r
+\r
+Expected<void *> DmaStorage::release() noexcept\r
+{\r
+ return make_unexpected(HAILO_NOT_IMPLEMENTED);\r
+}\r
+\r
+Expected<bool> DmaStorage::dma_map(Device &device, hailo_dma_buffer_direction_t data_direction)\r
+{\r
+ const auto device_type = device.get_type();\r
+ CHECK_AS_EXPECTED(((Device::Type::INTEGRATED == device_type) || (Device::Type::PCIE == device_type)),\r
+ HAILO_INVALID_ARGUMENT, "Invalid device type (expected integrated/pcie, received {})", device_type);\r
+ VdmaDevice *vdma_device = reinterpret_cast<VdmaDevice*>(&device);\r
+\r
+ return dma_map(vdma_device->get_driver(), data_direction);\r
+}\r
+\r
+Expected<bool> DmaStorage::dma_map(HailoRTDriver &driver, hailo_dma_buffer_direction_t data_direction)\r
+{\r
+ CHECK_AS_EXPECTED(data_direction <= HAILO_DMA_BUFFER_DIRECTION_BOTH, HAILO_INVALID_ARGUMENT,\r
+ "Invalid data direction {}", data_direction);\r
+\r
+ const auto &device_id = driver.device_id();\r
+ auto find_result = m_mappings.find(device_id);\r
+ if (find_result != m_mappings.end()) {\r
+ // The buffer has been mapped => don't map it again\r
+ return Expected<bool>(false); // not a new mapping\r
+ }\r
+\r
+ // The buffer hasn't been mapped => map it now\r
+ auto mapped_buffer = vdma::MappedBuffer::create_shared(driver, m_dma_able_buffer,\r
+ static_cast<HailoRTDriver::DmaDirection>(data_direction));\r
+ CHECK_EXPECTED(mapped_buffer);\r
+\r
+ m_mappings.emplace(device_id, mapped_buffer.value());\r
+ return Expected<bool>(true); // new mapping\r
+}\r
+\r
+Expected<vdma::MappedBufferPtr> DmaStorage::get_dma_mapped_buffer(const std::string &device_id)\r
+{\r
+ auto mapped_buffer = m_mappings.find(device_id);\r
+ if (mapped_buffer == m_mappings.end()) {\r
+        // Not necessarily an error - the buffer may simply not be mapped for this device, so log at INFO level only\r
+ LOGGER__INFO("Mapped buffer for {} not found", device_id);\r
+ return make_unexpected(HAILO_NOT_FOUND);\r
+ }\r
+\r
+ return Expected<vdma::MappedBufferPtr>(mapped_buffer->second);\r
+}\r
+\r
+} /* namespace hailort */\r
+++ /dev/null
-/**
- * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
- * Distributed under the MIT license (https://opensource.org/licenses/MIT)
- **/
-/**
- * @file event.hpp
- * @brief Event and Semaphore wrapper objects used for multithreading
- **/
-
-#ifndef _EVENT_INTERNAL_HPP_
-#define _EVENT_INTERNAL_HPP_
-
-#include "hailo/hailort.h"
-#include "hailo/expected.hpp"
-
-#include <memory>
-#include <vector>
-#include <array>
-#include <chrono>
-#if defined(__GNUC__)
-#include <poll.h>
-#endif
-
-namespace hailort
-{
-
-// TODO: Replace with a static wait_multiple func belonging to Waitable (SDK-16567).
-// Will get a vector of pointers as an argument. Can also use variadic
-// template args for cases with fixed number Waitables
-class WaitOrShutdown final
-{
-public:
- WaitOrShutdown(WaitablePtr waitable, EventPtr shutdown_event);
- ~WaitOrShutdown() = default;
-
- WaitOrShutdown(const WaitOrShutdown &other) = delete;
- WaitOrShutdown &operator=(const WaitOrShutdown &other) = delete;
- WaitOrShutdown(WaitOrShutdown &&other) noexcept = default;
- WaitOrShutdown &operator=(WaitOrShutdown &&other) = delete;
-
- // Waits on waitable or shutdown_event to be signaled:
- // * If shutdown_event is signaled:
- // - shutdown_event is not reset
- // - HAILO_SHUTDOWN_EVENT_SIGNALED is returned
- // * If waitable is signaled:
- // - waitable is reset if waitable->is_auto_reset()
- // - HAILO_SUCCESS is returned
- // * If both waitable and shutdown_event are signaled:
- // - shutdown_event is not reset
- // - waitable is not reset
- // - HAILO_SHUTDOWN_EVENT_SIGNALED is returned
- // * If neither are signaled, then HAILO_TIMEOUT is returned
- // * On any failure an appropriate status shall be returned
- hailo_status wait(std::chrono::milliseconds timeout);
- hailo_status signal();
-
-private:
- // Note: We want to guarantee that if the shutdown event is signaled, HAILO_SHUTDOWN_EVENT_SIGNALED will be
- // returned.
- // * In Unix, using poll this isn't a problem since we'll get all the readable fds in a single call.
- // * In Windows, using WaitForMultipleObjects, this works differently (from msdn):
- // If bWaitAll is FALSE, the return value minus WAIT_OBJECT_0 indicates the lpHandles array index
- // of the object that satisfied the wait. If more than one object became signaled during the call,
- // this is the array index of the signaled object with the smallest index value of all the signaled
- // objects.
- // (https://docs.microsoft.com/en-us/windows/win32/api/synchapi/nf-synchapi-waitformultipleobjects)
- // * Hence, SHUTDOWN_INDEX must come before WAITABLE_INDEX!
- static const size_t SHUTDOWN_INDEX = 0;
- static const size_t WAITABLE_INDEX = 1;
- #if defined(_MSC_VER) || defined(__QNX__)
- using WaitHandleArray = std::array<underlying_waitable_handle_t, 2>;
- #else
- using WaitHandleArray = std::array<struct pollfd, 2>;
- #endif
-
- const WaitablePtr m_waitable;
- const EventPtr m_shutdown_event;
- WaitHandleArray m_wait_handle_array;
-
- static WaitHandleArray create_wait_handle_array(WaitablePtr waitable, EventPtr shutdown_event);
-};
-
-} /* namespace hailort */
-
-#endif /* _EVENT_INTERNAL_HPP_ */
--- /dev/null
+/**\r
+ * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved.\r
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)\r
+**/\r
+/**\r
+ * @file exported_resource_manager.hpp\r
+ * @brief Holds resources that are exported via the C API\r
+ **/\r
+\r
+#ifndef _HAILO_EXPORTED_RESOURCE_MANAGER_HPP_\r
+#define _HAILO_EXPORTED_RESOURCE_MANAGER_HPP_\r
+\r
+#include "hailo/hailort.h"\r
+\r
+#include <unordered_map>\r
+\r
+namespace hailort\r
+{\r
+\r
+// TODO: Merge ExportedResourceManager and SharedResourceManager (HRT-10317)\r
+template<typename Resource, typename Key, typename Hash = std::hash<Key>>\r
+class ExportedResourceManager final\r
+{\r
+public:\r
+ static hailo_status register_resource(const Resource &resource, const Key &key)\r
+ {\r
+ return get_instance().register_resource_impl(resource, key);\r
+ }\r
+\r
+ static Expected<std::reference_wrapper<Resource>> get_resource(const Key &key)\r
+ {\r
+ return get_instance().get_resource_impl(key);\r
+ }\r
+\r
+ static hailo_status unregister_resource(const Key &key)\r
+ {\r
+ return get_instance().unregister_resource_impl(key);\r
+ }\r
+\r
+private:\r
+ static ExportedResourceManager& get_instance()\r
+ {\r
+ static ExportedResourceManager instance;\r
+ return instance;\r
+ }\r
+\r
+ hailo_status register_resource_impl(const Resource &resource, const Key &key)\r
+ {\r
+ std::lock_guard<std::mutex> lock_guard(m_mutex);\r
+\r
+ auto it = m_storage.find(key);\r
+ if (it != m_storage.end()) {\r
+ LOGGER__TRACE("There's already a resource registered under key {}", key);\r
+ return HAILO_INVALID_ARGUMENT;\r
+ }\r
+\r
+ m_storage[key] = resource;\r
+ return HAILO_SUCCESS;\r
+ }\r
+\r
+ Expected<std::reference_wrapper<Resource>> get_resource_impl(const Key &key)\r
+ {\r
+ std::lock_guard<std::mutex> lock_guard(m_mutex);\r
+\r
+ auto it = m_storage.find(key);\r
+ if (it == m_storage.end()) {\r
+ LOGGER__TRACE("Key {} not found in resource manager", key);\r
+ return make_unexpected(HAILO_NOT_FOUND);\r
+ }\r
+\r
+ return std::ref(it->second);\r
+ }\r
+\r
+ hailo_status unregister_resource_impl(const Key &key)\r
+ {\r
+ std::lock_guard<std::mutex> lock_guard(m_mutex);\r
+\r
+ auto it = m_storage.find(key);\r
+ if (it == m_storage.end()) {\r
+ LOGGER__TRACE("Key {} not found in resource manager", key);\r
+ return HAILO_NOT_FOUND;\r
+ }\r
+\r
+ m_storage.erase(it);\r
+ return HAILO_SUCCESS;\r
+ }\r
+\r
+ std::mutex m_mutex;\r
+ std::unordered_map<Key, Resource, Hash> m_storage;\r
+};\r
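+\r
+// Usage sketch (illustrative only - the resource and key types here are assumptions, not a fixed API):\r
+//   using BufferManager = ExportedResourceManager<std::shared_ptr<Buffer>, void *>;\r
+//\r
+//   auto status = BufferManager::register_resource(buffer, buffer->data()); // HAILO_INVALID_ARGUMENT on duplicate keys\r
+//   auto buffer_ref = BufferManager::get_resource(buffer->data());          // Expected<std::reference_wrapper<...>>\r
+//   status = BufferManager::unregister_resource(buffer->data());            // HAILO_NOT_FOUND if the key is absent\r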
+\r
+} /* namespace hailort */\r
+\r
+#endif /* _HAILO_EXPORTED_RESOURCE_MANAGER_HPP_ */\r
const uint32_t HailoRTCommon::BBOX_PARAMS;
const uint32_t HailoRTCommon::MAX_DEFUSED_LAYER_COUNT;
const size_t HailoRTCommon::HW_DATA_ALIGNMENT;
-const uint64_t HailoRTCommon::NMS_DELIMITER;
-const uint64_t HailoRTCommon::NMS_DUMMY_DELIMITER;
Expected<hailo_device_id_t> HailoRTCommon::to_device_id(const std::string &device_id)
{
#define HAILORT_ANDROID_LOGGER_PATTERN ("%v") // Android logger will print only the message (additional info is built-in)
#define HAILORT_LOGGER_PATH_ENV_VAR ("HAILORT_LOGGER_PATH")
+#define PERIODIC_LOGGER_FLUSH_TIME_IN_SECONDS (5)
#ifdef _WIN32
#define PATH_SEPARATOR "\\"
return make_shared_nothrow<spdlog::sinks::null_sink_st>();
}
+ auto is_dir = Filesystem::is_directory(dir_path);
+ if (!is_dir) {
+ std::cerr << "HailoRT warning: Cannot create log file " << filename << "! Path " << dir_path << " is not valid." << std::endl;
+ return make_shared_nothrow<spdlog::sinks::null_sink_st>();
+ }
+ if (!is_dir.value()) {
+ std::cerr << "HailoRT warning: Cannot create log file " << filename << "! Path " << dir_path << " is not a directory." << std::endl;
+ return make_shared_nothrow<spdlog::sinks::null_sink_st>();
+ }
+
if (!Filesystem::is_path_accesible(dir_path)) {
- std::cerr << "HailoRT warning: Cannot create log file " << filename
- << "! Please check the directory " << dir_path << " write permissions." << std::endl;
- // Create null sink instead (Will throw away its log)
+ std::cerr << "HailoRT warning: Cannot create log file " << filename << "! Please check the directory " << dir_path << " write permissions." << std::endl;
return make_shared_nothrow<spdlog::sinks::null_sink_st>();
}
const auto file_path = dir_path + PATH_SEPARATOR + filename;
if (Filesystem::does_file_exists(file_path) && !Filesystem::is_path_accesible(file_path)) {
- std::cerr << "HailoRT warning: Cannot create log file " << filename
- << "! Please check the file " << file_path << " write permissions." << std::endl;
- // Create null sink instead (Will throw away its log)
+ std::cerr << "HailoRT warning: Cannot create log file " << filename << "! Please check the file " << file_path << " write permissions." << std::endl;
return make_shared_nothrow<spdlog::sinks::null_sink_st>();
}
return make_shared_nothrow<spdlog::sinks::basic_file_sink_mt>(file_path);
}
-HailoRTLogger::HailoRTLogger() :
+HailoRTLogger::HailoRTLogger(spdlog::level::level_enum console_level, spdlog::level::level_enum file_level, spdlog::level::level_enum flush_level) :
m_console_sink(make_shared_nothrow<spdlog::sinks::stderr_color_sink_mt>()),
#ifdef __ANDROID__
m_main_log_file_sink(make_shared_nothrow<spdlog::sinks::android_sink_mt>(HAILORT_NAME))
m_local_log_file_sink(create_file_sink(get_log_path(HAILORT_LOGGER_PATH_ENV_VAR), HAILORT_LOGGER_FILENAME, true))
#endif
{
+ if ((nullptr == m_console_sink) || (nullptr == m_main_log_file_sink) || (nullptr == m_local_log_file_sink)) {
+ std::cerr << "Allocating memory on heap for logger sinks has failed! Please check if this host has enough memory. Writing to log will result in a SEGFAULT!" << std::endl;
+ return;
+ }
#ifdef __ANDROID__
m_main_log_file_sink->set_pattern(HAILORT_ANDROID_LOGGER_PATTERN);
m_local_log_file_sink->set_pattern(HAILORT_LOCAL_FILE_LOGGER_PATTERN);
#endif
- // TODO: Handle null pointers for logger and sinks
m_console_sink->set_pattern(HAILORT_CONSOLE_LOGGER_PATTERN);
spdlog::sinks_init_list sink_list = { m_console_sink, m_main_log_file_sink, m_local_log_file_sink };
m_hailort_logger = make_shared_nothrow<spdlog::logger>(HAILORT_NAME, sink_list.begin(), sink_list.end());
+ if (nullptr == m_hailort_logger) {
+ std::cerr << "Allocating memory on heap for HailoRT logger has failed! Please check if this host has enough memory. Writing to log will result in a SEGFAULT!" << std::endl;
+ return;
+ }
-#ifdef NDEBUG
- set_levels(spdlog::level::warn, spdlog::level::info, spdlog::level::warn);
-#else
- set_levels(spdlog::level::warn, spdlog::level::debug, spdlog::level::debug);
-#endif
+ set_levels(console_level, file_level, flush_level);
spdlog::set_default_logger(m_hailort_logger);
}
-std::shared_ptr<spdlog::logger> HailoRTLogger::logger()
-{
- return m_hailort_logger;
-}
-
-void HailoRTLogger::set_levels(spdlog::level::level_enum console_level,
- spdlog::level::level_enum file_level, spdlog::level::level_enum flush_level)
+void HailoRTLogger::set_levels(spdlog::level::level_enum console_level, spdlog::level::level_enum file_level,
+ spdlog::level::level_enum flush_level)
{
m_console_sink->set_level(console_level);
m_main_log_file_sink->set_level(file_level);
m_local_log_file_sink->set_level(file_level);
m_hailort_logger->flush_on(flush_level);
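+    // Note: spdlog::flush_every() starts a single background worker thread that periodically
+    // flushes all registered loggers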
+ spdlog::flush_every(std::chrono::seconds(PERIODIC_LOGGER_FLUSH_TIME_IN_SECONDS));
}
#include "hailo/hailort.h"
#include "common/logger_macros.hpp"
+#include "common/utils.hpp"
namespace hailort
{
class HailoRTLogger {
public:
- static HailoRTLogger& get_instance()
+#ifdef NDEBUG
+ static std::unique_ptr<HailoRTLogger> &get_instance(spdlog::level::level_enum console_level = spdlog::level::warn,
+ spdlog::level::level_enum file_level = spdlog::level::info, spdlog::level::level_enum flush_level = spdlog::level::warn)
+#else
+ static std::unique_ptr<HailoRTLogger> &get_instance(spdlog::level::level_enum console_level = spdlog::level::warn,
+ spdlog::level::level_enum file_level = spdlog::level::debug, spdlog::level::level_enum flush_level = spdlog::level::debug)
+#endif
{
- static HailoRTLogger instance;
+ static std::unique_ptr<HailoRTLogger> instance = nullptr;
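+        // Note: unlike construction of a function-local static object, this lazy check-then-init is not
+        // synchronized; the first call is assumed to happen before any concurrent use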
+ if (nullptr == instance) {
+ instance = make_unique_nothrow<HailoRTLogger>(console_level, file_level, flush_level);
+ }
return instance;
}
+
+ HailoRTLogger(spdlog::level::level_enum console_level, spdlog::level::level_enum file_level, spdlog::level::level_enum flush_level);
+ ~HailoRTLogger() = default;
HailoRTLogger(HailoRTLogger const&) = delete;
void operator=(HailoRTLogger const&) = delete;
- std::shared_ptr<spdlog::logger> logger();
- void set_levels(spdlog::level::level_enum console_level, spdlog::level::level_enum file_level,
- spdlog::level::level_enum flush_level);
static std::string get_log_path(const std::string &path_env_var);
static std::string get_main_log_path();
static std::shared_ptr<spdlog::sinks::sink> create_file_sink(const std::string &dir_path, const std::string &filename, bool rotate);
private:
- HailoRTLogger();
static std::string parse_log_path(const char *log_path);
+ void set_levels(spdlog::level::level_enum console_level, spdlog::level::level_enum file_level, spdlog::level::level_enum flush_level);
std::shared_ptr<spdlog::sinks::sink> m_console_sink;
set(SRC_FILES
${CMAKE_CURRENT_SOURCE_DIR}/tracer.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/scheduler_profiler_handler.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/monitor_handler.cpp
)
set(HAILORT_CPP_SOURCES ${HAILORT_CPP_SOURCES} ${SRC_FILES} PARENT_SCOPE)
--- /dev/null
+/**
+ * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+ **/
+/**
+ * @file handler.hpp
+ * @brief Base class for handlers of the HailoRT tracer mechanism
+ **/
+
+#ifndef _HAILO_HANDLER_HPP_
+#define _HAILO_HANDLER_HPP_
+
+#include "hailo/hailort.h"
+#include "hailo/stream.hpp"
+
+#include "vdevice/scheduler/scheduler_base.hpp"
+
+namespace hailort
+{
+
+struct Trace
+{
+ Trace(const std::string &name)
+ : name(name)
+ {}
+
+ virtual ~Trace() = default;
+
+ uint64_t timestamp = 0;
+ std::string name;
+};
+
+struct InitTrace : Trace
+{
+ InitTrace() : Trace("init") {}
+};
+
+struct CoreOpIdleTrace : Trace
+{
+ CoreOpIdleTrace(const device_id_t &device_id, scheduler_core_op_handle_t core_op_handle)
+ : Trace("core_op_idle"), device_id(device_id), core_op_handle(core_op_handle)
+ {}
+
+ device_id_t device_id;
+ scheduler_core_op_handle_t core_op_handle;
+};
+
+struct AddDeviceTrace : Trace
+{
+ AddDeviceTrace(const device_id_t &device_id, const std::string &device_arch)
+ : Trace("add_device_trace"), device_id(device_id), device_arch(device_arch)
+ {}
+
+ device_id_t device_id;
+ std::string device_arch;
+};
+
+struct SchedulerStartTrace : Trace
+{
+ SchedulerStartTrace(uint32_t device_count)
+ : Trace("scheduler_start"), device_count(device_count)
+ {}
+
+ uint32_t device_count = 0;
+};
+
+struct AddCoreOpTrace : Trace
+{
+ AddCoreOpTrace(const device_id_t &device_id, const std::string &core_op_name, uint64_t timeout, uint32_t threshold, scheduler_core_op_handle_t handle,
+ bool is_nms)
+ : Trace("add_core_op"), device_id(device_id), core_op_name(core_op_name), timeout(timeout), threshold(threshold), core_op_handle(handle), is_nms(is_nms)
+ {}
+
+ device_id_t device_id;
+ std::string core_op_name;
+ uint64_t timeout = 0;
+ uint32_t threshold = 0;
+ scheduler_core_op_handle_t core_op_handle = INVALID_CORE_OP_HANDLE;
+ bool is_nms;
+};
+
+struct CreateCoreOpInputStreamsTrace : Trace
+{
+ CreateCoreOpInputStreamsTrace(const device_id_t &device_id, const std::string &core_op_name, const std::string &stream_name, uint32_t queue_size)
+ : Trace("create_input_stream"), device_id(device_id), core_op_name(core_op_name), stream_name(stream_name), queue_size(queue_size)
+ {}
+
+ device_id_t device_id;
+ std::string core_op_name;
+ std::string stream_name;
+ uint32_t queue_size;
+};
+
+struct CreateCoreOpOutputStreamsTrace : Trace
+{
+ CreateCoreOpOutputStreamsTrace(const device_id_t &device_id, const std::string &core_op_name, const std::string &stream_name, uint32_t queue_size)
+ : Trace("create_output_stream"), device_id(device_id), core_op_name(core_op_name), stream_name(stream_name), queue_size(queue_size)
+ {}
+
+ device_id_t device_id;
+ std::string core_op_name;
+ std::string stream_name;
+ uint32_t queue_size;
+};
+
+struct WriteFrameTrace : Trace
+{
+ WriteFrameTrace(const device_id_t &device_id, scheduler_core_op_handle_t core_op_handle, const std::string &queue_name)
+ : Trace("write_frame"), device_id(device_id), core_op_handle(core_op_handle), queue_name(queue_name)
+ {}
+
+ device_id_t device_id;
+ scheduler_core_op_handle_t core_op_handle;
+ std::string queue_name;
+};
+
+struct InputVdmaDequeueTrace : Trace
+{
+ InputVdmaDequeueTrace(const device_id_t &device_id, scheduler_core_op_handle_t core_op_handle, const std::string &queue_name)
+ : Trace("input_vdma_dequeue"), device_id(device_id), core_op_handle(core_op_handle), queue_name(queue_name)
+ {}
+
+ device_id_t device_id;
+ scheduler_core_op_handle_t core_op_handle;
+ std::string queue_name;
+};
+
+struct ReadFrameTrace : Trace
+{
+ ReadFrameTrace(const device_id_t &device_id, scheduler_core_op_handle_t core_op_handle, const std::string &queue_name)
+ : Trace("read_frame"), device_id(device_id), core_op_handle(core_op_handle), queue_name(queue_name)
+ {}
+
+    device_id_t device_id;
+ scheduler_core_op_handle_t core_op_handle;
+ std::string queue_name;
+};
+
+struct OutputVdmaEnqueueTrace : Trace
+{
+ OutputVdmaEnqueueTrace(const device_id_t &device_id, scheduler_core_op_handle_t core_op_handle, const std::string &queue_name, uint32_t frames)
+ : Trace("output_vdma_enqueue"), device_id(device_id), core_op_handle(core_op_handle), queue_name(queue_name), frames(frames)
+ {}
+
+ device_id_t device_id;
+ scheduler_core_op_handle_t core_op_handle;
+ std::string queue_name;
+ uint32_t frames = 0;
+};
+
+struct ChooseCoreOpTrace : Trace
+{
+ ChooseCoreOpTrace(const device_id_t &device_id, scheduler_core_op_handle_t handle, bool threshold, bool timeout, core_op_priority_t priority)
+ : Trace("choose_core_op"), device_id(device_id), core_op_handle(handle), threshold(threshold), timeout(timeout), priority(priority)
+ {}
+
+ device_id_t device_id;
+ scheduler_core_op_handle_t core_op_handle;
+ bool threshold = false;
+ bool timeout = false;
+ core_op_priority_t priority;
+};
+
+struct SwitchCoreOpTrace : Trace
+{
+ SwitchCoreOpTrace(const device_id_t &device_id, scheduler_core_op_handle_t handle)
+ : Trace("switch_core_op"), device_id(device_id), core_op_handle(handle)
+ {}
+
+ device_id_t device_id;
+ scheduler_core_op_handle_t core_op_handle;
+};
+
+class Handler
+{
+public:
+ virtual ~Handler() = default;
+
+ virtual void handle_trace(const InitTrace&) {};
+ virtual void handle_trace(const AddCoreOpTrace&) {};
+ virtual void handle_trace(const CreateCoreOpInputStreamsTrace&) {};
+ virtual void handle_trace(const CreateCoreOpOutputStreamsTrace&) {};
+ virtual void handle_trace(const WriteFrameTrace&) {};
+ virtual void handle_trace(const InputVdmaDequeueTrace&) {};
+ virtual void handle_trace(const ReadFrameTrace&) {};
+ virtual void handle_trace(const OutputVdmaEnqueueTrace&) {};
+ virtual void handle_trace(const ChooseCoreOpTrace&) {};
+ virtual void handle_trace(const SwitchCoreOpTrace&) {};
+ virtual void handle_trace(const SchedulerStartTrace&) {};
+ virtual void handle_trace(const CoreOpIdleTrace&) {};
+ virtual void handle_trace(const AddDeviceTrace&) {};
+
+};
+
+struct JSON;
+
+} /* namespace hailort */
+
+#endif /* _HAILO_HANDLER_HPP_ */
\ No newline at end of file
--- /dev/null
+/**
+ * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+ **/
+/**
+ * @file monitor_handler.cpp
+ * @brief Implementation of the scheduler monitor handler for the HailoRT tracer mechanism
+ **/
+
+#include "monitor_handler.hpp"
+
+#include "common/logger_macros.hpp"
+#include "common/os_utils.hpp"
+
+namespace hailort
+{
+MonitorHandler::MonitorHandler()
+{}
+
+MonitorHandler::~MonitorHandler()
+{
+ clear_monitor();
+}
+
+void MonitorHandler::clear_monitor()
+{
+ if (m_is_monitor_currently_working) {
+ m_is_monitor_currently_working = false;
+ m_mon_shutdown_event->signal();
+ if (m_mon_thread.joinable()) {
+ m_mon_thread.join();
+ }
+ }
+ m_devices_info.clear();
+ m_core_ops_info.clear();
+}
+
+void MonitorHandler::handle_trace(const SchedulerStartTrace &trace)
+{
+ m_device_count = trace.device_count;
+ start_mon();
+}
+
+void MonitorHandler::handle_trace(const CoreOpIdleTrace &trace)
+{
+ update_utilization_read_buffers_finished(trace.device_id, trace.core_op_handle, true);
+}
+
+void MonitorHandler::handle_trace(const AddCoreOpTrace &trace)
+{
+ m_core_ops_info[trace.core_op_handle].utilization = 0;
+ m_core_ops_info[trace.core_op_handle].core_op_name = trace.core_op_name;
+ m_core_ops_info[trace.core_op_handle].is_nms = trace.is_nms;
+}
+
+void MonitorHandler::handle_trace(const AddDeviceTrace &trace)
+{
+ DeviceInfo device_info(trace.device_id, trace.device_arch);
+ m_devices_info.emplace(trace.device_id, device_info);
+}
+
+void MonitorHandler::handle_trace(const SwitchCoreOpTrace &trace)
+{
+ assert(contains(m_devices_info, trace.device_id));
+ m_devices_info.at(trace.device_id).current_core_op_handle = trace.core_op_handle;
+}
+
+void MonitorHandler::handle_trace(const CreateCoreOpInputStreamsTrace &trace)
+{
+    // TODO: HRT-10371 - remove this 'if'; it is a temporary workaround since this trace may be called from outside the scheduler.
+ if (!m_is_monitor_currently_working) { return; }
+ auto core_op_handle = get_core_op_handle_by_name(trace.core_op_name);
+ assert(contains(m_core_ops_info, core_op_handle));
+ m_core_ops_info[core_op_handle].input_streams_info[trace.stream_name] = StreamsInfo{trace.queue_size, 0};
+}
+
+void MonitorHandler::handle_trace(const CreateCoreOpOutputStreamsTrace &trace)
+{
+    // TODO: HRT-10371 - remove this 'if'; it is a temporary workaround since this trace may be called from outside the scheduler.
+ if (!m_is_monitor_currently_working) { return; }
+ auto core_op_handle = get_core_op_handle_by_name(trace.core_op_name);
+ assert(contains(m_core_ops_info, core_op_handle));
+ m_core_ops_info[core_op_handle].output_streams_info[trace.stream_name] = StreamsInfo{trace.queue_size, 0};
+}
+
+void MonitorHandler::handle_trace(const WriteFrameTrace &trace)
+{
+ assert(contains(m_core_ops_info, trace.core_op_handle));
+ assert(contains(m_core_ops_info[trace.core_op_handle].input_streams_info, trace.queue_name));
+ m_core_ops_info[trace.core_op_handle].input_streams_info[trace.queue_name].pending_frames_count++;
+}
+
+void MonitorHandler::handle_trace(const ReadFrameTrace &trace)
+{
+ assert(contains(m_core_ops_info, trace.core_op_handle));
+ assert(contains(m_core_ops_info[trace.core_op_handle].output_streams_info, trace.queue_name));
+ m_core_ops_info[trace.core_op_handle].output_streams_info[trace.queue_name].pending_frames_count--;
+ m_core_ops_info[trace.core_op_handle].output_streams_info[trace.queue_name].total_frames_count++;
+}
+
+void MonitorHandler::handle_trace(const OutputVdmaEnqueueTrace &trace)
+{
+ assert(contains(m_core_ops_info, trace.core_op_handle));
+ assert(contains(m_core_ops_info[trace.core_op_handle].output_streams_info, trace.queue_name));
+ m_core_ops_info[trace.core_op_handle].output_streams_info[trace.queue_name].pending_frames_count += trace.frames;
+}
+
+void MonitorHandler::handle_trace(const InputVdmaDequeueTrace &trace)
+{
+ assert(contains(m_core_ops_info, trace.core_op_handle));
+ assert(contains(m_core_ops_info[trace.core_op_handle].input_streams_info, trace.queue_name));
+ m_core_ops_info[trace.core_op_handle].input_streams_info[trace.queue_name].pending_frames_count--;
+ update_utilization_send_started(trace.device_id);
+}
+
+scheduler_core_op_handle_t MonitorHandler::get_core_op_handle_by_name(const std::string &name)
+{
+ for (const auto &core_op_info : m_core_ops_info) {
+ if (0 == core_op_info.second.core_op_name.compare(name)) {
+ return core_op_info.first;
+ }
+ }
+ return INVALID_CORE_OP_HANDLE;
+}
+
+hailo_status MonitorHandler::start_mon()
+{
+#if defined(__GNUC__)
+
+    /* Clear the monitor's members. Since the monitor handler is owned by the tracer, which is static,
+       the monitor may be restarted without its destructor being called. */
+ if (m_is_monitor_currently_working) {
+ clear_monitor();
+ }
+ m_is_monitor_currently_working = true;
+
+    m_mon_shutdown_event = Event::create_shared(Event::State::not_signalled);
+    CHECK(nullptr != m_mon_shutdown_event, HAILO_OUT_OF_HOST_MEMORY);
+    m_last_measured_timestamp = std::chrono::steady_clock::now();
+
+ auto tmp_file = open_temp_mon_file();
+ CHECK_EXPECTED_AS_STATUS(tmp_file);
+ m_mon_tmp_output = tmp_file.release();
+
+ m_mon_thread = std::thread([this] ()
+ {
+ while (true) {
+ auto status = m_mon_shutdown_event->wait(DEFAULT_SCHEDULER_MON_INTERVAL);
+ if (HAILO_TIMEOUT == status) {
+ dump_state();
+ } else if (HAILO_SUCCESS == status) {
+ break; // shutdown_event was signaled
+            } else {
+ LOGGER__ERROR("Scheduler monitor failed with status {}", status);
+ return;
+ }
+ }
+ return;
+ });
+
+ return HAILO_SUCCESS;
+#else
+ return HAILO_NOT_IMPLEMENTED;
+#endif
+}
+
+std::string get_curr_pid_as_str()
+{
+ return std::to_string(OsUtils::get_curr_pid());
+}
+
+#if defined(__GNUC__)
+Expected<std::shared_ptr<TempFile>> MonitorHandler::open_temp_mon_file()
+{
+ std::string file_name = get_curr_pid_as_str();
+ auto tmp_file = TempFile::create(file_name, SCHEDULER_MON_TMP_DIR);
+ CHECK_EXPECTED(tmp_file);
+
+ auto tmp_file_ptr = make_shared_nothrow<TempFile>(tmp_file.release());
+ CHECK_AS_EXPECTED(nullptr != tmp_file_ptr, HAILO_OUT_OF_HOST_MEMORY);
+
+ return tmp_file_ptr;
+}
+
+void MonitorHandler::dump_state()
+{
+ auto file = LockedFile::create(m_mon_tmp_output->name(), "w");
+ if (HAILO_SUCCESS != file.status()) {
+ LOGGER__ERROR("Failed to open and lock file {}, with status: {}", m_mon_tmp_output->name(), file.status());
+ return;
+ }
+
+ ProtoMon mon;
+ mon.set_pid(get_curr_pid_as_str());
+ time_dependent_events_cycle_calc();
+ log_monitor_networks_infos(mon);
+ log_monitor_device_infos(mon);
+ log_monitor_frames_infos(mon);
+
+ clear_accumulators();
+
+ if (!mon.SerializeToFileDescriptor(file->get_fd())) {
+ LOGGER__ERROR("Failed to SerializeToFileDescriptor(), with errno: {}", errno);
+ }
+}
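+
+// A consumer (e.g. the `hailortcli monitor` command) can read the dump back with the generated
+// protobuf API - a minimal sketch, assuming `fd` is a file descriptor opened on the same file:
+//   ProtoMon mon;
+//   if (!mon.ParseFromFileDescriptor(fd)) { /* handle parse failure */ }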
+#endif
+
+void MonitorHandler::time_dependent_events_cycle_calc()
+{
+ auto curr_time = std::chrono::steady_clock::now();
+ m_last_measured_time_duration = std::chrono::duration_cast<std::chrono::duration<double>>(curr_time - m_last_measured_timestamp).count();
+
+ for (auto &device : m_devices_info) {
+ if (!device.second.device_has_drained_everything) {
+ update_utilization_read_buffers_finished(device.second.device_id, device.second.current_core_op_handle, false);
+ }
+ }
+ m_last_measured_timestamp = curr_time;
+}
+
+void MonitorHandler::log_monitor_device_infos(ProtoMon &mon)
+{
+ for (auto const &device_info_pair : m_devices_info) {
+ auto device_info = device_info_pair.second;
+ auto curr_device_utilization = device_info.device_utilization_duration;
+ auto utilization_percentage = ((curr_device_utilization * 100) / m_last_measured_time_duration);
+
+ auto device_infos = mon.add_device_infos();
+ device_infos->set_device_id(device_info.device_id);
+ device_infos->set_utilization(utilization_percentage);
+ device_infos->set_device_arch(device_info.device_arch);
+ }
+}
+
+void MonitorHandler::log_monitor_networks_infos(ProtoMon &mon)
+{
+ for (uint32_t core_op_handle = 0; core_op_handle < m_core_ops_info.size(); core_op_handle++) {
+ auto curr_core_op_utilization = m_core_ops_info[core_op_handle].utilization;
+ auto utilization = ((curr_core_op_utilization * 100) / m_last_measured_time_duration);
+ double min_fps = std::numeric_limits<double>::max();
+
+ for (auto const &stream : m_core_ops_info[core_op_handle].output_streams_info) {
+ double fps = stream.second.total_frames_count / m_last_measured_time_duration;
+ min_fps = (fps < min_fps) ? fps : min_fps;
+ }
+
+ auto net_info = mon.add_networks_infos();
+ net_info->set_network_name(m_core_ops_info[core_op_handle].core_op_name);
+ net_info->set_utilization(utilization);
+ net_info->set_fps(min_fps);
+ }
+}
+
+void MonitorHandler::log_monitor_frames_infos(ProtoMon &mon)
+{
+ for (uint32_t core_op_handle = 0; core_op_handle < m_core_ops_info.size(); core_op_handle++) {
+ assert(contains(m_core_ops_info, core_op_handle));
+ auto net_frames_info = mon.add_net_frames_infos();
+ for (auto const &stream : m_core_ops_info[core_op_handle].input_streams_info) {
+ net_frames_info->set_network_name(m_core_ops_info[core_op_handle].core_op_name);
+ auto stream_frames_info = net_frames_info->add_streams_frames_infos();
+ stream_frames_info->set_stream_name(stream.first);
+ stream_frames_info->set_stream_direction(PROTO__STREAM_DIRECTION__HOST_TO_DEVICE);
+ stream_frames_info->set_buffer_frames_size(static_cast<int32_t>(stream.second.queue_size * m_device_count));
+ stream_frames_info->set_pending_frames_count(static_cast<int32_t>(stream.second.pending_frames_count));
+ }
+
+ for (auto const &stream : m_core_ops_info[core_op_handle].output_streams_info) {
+ net_frames_info->set_network_name(m_core_ops_info[core_op_handle].core_op_name);
+ auto stream_frames_info = net_frames_info->add_streams_frames_infos();
+ stream_frames_info->set_stream_name(stream.first);
+ stream_frames_info->set_stream_direction(PROTO__STREAM_DIRECTION__DEVICE_TO_HOST);
+ if (m_core_ops_info[core_op_handle].is_nms) {
+ stream_frames_info->set_pending_frames_count(SCHEDULER_MON_NAN_VAL);
+ stream_frames_info->set_buffer_frames_size(SCHEDULER_MON_NAN_VAL);
+ } else {
+ stream_frames_info->set_pending_frames_count(static_cast<int32_t>(stream.second.pending_frames_count));
+ stream_frames_info->set_buffer_frames_size(static_cast<int32_t>(stream.second.queue_size * m_device_count));
+ }
+ }
+ }
+}
+
+void MonitorHandler::update_utilization_timers(const device_id_t &device_id, scheduler_core_op_handle_t core_op_handle)
+{
+ assert(contains(m_core_ops_info, core_op_handle));
+ assert(contains(m_devices_info, device_id));
+
+ auto time_diff = std::chrono::duration_cast<std::chrono::duration<double>>(
+ std::chrono::steady_clock::now() - m_devices_info.at(device_id).last_measured_utilization_timestamp).count();
+
+ m_devices_info.at(device_id).device_utilization_duration += time_diff;
+ m_core_ops_info[core_op_handle].utilization += time_diff;
+}
+
+void MonitorHandler::update_utilization_timestamp(const device_id_t &device_id)
+{
+ assert(contains(m_devices_info, device_id));
+ m_devices_info.at(device_id).last_measured_utilization_timestamp = std::chrono::steady_clock::now();
+}
+
+void MonitorHandler::update_utilization_send_started(const device_id_t &device_id)
+{
+ assert(contains(m_devices_info, device_id));
+ if (m_devices_info.at(device_id).device_has_drained_everything) {
+ update_device_drained_state(device_id, false);
+ update_utilization_timestamp(device_id);
+ }
+}
+
+void MonitorHandler::update_device_drained_state(const device_id_t &device_id, bool state)
+{
+ assert(contains(m_devices_info, device_id));
+ m_devices_info.at(device_id).device_has_drained_everything = state;
+}
+
+void MonitorHandler::update_utilization_read_buffers_finished(const device_id_t &device_id,
+ scheduler_core_op_handle_t core_op_handle, bool is_drained_everything)
+{
+ update_utilization_timers(device_id, core_op_handle);
+ update_device_drained_state(device_id, is_drained_everything);
+ if (!is_drained_everything) {
+ update_utilization_timestamp(device_id);
+ }
+}
+
+void MonitorHandler::clear_accumulators()
+{
+ for (auto &device_info : m_devices_info) {
+ device_info.second.device_utilization_duration = 0;
+ }
+
+ for (auto &handle_core_op_pair : m_core_ops_info) {
+ for (auto &handle_streams_pair : handle_core_op_pair.second.output_streams_info) {
+ handle_streams_pair.second.total_frames_count = 0;
+ }
+ handle_core_op_pair.second.utilization = 0;
+ }
+}
+
+} /* namespace hailort */
\ No newline at end of file
--- /dev/null
+/**
+ * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+ **/
+/**
+ * @file monitor_handler.hpp
+ * @brief Scheduler monitor handler for the HailoRT tracer mechanism
+ **/
+
+#ifndef _HAILO_MONITOR_HANDLER_HPP_
+#define _HAILO_MONITOR_HANDLER_HPP_
+
+#include "handler.hpp"
+
+#include "hailo/hailort.h"
+#include "hailo/expected.hpp"
+#include "hailo/event.hpp"
+
+#include "common/filesystem.hpp"
+#include "common/utils.hpp"
+
+#include "vdevice/scheduler/scheduler_base.hpp"
+
+#include <iostream>
+#include <string>
+#include <thread>
+
+#if defined(_MSC_VER)
+#pragma warning(push)
+#pragma warning(disable: 4244 4267 4127)
+#else
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wconversion"
+#endif
+#include "scheduler_mon.pb.h"
+#if defined(_MSC_VER)
+#pragma warning( pop )
+#else
+#pragma GCC diagnostic pop
+#endif
+
+namespace hailort
+{
+
+#define SCHEDULER_MON_TMP_DIR ("/tmp/hmon_files/")
+#define SCHEDULER_MON_ENV_VAR ("HAILO_MONITOR")
+#define DEFAULT_SCHEDULER_MON_INTERVAL (std::chrono::seconds(1))
+#define SCHEDULER_MON_NAN_VAL (-1)
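+
+// The monitor is enabled by setting the SCHEDULER_MON_ENV_VAR ("HAILO_MONITOR") environment
+// variable to "1" before the scheduler starts (see Tracer::init_monitor_handler())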
+
+using stream_name = std::string;
+
+struct DeviceInfo {
+ DeviceInfo(const device_id_t &device_id, const std::string &device_arch) :
+ device_id(device_id), device_arch(device_arch), device_has_drained_everything(true),
+ device_utilization_duration(0), last_measured_utilization_timestamp(std::chrono::steady_clock::now()),
+ current_core_op_handle(INVALID_CORE_OP_HANDLE)
+ {}
+ std::string device_id;
+ std::string device_arch;
+ bool device_has_drained_everything;
+ double device_utilization_duration;
+ std::chrono::time_point<std::chrono::steady_clock> last_measured_utilization_timestamp;
+ scheduler_core_op_handle_t current_core_op_handle;
+};
+
+struct StreamsInfo {
+ uint32_t queue_size;
+ uint32_t pending_frames_count;
+ uint32_t total_frames_count = 0;
+};
+
+struct CoreOpInfo {
+ std::unordered_map<stream_name, StreamsInfo> input_streams_info;
+ std::unordered_map<stream_name, StreamsInfo> output_streams_info;
+ std::string core_op_name;
+ bool is_nms;
+ double utilization;
+};
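+
+// Note: `utilization` accumulates active time (in seconds) within the current measurement window;
+// dump_state() converts it to a percentage of the window's duration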
+
+class MonitorHandler : public Handler
+{
+public:
+ MonitorHandler(MonitorHandler const&) = delete;
+ void operator=(MonitorHandler const&) = delete;
+
+ MonitorHandler();
+ ~MonitorHandler();
+ void clear_monitor();
+
+ virtual void handle_trace(const AddCoreOpTrace&) override;
+ virtual void handle_trace(const CreateCoreOpInputStreamsTrace&) override;
+ virtual void handle_trace(const CreateCoreOpOutputStreamsTrace&) override;
+ virtual void handle_trace(const WriteFrameTrace&) override;
+ virtual void handle_trace(const ReadFrameTrace&) override;
+ virtual void handle_trace(const InputVdmaDequeueTrace&) override;
+ virtual void handle_trace(const OutputVdmaEnqueueTrace&) override;
+ virtual void handle_trace(const SwitchCoreOpTrace&) override;
+ virtual void handle_trace(const SchedulerStartTrace&) override;
+ virtual void handle_trace(const CoreOpIdleTrace&) override;
+ virtual void handle_trace(const AddDeviceTrace&) override;
+
+private:
+ hailo_status start_mon();
+#if defined(__GNUC__)
+ Expected<std::shared_ptr<TempFile>> open_temp_mon_file();
+ void dump_state();
+#endif
+ void time_dependent_events_cycle_calc();
+ void log_monitor_device_infos(ProtoMon &mon);
+ void log_monitor_networks_infos(ProtoMon &mon);
+ void log_monitor_frames_infos(ProtoMon &mon);
+ void update_utilization_timers(const device_id_t &device_id, scheduler_core_op_handle_t core_op_handle);
+ void update_utilization_timestamp(const device_id_t &device_id);
+ void update_utilization_send_started(const device_id_t &device_id);
+ void update_device_drained_state(const device_id_t &device_id, bool state);
+    void update_utilization_read_buffers_finished(const device_id_t &device_id, scheduler_core_op_handle_t core_op_handle, bool is_drained_everything);
+ void clear_accumulators();
+ scheduler_core_op_handle_t get_core_op_handle_by_name(const std::string &name);
+
+ bool m_is_monitor_currently_working = false;
+    uint32_t m_device_count = 0;
+ std::thread m_mon_thread;
+ EventPtr m_mon_shutdown_event;
+#if defined(__GNUC__)
+ std::shared_ptr<TempFile> m_mon_tmp_output;
+#endif
+ std::chrono::time_point<std::chrono::steady_clock> m_last_measured_timestamp;
+ double m_last_measured_time_duration;
+ // TODO: Consider adding Accumulator classes for more info (min, max, mean, etc..)
+ std::unordered_map<scheduler_core_op_handle_t, CoreOpInfo> m_core_ops_info;
+ std::unordered_map<device_id_t, DeviceInfo> m_devices_info;
+};
+} /* namespace hailort */
+
+#endif /* _HAILO_MONITOR_HANDLER_HPP_ */
\ No newline at end of file
--- /dev/null
+/**
+ * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+ **/
+/**
+ * @file scheduler_profiler_handler.cpp
+ * @brief Implementation of the scheduler profiler handler for the HailoRT tracer mechanism
+ **/
+
+#include "scheduler_profiler_handler.hpp"
+
+#include "common/logger_macros.hpp"
+
+#include "utils/hailort_logger.hpp"
+
+#include <spdlog/sinks/rotating_file_sink.h>
+#include <spdlog/sinks/stdout_color_sinks.h>
+#include <spdlog/sinks/android_sink.h>
+#include <spdlog/sinks/null_sink.h>
+
+#include <iomanip>
+#include <sstream>
+
+#define SCHEDULER_PROFILER_NAME ("SchedulerProfiler")
+#define SCHEDULER_PROFILER_LOGGER_FILENAME ("scheduler_profiler.json")
+#define SCHEDULER_PROFILER_LOGGER_PATTERN ("%v")
+
+#define SCHEDULER_PROFILER_LOGGER_PATH ("SCHEDULER_PROFILER_LOGGER_PATH")
+
+namespace hailort
+{
+
+SchedulerProfilerHandler::SchedulerProfilerHandler(int64_t &start_time)
+#ifndef __ANDROID__
+ : m_file_sink(HailoRTLogger::create_file_sink(HailoRTLogger::get_log_path(SCHEDULER_PROFILER_LOGGER_PATH), SCHEDULER_PROFILER_LOGGER_FILENAME, false)),
+ m_first_write(true)
+#endif
+{
+#ifndef __ANDROID__
+ spdlog::sinks_init_list sink_list = { m_file_sink };
+ m_profiler_logger = make_shared_nothrow<spdlog::logger>(SCHEDULER_PROFILER_NAME, sink_list.begin(), sink_list.end());
+ m_file_sink->set_level(spdlog::level::level_enum::info);
+ m_file_sink->set_pattern(SCHEDULER_PROFILER_LOGGER_PATTERN);
+ std::stringstream ss;
+ ss << "{\"ns_since_epoch_zero_time\": \"" << start_time << "\",\n\"scheduler_actions\": [\n";
+ m_profiler_logger->info(ss.str());
+#else
+ (void)start_time;
+#endif
+}
+
+SchedulerProfilerHandler::~SchedulerProfilerHandler()
+{
+#ifndef __ANDROID__
+    // m_profiler_logger is only created for non-Android builds (see the constructor)
+    m_profiler_logger->info("]\n}");
+#endif
+}
+
+struct JSON
+{
+ std::unordered_map<std::string, std::string> members;
+ JSON(const std::initializer_list<std::pair<const std::string, std::string>> &dict) : members{dict} {}
+ JSON(const std::unordered_map<std::string, uint32_t> &dict) {
+ for (auto &pair : dict) {
+ members.insert({pair.first, std::to_string(pair.second)});
+ }
+ }
+};
+
+template<class T>
+std::string json_to_string(const T &val) {
+ return std::to_string(val);
+}
+
+template<>
+std::string json_to_string(const std::string &val) {
+ std::ostringstream os;
+ os << std::quoted(val);
+ return os.str();
+}
+
+template<>
+std::string json_to_string(const bool &bool_val) {
+ return bool_val ? "true" : "false";
+}
+
+template<>
+std::string json_to_string(const JSON &json_val) {
+ std::ostringstream os;
+ os << "{\n";
+ size_t i = 0;
+ for (const auto &kv : json_val.members) {
+ ++i;
+ os << std::quoted(kv.first) << " : ";
+ os << kv.second;
+ if (i != json_val.members.size()) {
+ os << ",\n";
+ }
+ }
+ os << "\n}";
+ return os.str();
+}
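+
+// For example (illustrative), json_to_string(JSON({{"action", "\"init\""}, {"timestamp", "0"}}))
+// produces a string such as:
+//   {
+//   "action" : "init",
+//   "timestamp" : 0
+//   }
+// (member order is unspecified, since JSON::members is an std::unordered_map)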
+
+bool SchedulerProfilerHandler::comma()
+{
+ auto result = !m_first_write;
+ m_first_write = false;
+ return result;
+}
+
+void SchedulerProfilerHandler::log(JSON json)
+{
+ m_profiler_logger->info("{}{}", comma() ? ",\n" : "", json_to_string(json));
+}
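+
+// Together, the constructor, log() and the destructor emit a single JSON document of the form
+// (illustrative):
+//   {"ns_since_epoch_zero_time": "<start_time>",
+//   "scheduler_actions": [
+//   { <first trace> },
+//   { <second trace> }
+//   ]
+//   }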
+
+void SchedulerProfilerHandler::handle_trace(const AddCoreOpTrace &trace)
+{
+ log(JSON({
+ {"action", json_to_string(trace.name)},
+ {"timestamp", json_to_string(trace.timestamp)},
+ {"device_id", json_to_string(trace.device_id)},
+ {"core_op_name", json_to_string(trace.core_op_name)},
+ {"core_op_handle", json_to_string(trace.core_op_handle)},
+ {"timeout", json_to_string((uint64_t)trace.timeout)},
+ {"threshold", json_to_string((uint64_t)trace.threshold)}
+ }));
+}
+
+void SchedulerProfilerHandler::handle_trace(const CreateCoreOpInputStreamsTrace &trace)
+{
+ log(JSON({
+ {"action", json_to_string(trace.name)},
+ {"timestamp", json_to_string(trace.timestamp)},
+ {"device_id", json_to_string(trace.device_id)},
+ {"core_op_name", json_to_string(trace.core_op_name)},
+ {"stream_name", json_to_string(trace.stream_name)},
+ {"queue_size", json_to_string(trace.queue_size)}
+ }));
+}
+
+void SchedulerProfilerHandler::handle_trace(const CreateCoreOpOutputStreamsTrace &trace)
+{
+ log(JSON({
+ {"action", json_to_string(trace.name)},
+ {"timestamp", json_to_string(trace.timestamp)},
+ {"device_id", json_to_string(trace.device_id)},
+ {"core_op_name", json_to_string(trace.core_op_name)},
+ {"stream_name", json_to_string(trace.stream_name)},
+ {"queue_size", json_to_string(trace.queue_size)}
+ }));
+}
+
+void SchedulerProfilerHandler::handle_trace(const WriteFrameTrace &trace)
+{
+ log(JSON({
+ {"action", json_to_string(trace.name)},
+ {"timestamp", json_to_string(trace.timestamp)},
+ {"device_id", json_to_string(trace.device_id)},
+ {"core_op_handle", json_to_string(trace.core_op_handle)},
+ {"queue_name", json_to_string(trace.queue_name)}
+ }));
+}
+
+void SchedulerProfilerHandler::handle_trace(const InputVdmaDequeueTrace &trace)
+{
+ log(JSON({
+ {"action", json_to_string(trace.name)},
+ {"timestamp", json_to_string(trace.timestamp)},
+ {"device_id", json_to_string(trace.device_id)},
+ {"core_op_handle", json_to_string(trace.core_op_handle)},
+ {"queue_name", json_to_string(trace.queue_name)}
+ }));
+}
+
+void SchedulerProfilerHandler::handle_trace(const ReadFrameTrace &trace)
+{
+ log(JSON({
+ {"action", json_to_string(trace.name)},
+ {"timestamp", json_to_string(trace.timestamp)},
+ {"device_id", json_to_string(trace.device_id)},
+ {"core_op_handle", json_to_string(trace.core_op_handle)},
+ {"queue_name", json_to_string(trace.queue_name)}
+ }));
+}
+
+void SchedulerProfilerHandler::handle_trace(const OutputVdmaEnqueueTrace &trace)
+{
+ log(JSON({
+ {"action", json_to_string(trace.name)},
+ {"timestamp", json_to_string(trace.timestamp)},
+ {"device_id", json_to_string(trace.device_id)},
+ {"core_op_handle", json_to_string(trace.core_op_handle)},
+ {"queue_name", json_to_string(trace.queue_name)},
+ {"frames", json_to_string(trace.frames)}
+ }));
+}
+
+void SchedulerProfilerHandler::handle_trace(const ChooseCoreOpTrace &trace)
+{
+ log(JSON({
+ {"action", json_to_string(trace.name)},
+ {"timestamp", json_to_string(trace.timestamp)},
+ {"device_id", json_to_string(trace.device_id)},
+ {"chosen_core_op_handle", json_to_string(trace.core_op_handle)},
+ {"threshold", json_to_string(trace.threshold)},
+ {"timeout", json_to_string(trace.timeout)},
+ {"priority", json_to_string(trace.priority)}
+ }));
+}
+
+void SchedulerProfilerHandler::handle_trace(const SwitchCoreOpTrace &trace)
+{
+ log(JSON({
+ {"action", json_to_string(trace.name)},
+ {"timestamp", json_to_string(trace.timestamp)},
+ {"device_id", json_to_string(trace.device_id)},
+ {"core_op_handle", json_to_string(trace.core_op_handle)}
+ }));
+}
+
+} /* namespace hailort */
\ No newline at end of file
--- /dev/null
+/**
+ * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+ **/
+/**
+ * @file scheduler_profiler_handler.hpp
+ * @brief Scheduler profiler handler for the HailoRT tracer mechanism
+ **/
+
+#ifndef _HAILO_SCHEDULER_PROFILER_HANDLER_HPP_
+#define _HAILO_SCHEDULER_PROFILER_HANDLER_HPP_
+
+#include "hailo/hailort.h"
+
+#include "handler.hpp"
+
+namespace hailort
+{
+class SchedulerProfilerHandler : public Handler
+{
+public:
+ SchedulerProfilerHandler(SchedulerProfilerHandler const&) = delete;
+ void operator=(SchedulerProfilerHandler const&) = delete;
+
+ SchedulerProfilerHandler(int64_t &start_time);
+ ~SchedulerProfilerHandler();
+
+ virtual void handle_trace(const AddCoreOpTrace&) override;
+ virtual void handle_trace(const CreateCoreOpInputStreamsTrace&) override;
+ virtual void handle_trace(const CreateCoreOpOutputStreamsTrace&) override;
+ virtual void handle_trace(const WriteFrameTrace&) override;
+ virtual void handle_trace(const InputVdmaDequeueTrace&) override;
+ virtual void handle_trace(const ReadFrameTrace&) override;
+ virtual void handle_trace(const OutputVdmaEnqueueTrace&) override;
+ virtual void handle_trace(const ChooseCoreOpTrace&) override;
+ virtual void handle_trace(const SwitchCoreOpTrace&) override;
+
+private:
+ void log(JSON json);
+ bool comma();
+
+ std::shared_ptr<spdlog::sinks::sink> m_file_sink;
+ std::shared_ptr<spdlog::logger> m_profiler_logger;
+ std::atomic<bool> m_first_write;
+};
+
+} /* namespace hailort */
+
+#endif /* _HAILO_SCHEDULER_PROFILER_HANDLER_HPP_ */
\ No newline at end of file
/**
* @file tracer.cpp
 * @brief Tracing mechanism for HailoRT + FW events
- *
**/
#include "common/utils.hpp"
-#include "utils/hailort_logger.hpp"
#include "utils/profiler/tracer.hpp"
-#include <spdlog/sinks/rotating_file_sink.h>
-#include <spdlog/sinks/stdout_color_sinks.h>
-#include <spdlog/sinks/android_sink.h>
-#include <spdlog/sinks/null_sink.h>
-#include <iomanip>
-#include <sstream>
-
-
-#define SCHEDULER_PROFILER_NAME ("SchedulerProfiler")
-#define SCHEDULER_PROFILER_LOGGER_FILENAME ("scheduler_profiler.json")
-#define SCHEDULER_PROFILER_LOGGER_PATTERN ("%v")
-
-#define SCHEDULER_PROFILER_LOGGER_PATH ("SCHEDULER_PROFILER_LOGGER_PATH")
-
#define PROFILER_ENV_VAR ("HAILO_ENABLE_PROFILER")
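+
+// Setting HAILO_ENABLE_PROFILER=1 enables the scheduler profiler trace
+// (see Tracer::init_scheduler_profiler_handler() below)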
namespace hailort
Tracer::Tracer()
{
- auto should_trace_env = std::getenv(PROFILER_ENV_VAR);
- m_should_trace = ((nullptr != should_trace_env) && (strnlen(should_trace_env, 2) == 1) && (strncmp(should_trace_env, "1", 1) == 0));
+ init_scheduler_profiler_handler();
+ init_monitor_handler();
+}
+
+void Tracer::init_scheduler_profiler_handler()
+{
+ const char* env_var_name = PROFILER_ENV_VAR;
+ m_should_trace = is_env_variable_on(env_var_name);
if (m_should_trace) {
m_start_time = std::chrono::high_resolution_clock::now();
int64_t time_since_epoch = std::chrono::duration_cast<std::chrono::nanoseconds>(m_start_time.time_since_epoch()).count();
}
}
-SchedulerProfilerHandler::SchedulerProfilerHandler(int64_t &start_time)
-#ifndef __ANDROID__
- : m_file_sink(HailoRTLogger::create_file_sink(HailoRTLogger::get_log_path(SCHEDULER_PROFILER_LOGGER_PATH), SCHEDULER_PROFILER_LOGGER_FILENAME, false)),
- m_first_write(true)
-#endif
+void Tracer::init_monitor_handler()
{
-#ifndef __ANDROID__
- spdlog::sinks_init_list sink_list = { m_file_sink };
- m_profiler_logger = make_shared_nothrow<spdlog::logger>(SCHEDULER_PROFILER_NAME, sink_list.begin(), sink_list.end());
- m_file_sink->set_level(spdlog::level::level_enum::info);
- m_file_sink->set_pattern(SCHEDULER_PROFILER_LOGGER_PATTERN);
- std::stringstream ss;
- ss << "{\"ns_since_epoch_zero_time\": \"" << start_time << "\",\n\"scheduler_actions\": [\n";
- m_profiler_logger->info(ss.str());
-#else
- (void)start_time;
-#endif
-}
-
-SchedulerProfilerHandler::~SchedulerProfilerHandler()
-{
- m_profiler_logger->info("]\n}");
-}
-
-struct JSON
-{
- std::unordered_map<std::string, std::string> members;
- JSON(const std::initializer_list<std::pair<const std::string, std::string>> &dict) : members{dict} {}
- JSON(const std::unordered_map<std::string, uint32_t> &dict) {
- for (auto &pair : dict) {
- members.insert({pair.first, std::to_string(pair.second)});
- }
- }
-};
-
-template<class T>
-std::string json_to_string(const T &val) {
- return std::to_string(val);
-}
-
-template<>
-std::string json_to_string(const std::string &val) {
- std::ostringstream os;
- os << std::quoted(val);
- return os.str();
-}
-
-template<>
-std::string json_to_string(const bool &bool_val) {
- return bool_val ? "true" : "false";
-}
-
-template<>
-std::string json_to_string(const JSON &json_val) {
- std::ostringstream os;
- os << "{\n";
- size_t i = 0;
- for (const auto &kv : json_val.members) {
- ++i;
- os << std::quoted(kv.first) << " : ";
- os << kv.second;
- if (i != json_val.members.size()) {
- os << ",\n";
- }
+ const char* env_var_name = SCHEDULER_MON_ENV_VAR;
+ m_should_monitor = is_env_variable_on(env_var_name);
+ if (m_should_monitor) {
+ m_handlers.push_back(std::make_unique<MonitorHandler>());
}
- os << "\n}";
- return os.str();
}
-bool SchedulerProfilerHandler::comma()
-{
- auto result = !m_first_write;
- m_first_write = false;
- return result;
-}
-
-void SchedulerProfilerHandler::log(JSON json)
-{
- m_profiler_logger->info("{}{}", comma() ? ",\n" : "", json_to_string(json));
-}
-
-void SchedulerProfilerHandler::handle_trace(const AddCoreOpTrace &trace)
-{
- log(JSON({
- {"action", json_to_string(trace.name)},
- {"timestamp", json_to_string(trace.timestamp)},
- {"device_id", json_to_string(trace.device_id)},
- {"core_op_name", json_to_string(trace.core_op_name)},
- {"core_op_handle", json_to_string(trace.core_op_handle)},
- {"timeout", json_to_string((uint64_t)trace.timeout)},
- {"threshold", json_to_string((uint64_t)trace.threshold)}
- }));
-}
-
-void SchedulerProfilerHandler::handle_trace(const CreateCoreOpInputStreamsTrace &trace)
-{
- log(JSON({
- {"action", json_to_string(trace.name)},
- {"timestamp", json_to_string(trace.timestamp)},
- {"device_id", json_to_string(trace.device_id)},
- {"core_op_name", json_to_string(trace.core_op_name)},
- {"stream_name", json_to_string(trace.stream_name)},
- {"queue_size", json_to_string(trace.queue_size)}
- }));
-}
-
-void SchedulerProfilerHandler::handle_trace(const CreateCoreOpOutputStreamsTrace &trace)
-{
- log(JSON({
- {"action", json_to_string(trace.name)},
- {"timestamp", json_to_string(trace.timestamp)},
- {"device_id", json_to_string(trace.device_id)},
- {"core_op_name", json_to_string(trace.core_op_name)},
- {"stream_name", json_to_string(trace.stream_name)},
- {"queue_size", json_to_string(trace.queue_size)}
- }));
-}
-
-void SchedulerProfilerHandler::handle_trace(const WriteFrameTrace &trace)
-{
- log(JSON({
- {"action", json_to_string(trace.name)},
- {"timestamp", json_to_string(trace.timestamp)},
- {"device_id", json_to_string(trace.device_id)},
- {"core_op_handle", json_to_string(trace.core_op_handle)},
- {"queue_name", json_to_string(trace.queue_name)}
- }));
-}
-
-void SchedulerProfilerHandler::handle_trace(const InputVdmaDequeueTrace &trace)
-{
- log(JSON({
- {"action", json_to_string(trace.name)},
- {"timestamp", json_to_string(trace.timestamp)},
- {"device_id", json_to_string(trace.device_id)},
- {"core_op_handle", json_to_string(trace.core_op_handle)},
- {"queue_name", json_to_string(trace.queue_name)}
- }));
-}
-
-void SchedulerProfilerHandler::handle_trace(const ReadFrameTrace &trace)
-{
- log(JSON({
- {"action", json_to_string(trace.name)},
- {"timestamp", json_to_string(trace.timestamp)},
- {"device_id", json_to_string(trace.device_id)},
- {"core_op_handle", json_to_string(trace.core_op_handle)},
- {"queue_name", json_to_string(trace.queue_name)}
- }));
-}
-
-void SchedulerProfilerHandler::handle_trace(const OutputVdmaEnqueueTrace &trace)
-{
- log(JSON({
- {"action", json_to_string(trace.name)},
- {"timestamp", json_to_string(trace.timestamp)},
- {"device_id", json_to_string(trace.device_id)},
- {"core_op_handle", json_to_string(trace.core_op_handle)},
- {"queue_name", json_to_string(trace.queue_name)},
- {"frames", json_to_string(trace.frames)}
- }));
-}
-
-void SchedulerProfilerHandler::handle_trace(const ChooseCoreOpTrace &trace)
-{
- log(JSON({
- {"action", json_to_string(trace.name)},
- {"timestamp", json_to_string(trace.timestamp)},
- {"device_id", json_to_string(trace.device_id)},
- {"chosen_core_op_handle", json_to_string(trace.core_op_handle)},
- {"threshold", json_to_string(trace.threshold)},
- {"timeout", json_to_string(trace.timeout)},
- {"priority", json_to_string(trace.priority)}
- }));
-}
-
-void SchedulerProfilerHandler::handle_trace(const SwitchCoreOpTrace &trace)
-{
- log(JSON({
- {"action", json_to_string(trace.name)},
- {"timestamp", json_to_string(trace.timestamp)},
- {"device_id", json_to_string(trace.device_id)},
- {"core_op_handle", json_to_string(trace.core_op_handle)}
- }));
-}
-
-
}
#define _HAILO_TRACER_HPP_
#include "hailo/hailort.h"
-#include "common/logger_macros.hpp"
-
-#include "vdevice/scheduler/scheduler_base.hpp"
-
-#include <chrono>
-#include <memory>
-#include <vector>
-#include <map>
-#include <unordered_map>
-#include <atomic>
-#include <chrono>
-#include <sstream>
-#include <iomanip>
-
+#include "scheduler_profiler_handler.hpp"
+#include "monitor_handler.hpp"
namespace hailort
{
-
-struct Trace
-{
- Trace(const std::string &name)
- : name(name)
- {}
-
- virtual ~Trace() = default;
-
- uint64_t timestamp = 0;
- std::string name;
-};
-
-struct InitTrace : Trace
-{
- InitTrace() : Trace("init") {}
-};
-
-struct AddCoreOpTrace : Trace
-{
- AddCoreOpTrace(const std::string &device_id, const std::string &core_op_name, uint64_t timeout, uint32_t threshold, scheduler_core_op_handle_t handle)
- : Trace("add_core_op"), device_id(device_id), core_op_name(core_op_name), timeout(timeout), threshold(threshold), core_op_handle(handle)
- {}
-
- std::string device_id;
- std::string core_op_name;
- uint64_t timeout = 0;
- uint32_t threshold = 0;
- scheduler_core_op_handle_t core_op_handle = INVALID_CORE_OP_HANDLE;
-};
-
-struct CreateCoreOpInputStreamsTrace : Trace
-{
- CreateCoreOpInputStreamsTrace(const std::string &device_id, const std::string &core_op_name, const std::string &stream_name, uint32_t queue_size)
- : Trace("create_input_stream"), device_id(device_id), core_op_name(core_op_name), stream_name(stream_name), queue_size(queue_size)
- {}
-
- std::string device_id;
- std::string core_op_name;
- std::string stream_name;
- uint32_t queue_size;
-};
-
-struct CreateCoreOpOutputStreamsTrace : Trace
-{
- CreateCoreOpOutputStreamsTrace(const std::string &device_id, const std::string &core_op_name, const std::string &stream_name, uint32_t queue_size)
- : Trace("create_output_stream"), device_id(device_id), core_op_name(core_op_name), stream_name(stream_name), queue_size(queue_size)
- {}
-
- std::string device_id;
- std::string core_op_name;
- std::string stream_name;
- uint32_t queue_size;
-};
-
-struct WriteFrameTrace : Trace
-{
- WriteFrameTrace(const std::string &device_id, scheduler_core_op_handle_t core_op_handle, const std::string &queue_name)
- : Trace("write_frame"), device_id(device_id), core_op_handle(core_op_handle), queue_name(queue_name)
- {}
-
- std::string device_id;
- scheduler_core_op_handle_t core_op_handle;
- std::string queue_name;
-};
-
-struct InputVdmaDequeueTrace : Trace
-{
- InputVdmaDequeueTrace(const std::string &device_id, scheduler_core_op_handle_t core_op_handle, const std::string &queue_name)
- : Trace("input_vdma_dequeue"), device_id(device_id), core_op_handle(core_op_handle), queue_name(queue_name)
- {}
-
- std::string device_id;
- scheduler_core_op_handle_t core_op_handle;
- std::string queue_name;
-};
-
-struct ReadFrameTrace : Trace
-{
- ReadFrameTrace(const std::string &device_id, scheduler_core_op_handle_t core_op_handle, const std::string &queue_name)
- : Trace("read_frame"), device_id(device_id), core_op_handle(core_op_handle), queue_name(queue_name)
- {}
-
- std::string device_id;
- scheduler_core_op_handle_t core_op_handle;
- std::string queue_name;
-};
-
-struct OutputVdmaEnqueueTrace : Trace
-{
- OutputVdmaEnqueueTrace(const std::string &device_id, scheduler_core_op_handle_t core_op_handle, const std::string &queue_name, uint32_t frames)
- : Trace("output_vdma_enqueue"), device_id(device_id), core_op_handle(core_op_handle), queue_name(queue_name), frames(frames)
- {}
-
- std::string device_id;
- scheduler_core_op_handle_t core_op_handle;
- std::string queue_name;
- uint32_t frames = 0;
-};
-
-struct ChooseCoreOpTrace : Trace
-{
- ChooseCoreOpTrace(const std::string &device_id, scheduler_core_op_handle_t handle, bool threshold, bool timeout, core_op_priority_t priority)
- : Trace("choose_core_op"), device_id(device_id), core_op_handle(handle), threshold(threshold), timeout(timeout), priority(priority)
- {}
-
- std::string device_id;
- scheduler_core_op_handle_t core_op_handle;
- bool threshold = false;
- bool timeout = false;
- core_op_priority_t priority;
-};
-
-struct SwitchCoreOpTrace : Trace
-{
- SwitchCoreOpTrace(const std::string &device_id, scheduler_core_op_handle_t handle)
- : Trace("switch_core_op"), device_id(device_id), core_op_handle(handle)
- {}
-
- std::string device_id;
- scheduler_core_op_handle_t core_op_handle;
-};
-
-class Handler
-{
-public:
- virtual ~Handler() = default;
-
- virtual void handle_trace(const InitTrace&) {};
- virtual void handle_trace(const AddCoreOpTrace&) {};
- virtual void handle_trace(const CreateCoreOpInputStreamsTrace&) {};
- virtual void handle_trace(const CreateCoreOpOutputStreamsTrace&) {};
- virtual void handle_trace(const WriteFrameTrace&) {};
- virtual void handle_trace(const InputVdmaDequeueTrace&) {};
- virtual void handle_trace(const ReadFrameTrace&) {};
- virtual void handle_trace(const OutputVdmaEnqueueTrace&) {};
- virtual void handle_trace(const ChooseCoreOpTrace&) {};
- virtual void handle_trace(const SwitchCoreOpTrace&) {};
-};
-
-struct JSON;
-
-class SchedulerProfilerHandler : public Handler
-{
-public:
- SchedulerProfilerHandler(SchedulerProfilerHandler const&) = delete;
- void operator=(SchedulerProfilerHandler const&) = delete;
-
- SchedulerProfilerHandler(int64_t &start_time);
- ~SchedulerProfilerHandler();
-
- virtual void handle_trace(const AddCoreOpTrace&) override;
- virtual void handle_trace(const CreateCoreOpInputStreamsTrace&) override;
- virtual void handle_trace(const CreateCoreOpOutputStreamsTrace&) override;
- virtual void handle_trace(const WriteFrameTrace&) override;
- virtual void handle_trace(const InputVdmaDequeueTrace&) override;
- virtual void handle_trace(const ReadFrameTrace&) override;
- virtual void handle_trace(const OutputVdmaEnqueueTrace&) override;
- virtual void handle_trace(const ChooseCoreOpTrace&) override;
- virtual void handle_trace(const SwitchCoreOpTrace&) override;
-
-private:
- void log(JSON json);
- bool comma();
-
- std::shared_ptr<spdlog::sinks::sink> m_file_sink;
- std::shared_ptr<spdlog::logger> m_profiler_logger;
- std::atomic<bool> m_first_write;
-};
-
class Tracer
{
public:
private:
Tracer();
+ void init_monitor_handler();
+ void init_scheduler_profiler_handler();
static Tracer& get_instance()
{
template<class TraceType, typename... Args>
void execute_trace(Args... trace_args)
{
- if (!m_should_trace) {
+ if ((!m_should_trace) && (!m_should_monitor)) {
return;
}
}
bool m_should_trace = false;
+ bool m_should_monitor = false;
std::chrono::high_resolution_clock::time_point m_start_time;
std::vector<std::unique_ptr<Handler>> m_handlers;
};
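
The Tracer above is a Meyers singleton that forwards strongly-typed trace objects to every registered Handler, with a cheap early-out when neither tracing nor monitoring is enabled. The following is a minimal self-contained sketch of that dispatch pattern, not HailoRT source; all names in it (MiniTracer, MiniTrace, PrintHandler) are hypothetical simplifications.

#include <iostream>
#include <memory>
#include <utility>
#include <vector>

struct MiniTrace { const char *name; };

class MiniHandler {
public:
    virtual ~MiniHandler() = default;
    virtual void handle_trace(const MiniTrace &) {}
};

class PrintHandler : public MiniHandler {
public:
    void handle_trace(const MiniTrace &trace) override
    {
        std::cout << "trace: " << trace.name << '\n';
    }
};

class MiniTracer {
public:
    template<class TraceType, typename... Args>
    static void trace(Args&&... args)
    {
        auto &self = get_instance();
        if (!self.m_enabled) {
            return; // cheap early-out, mirroring m_should_trace / m_should_monitor
        }
        TraceType trace_object{std::forward<Args>(args)...};
        for (auto &handler : self.m_handlers) {
            handler->handle_trace(trace_object); // fan out to all registered handlers
        }
    }

private:
    MiniTracer()
    {
        m_handlers.emplace_back(std::make_unique<PrintHandler>());
    }

    static MiniTracer &get_instance()
    {
        static MiniTracer instance; // function-local static, as in Tracer::get_instance()
        return instance;
    }

    bool m_enabled = true;
    std::vector<std::unique_ptr<MiniHandler>> m_handlers;
};

int main()
{
    MiniTracer::trace<MiniTrace>("switch_core_op"); // prints "trace: switch_core_op"
    return 0;
}
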
std::shared_ptr<T> resource;
};
+// TODO: Merge ExportedResourceManager and SharedResourceManager (HRT-10317)
template<class Key, class T>
class SharedResourceManager
{
#include "common/utils.hpp"
#include "common/logger_macros.hpp"
-
-#include "utils/event_internal.hpp"
+#include "common/event_internal.hpp"
// Define __unix__ for inclusion of readerwriterqueue.h because readerwriterqueue is implemented over POSIX standards
// but checks __unix__ - otherwise QNX returns unsupported platform (need HAILO_UNDEF_UNIX_FLAG in order to undefine
${CMAKE_CURRENT_SOURCE_DIR}/pipeline_multiplexer.cpp
${CMAKE_CURRENT_SOURCE_DIR}/vdevice_stream.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/vdevice_native_stream.cpp
${CMAKE_CURRENT_SOURCE_DIR}/vdevice_stream_multiplexer_wrapper.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/callback_reorder_queue.cpp
- ${CMAKE_CURRENT_SOURCE_DIR}/scheduler/network_group_scheduler.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/scheduler/scheduler.cpp
${CMAKE_CURRENT_SOURCE_DIR}/scheduler/scheduler_oracle.cpp
${CMAKE_CURRENT_SOURCE_DIR}/scheduler/scheduled_core_op_state.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/scheduler/scheduled_stream.cpp
${CMAKE_CURRENT_SOURCE_DIR}/scheduler/multi_device_scheduled_stream.cpp
)
--- /dev/null
+/**
+ * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+**/
+/**
+ * @file callback_reorder_queue.cpp
+ **/
+
+#include "callback_reorder_queue.hpp"
+
+namespace hailort
+{
+
+InternalTransferDoneCallback CallbackReorderQueue::wrap_callback(const InternalTransferDoneCallback &original)
+{
+ std::lock_guard<std::mutex> lock_guard(m_queue_mutex);
+ const uint64_t current_callback_index = m_registered_callbacks++;
+
+ return [this, original, current_callback_index](hailo_status status) {
+ // Push callback without calling it yet.
+ push_callback(std::make_pair(current_callback_index, [original, status]() {
+ return original(status);
+ }));
+
+        // Then, call the queued callbacks in order (if any are ready).
+ call_queued_callbacks_in_order();
+ };
+}
+
+void CallbackReorderQueue::cancel_last_callback()
+{
+ std::lock_guard<std::mutex> lock_guard(m_queue_mutex);
+ assert(m_called_callbacks < m_registered_callbacks);
+ m_registered_callbacks--;
+}
+
+void CallbackReorderQueue::push_callback(const Callback &callback)
+{
+ std::lock_guard<std::mutex> lock_guard(m_queue_mutex);
+ assert(m_callbacks_queue.size() < m_max_size);
+ m_callbacks_queue.push(callback);
+}
+
+void CallbackReorderQueue::call_queued_callbacks_in_order()
+{
+ // Allow only one thread to execute the callbacks.
+ std::lock_guard<std::mutex> callbacks_lock(m_callbacks_mutex);
+
+ while (auto callback = pop_ready_callback()) {
+ callback->second();
+ }
+}
+
+Expected<CallbackReorderQueue::Callback> CallbackReorderQueue::pop_ready_callback()
+{
+ std::lock_guard<std::mutex> lock_guard(m_queue_mutex);
+
+ if (m_callbacks_queue.empty()) {
+ return make_unexpected(HAILO_NOT_AVAILABLE);
+ }
+
+ if (m_callbacks_queue.top().first != m_called_callbacks) {
+            // We need to wait until top() holds the callback whose index equals m_called_callbacks.
+ return make_unexpected(HAILO_NOT_AVAILABLE);
+ }
+
+ auto next_callback = m_callbacks_queue.top();
+ m_callbacks_queue.pop();
+
+ m_called_callbacks++;
+ return next_callback;
+}
+
+} /* namespace hailort */
--- /dev/null
+/**
+ * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+**/
+/**
+ * @file callback_reorder_queue.hpp
+ * @brief When using multiple devices with the async API, interrupts for a given input/output stream may arrive
+ *        out of order (for example, the second device may be faster than the first).
+ *        To preserve callback order, callbacks are put in a queue and called in the order they were inserted.
+ **/
+
+#ifndef _HAILO_CALLBACK_REORDER_QUEUE_HPP_
+#define _HAILO_CALLBACK_REORDER_QUEUE_HPP_
+
+#include "stream_common/async_common.hpp"
+
+#include <mutex>
+#include <queue>
+
+namespace hailort
+{
+
+class CallbackReorderQueue final {
+public:
+ CallbackReorderQueue(size_t max_size) :
+ m_max_size(max_size),
+ m_callbacks_queue(compare_callbacks{}, make_queue_storage(m_max_size))
+ {}
+
+    // Wraps the given callback so that wrapped callbacks run in the same order in which wrap_callback was called.
+ InternalTransferDoneCallback wrap_callback(const InternalTransferDoneCallback &original);
+
+    // If a wrapped callback couldn't be registered with the async API (for example because the queue is full), we
+    // need to undo the counter increment done in `wrap_callback` (otherwise the next callback will wait forever).
+    // Note!
+    // * Call this function only after a `wrap_callback` was called.
+    // * Make sure the wrapped callback will never be called! (Otherwise the counters will lose synchronization).
+ void cancel_last_callback();
+
+private:
+    // Executes all ready callbacks in order. Serialized internally by m_callbacks_mutex; must not be called
+    // while m_queue_mutex is held.
+ void call_queued_callbacks_in_order();
+
+    // Each callback is stored together with its registration index.
+ using Callback = std::pair<uint64_t, std::function<void()>>;
+
+ void push_callback(const Callback &callback);
+
+    // Pops the next callback that is ready to be called. Returns HAILO_NOT_AVAILABLE if no callback is ready.
+ Expected<Callback> pop_ready_callback();
+
+    // To avoid memory allocations at runtime, the priority queue is initialized with a vector whose capacity is reserved up front.
+ static std::vector<Callback> make_queue_storage(size_t max_size)
+ {
+ std::vector<Callback> storage;
+ storage.reserve(max_size);
+ return storage;
+ }
+
+ const size_t m_max_size;
+
+ // Guards access to m_callbacks_queue and the counters.
+ std::mutex m_queue_mutex;
+
+    // Monotonically increasing index assigned to the next registered callback. Overflow is not a practical concern
+    // (even at an extreme rate of 1,000,000 callbacks per second, a uint64_t counter would take over 500,000 years to wrap).
+ uint64_t m_registered_callbacks = 0;
+
+    // Number of callbacks that have been called. Because callbacks run in order, this counter is also the index
+    // of the next callback expected to execute.
+ uint64_t m_called_callbacks = 0;
+
+ struct compare_callbacks {
+ bool operator()(const Callback &a, const Callback &b)
+ {
+ // We want to pop the lower index first
+ return a.first > b.first;
+ }
+ };
+
+    // Callbacks are stored in a priority_queue, ordered by callback index
+    // (the callback with the smallest index is popped first).
+ std::priority_queue<Callback, std::vector<Callback>, compare_callbacks> m_callbacks_queue;
+
+    // This lock guarantees that only one thread executes the callbacks at a time.
+ std::mutex m_callbacks_mutex;
+};
+
+} /* namespace hailort */
+
+#endif /* _HAILO_CALLBACK_REORDER_QUEUE_HPP_ */
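
To make the reordering concrete, here is a minimal self-contained sketch of the same idea, not HailoRT source: completion lambdas are tagged with a monotonically increasing index at registration time, and a min-heap releases them strictly in registration order regardless of which device finishes first. MiniReorderQueue and every name in it are hypothetical.

#include <cstdint>
#include <functional>
#include <iostream>
#include <mutex>
#include <queue>
#include <utility>
#include <vector>

class MiniReorderQueue {
public:
    using Fn = std::function<void()>;

    // Tag the callback with a registration index; the returned wrapper enqueues
    // the completion and then drains everything that is ready, in index order.
    Fn wrap(Fn original)
    {
        std::lock_guard<std::mutex> lock(m_mutex);
        const uint64_t index = m_registered++;
        return [this, index, original = std::move(original)]() {
            {
                std::lock_guard<std::mutex> lock(m_mutex);
                m_queue.emplace(index, original);
            }
            drain_ready();
        };
    }

private:
    using Entry = std::pair<uint64_t, Fn>;
    struct Compare {
        bool operator()(const Entry &a, const Entry &b) const { return a.first > b.first; }
    };

    void drain_ready()
    {
        std::lock_guard<std::mutex> exec_lock(m_exec_mutex); // one executor at a time
        while (true) {
            Fn next;
            {
                std::lock_guard<std::mutex> lock(m_mutex);
                if (m_queue.empty() || (m_queue.top().first != m_called)) {
                    return; // the next-in-order callback hasn't completed yet
                }
                next = m_queue.top().second;
                m_queue.pop();
                m_called++;
            }
            next(); // run outside m_mutex, serialized by m_exec_mutex
        }
    }

    std::mutex m_mutex;      // guards the queue and the counters
    std::mutex m_exec_mutex; // serializes callback execution
    uint64_t m_registered = 0;
    uint64_t m_called = 0;
    std::priority_queue<Entry, std::vector<Entry>, Compare> m_queue;
};

int main()
{
    MiniReorderQueue queue;
    auto first  = queue.wrap([] { std::cout << "frame 0 done\n"; });
    auto second = queue.wrap([] { std::cout << "frame 1 done\n"; });
    second(); // device B finishes first...
    first();  // ...but "frame 0 done" is still printed before "frame 1 done"
    return 0;
}
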
m_written_streams_count(0),
m_read_streams_count(0),
m_next_to_read_after_drain(INVALID_CORE_OP_HANDLE)
-{}
+{
+ assert(is_multiplexer_supported());
+}
-bool PipelineMultiplexer::should_use_multiplexer()
+bool PipelineMultiplexer::is_multiplexer_supported()
{
auto disable_multiplexer_env = std::getenv(DISABLE_MULTIPLEXER_ENV_VAR);
if ((nullptr != disable_multiplexer_env) && (strnlen(disable_multiplexer_env, 2) == 1) && (strncmp(disable_multiplexer_env, "1", 1) == 0)) {
m_is_waiting_to_write[core_op_handle] = true;
hailo_status status = HAILO_SUCCESS;
m_writing_cv.wait(lock, [this, core_op_handle, &status] {
- if (!has_more_than_one_core_op_instance() || !should_use_multiplexer()) {
+ if (!has_more_than_one_core_op_instance()) {
return true;
}
hailo_status status = HAILO_SUCCESS;
auto wait_res = m_reading_cv.wait_for(lock, timeout, [this, core_op_handle, stream_name, &drain_frames, &status] {
- if (should_core_op_stop(core_op_handle)) {
+ if (m_should_core_op_stop[core_op_handle][stream_name]) {
status = HAILO_STREAM_ABORTED_BY_USER;
return true; // return true so that the wait will finish
}
#include "common/barrier.hpp"
-#include "vdevice/scheduler/network_group_scheduler.hpp"
+#include "vdevice/scheduler/scheduler.hpp"
#include <mutex>
#include <queue>
namespace hailort
{
-#define DISABLE_MULTIPLEXER_ENV_VAR "HAILO_DISABLE_MULTIPLEXER"
+#define DISABLE_MULTIPLEXER_ENV_VAR "HAILO_DISABLE_MULTIPLEXER_INTERNAL"
using multiplexer_core_op_handle_t = uint32_t;
using run_once_for_stream_handle_t = uint32_t;
void set_can_output_vstream_read(multiplexer_core_op_handle_t core_op_handle, const std::string &vstream_name, bool can_read);
- static bool should_use_multiplexer();
+ static bool is_multiplexer_supported();
private:
namespace hailort
{
-hailo_status MultiDeviceScheduledInputStream::send_pending_buffer(size_t device_index)
+Expected<std::unique_ptr<MultiDeviceScheduledInputStream>> MultiDeviceScheduledInputStream::create(
+ std::map<device_id_t, std::reference_wrapper<VdmaInputStreamBase>> &&streams,
+ const scheduler_core_op_handle_t &core_op_handle,
+ EventPtr &&core_op_activated_event,
+ const LayerInfo &layer_info,
+ CoreOpsSchedulerWeakPtr core_ops_scheduler)
+{
+ auto buffer_frame_size = streams.begin()->second.get().get_buffer_frames_size();
+ CHECK_EXPECTED(buffer_frame_size);
+ auto frame_size = streams.begin()->second.get().get_frame_size();
+ auto buffers_queue_ptr = BuffersQueue::create_unique(frame_size, (streams.size() * buffer_frame_size.value()));
+ CHECK_EXPECTED(buffers_queue_ptr);
+
+ auto status = HAILO_UNINITIALIZED;
+ auto stream = make_unique_nothrow<MultiDeviceScheduledInputStream>(std::move(streams),
+ core_op_handle, std::move(core_op_activated_event), layer_info,
+ core_ops_scheduler, buffers_queue_ptr.release(), status);
+ CHECK_AS_EXPECTED((nullptr != stream), HAILO_OUT_OF_HOST_MEMORY);
+ CHECK_SUCCESS_AS_EXPECTED(status);
+ return stream;
+}
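
create() above follows the codebase's Expected-returning factory idiom: the constructor cannot return a status, so it reports failure through an out-parameter, and the factory turns allocation or construction failures into error values. Below is a hedged, self-contained sketch of that idiom; Expected, Status, and Widget here are hypothetical stand-ins, not hailort types.

#include <memory>
#include <new>
#include <utility>

enum class Status { success, out_of_memory, invalid_argument };

// Minimal stand-in for hailort::Expected: a value plus a status.
template<class T>
struct Expected {
    T value{};
    Status status = Status::success;
    explicit operator bool() const { return Status::success == status; }
};

class Widget {
public:
    static Expected<std::unique_ptr<Widget>> create(int size)
    {
        auto status = Status::success;
        auto widget = std::unique_ptr<Widget>(new (std::nothrow) Widget(size, status));
        if (nullptr == widget) {
            return {nullptr, Status::out_of_memory}; // allocation failure (CHECK_AS_EXPECTED analogue)
        }
        if (Status::success != status) {
            return {nullptr, status};                // construction failure (CHECK_SUCCESS_AS_EXPECTED analogue)
        }
        return {std::move(widget), Status::success};
    }

private:
    // Constructors can't return errors, so validation results are propagated
    // through the status out-parameter, as in MultiDeviceScheduledInputStream.
    Widget(int size, Status &status)
    {
        status = (size > 0) ? Status::success : Status::invalid_argument;
    }
};

int main()
{
    auto widget = Widget::create(16);
    return widget ? 0 : 1;
}
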
+
+hailo_status MultiDeviceScheduledInputStream::send_pending_buffer(const device_id_t &device_id)
{
    auto buffer = m_queue->front(get_timeout()); // Counting on the scheduler to not allow parallel calls to this function
if (HAILO_STREAM_ABORTED_BY_USER == buffer.status()) {
return buffer.status();
}
CHECK_EXPECTED_AS_STATUS(buffer);
- auto status = m_streams[device_index].get().write_buffer_only(buffer.value());
+ assert(contains(m_streams, device_id));
+ auto status = m_streams.at(device_id).get().write_buffer_only(buffer.value());
if (HAILO_STREAM_ABORTED_BY_USER == status) {
LOGGER__INFO("send_pending_buffer was aborted.");
return status;
CHECK_SUCCESS(status);
m_queue->pop(); // Release buffer to free the queue for other dequeues
- VdmaInputStream &vdma_input = static_cast<VdmaInputStream&>(m_streams[device_index].get());
- return vdma_input.send_pending_buffer();
+ auto &vdma_input = dynamic_cast<VdmaInputStreamBase&>(m_streams.at(device_id).get());
+ return vdma_input.send_pending_buffer(device_id);
}
-Expected<size_t> MultiDeviceScheduledInputStream::sync_write_raw_buffer(const MemoryView &buffer,
+hailo_status MultiDeviceScheduledInputStream::write_impl(const MemoryView &buffer,
const std::function<bool()> &should_cancel)
{
- auto core_ops_scheduler = m_core_ops_scheduler.lock();
- CHECK_AS_EXPECTED(core_ops_scheduler, HAILO_INTERNAL_FAILURE);
-
- auto status = core_ops_scheduler->wait_for_write(m_core_op_handle, name(), get_timeout(), should_cancel);
- if (HAILO_STREAM_ABORTED_BY_USER == status) {
- LOGGER__INFO("Write to stream was aborted.");
- return make_unexpected(status);
+ if (should_cancel()) {
+ return HAILO_STREAM_ABORTED_BY_USER;
}
- CHECK_SUCCESS_AS_EXPECTED(status);
- status = m_queue->push(buffer, get_timeout());
+ auto core_ops_scheduler = m_core_ops_scheduler.lock();
+ CHECK(core_ops_scheduler, HAILO_INTERNAL_FAILURE);
- auto write_finish_status = core_ops_scheduler->signal_write_finish(m_core_op_handle, name(), status != HAILO_SUCCESS);
+ auto status = m_queue->push(buffer, get_timeout());
if (HAILO_STREAM_ABORTED_BY_USER == status) {
LOGGER__INFO("'push' was aborted.");
- return make_unexpected(status);
+ return status;
}
- CHECK_SUCCESS_AS_EXPECTED(status);
+ CHECK_SUCCESS(status);
+ auto write_finish_status = core_ops_scheduler->signal_frame_pending_to_send(m_core_op_handle, name());
if (HAILO_STREAM_ABORTED_BY_USER == write_finish_status) {
- return make_unexpected(write_finish_status);
+ return write_finish_status;
}
- CHECK_SUCCESS_AS_EXPECTED(write_finish_status);
+ CHECK_SUCCESS(write_finish_status);
- return buffer.size();
+ return HAILO_SUCCESS;
}
Expected<size_t> MultiDeviceScheduledInputStream::get_pending_frames_count() const
hailo_status MultiDeviceScheduledInputStream::abort()
{
auto status = HAILO_SUCCESS; // Best effort
- for (auto &stream : m_streams) {
+ for (const auto &pair : m_streams) {
+ auto &stream = pair.second;
auto abort_status = stream.get().abort();
if (HAILO_SUCCESS != status) {
LOGGER__ERROR("Failed to abort input stream. (status: {} device: {})", status, stream.get().get_dev_id());
hailo_status MultiDeviceScheduledInputStream::clear_abort()
{
auto status = HAILO_SUCCESS; // Best effort
- for (auto &stream : m_streams) {
+ for (const auto &pair : m_streams) {
+ auto &stream = pair.second;
auto clear_abort_status = stream.get().clear_abort();
if ((HAILO_SUCCESS != clear_abort_status) && (HAILO_STREAM_NOT_ACTIVATED != clear_abort_status)) {
LOGGER__ERROR("Failed to clear abort input stream. (status: {} device: {})", clear_abort_status, stream.get().get_dev_id());
std::atomic_bool m_should_stop;
};
-class MultiDeviceScheduledInputStream : public ScheduledInputStream {
+// Input stream used by the scheduler over multiple devices with the SYNC API (with the async API,
+// ScheduledAsyncInputStream handles both single and multiple devices).
+class MultiDeviceScheduledInputStream : public ScheduledInputStreamBase {
public:
+ static Expected<std::unique_ptr<MultiDeviceScheduledInputStream>> create(
+ std::map<device_id_t, std::reference_wrapper<VdmaInputStreamBase>> &&streams,
+ const scheduler_core_op_handle_t &core_op_handle,
+ EventPtr &&core_op_activated_event,
+ const LayerInfo &layer_info,
+ CoreOpsSchedulerWeakPtr core_ops_scheduler);
+
MultiDeviceScheduledInputStream(
- std::vector<std::reference_wrapper<VdmaInputStream>> &&streams,
+ std::map<device_id_t, std::reference_wrapper<VdmaInputStreamBase>> &&streams,
const scheduler_core_op_handle_t &core_op_handle,
EventPtr &&core_op_activated_event,
const LayerInfo &layer_info,
CoreOpsSchedulerWeakPtr core_ops_scheduler,
std::unique_ptr<BuffersQueue> &&frames_queue,
hailo_status &status) :
- ScheduledInputStream(std::move(streams), core_op_handle,
+ ScheduledInputStreamBase(std::move(streams), core_op_handle,
std::move(core_op_activated_event), layer_info, core_ops_scheduler, status),
m_queue(std::move(frames_queue))
{}
- virtual hailo_status send_pending_buffer(size_t device_index = 0) override;
+ virtual hailo_status send_pending_buffer(const device_id_t &device_id) override;
virtual Expected<size_t> get_pending_frames_count() const override;
protected:
- virtual Expected<size_t> sync_write_raw_buffer(const MemoryView &buffer,
- const std::function<bool()> &should_cancel = []() { return false; }) override;
+ virtual hailo_status write_impl(const MemoryView &buffer, const std::function<bool()> &should_cancel) override;
virtual hailo_status abort() override;
virtual hailo_status clear_abort() override;
+++ /dev/null
-/**
- * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
- * Distributed under the MIT license (https://opensource.org/licenses/MIT)
- **/
-/**
- * TODO: Rename in a different PR
- * @file network_group_scheduler.cpp
- * @brief: Network scheduler
- **/
-
-#include "common/os_utils.hpp"
-
-
-#include "vdevice/scheduler/network_group_scheduler.hpp"
-#include "vdevice/vdevice_core_op.hpp"
-#include "vdevice/scheduler/scheduler_oracle.hpp"
-#include "vdevice/vdevice_stream_multiplexer_wrapper.hpp"
-#include "hef/hef_internal.hpp"
-#include "utils/profiler/tracer_macros.hpp"
-
-#include <fstream>
-
-
-namespace hailort
-{
-
-#define SINGLE_CONTEXT_BATCH_SIZE (1)
-#define DEFAULT_BURST_SIZE (1)
-
-// TODO: use device handles instead device count
-CoreOpsScheduler::CoreOpsScheduler(hailo_scheduling_algorithm_t algorithm, uint32_t device_count, std::vector<std::string> &devices_bdf_id,
- std::vector<std::string> &devices_arch) :
- SchedulerBase(algorithm, device_count, devices_bdf_id, devices_arch),
- m_changing_current_batch_size(),
- m_should_core_op_stop(),
- m_before_read_write_mutex(),
- m_core_ops_cvs(),
- m_should_monitor(false)
-#if defined(__GNUC__)
- , m_mon_tmp_output()
-#endif
-{
- // TODO: HRT-7391 - Change scheduler monitor to work only when MON command is active
- m_should_monitor = SchedulerMon::should_monitor();
- if (m_should_monitor) {
- auto status = start_mon();
- if (HAILO_SUCCESS != status) {
- LOGGER__ERROR("Failed to initiate hailo monitor of networks, with status {}", status);
- }
- }
-}
-
-CoreOpsScheduler::~CoreOpsScheduler()
-{
- for (auto device_info : m_devices) {
- if (INVALID_CORE_OP_HANDLE != device_info->current_core_op_handle) {
- auto current_core_op = m_scheduled_core_ops[device_info->current_core_op_handle]->get_core_op();
- auto current_core_op_bundle = std::dynamic_pointer_cast<VDeviceCoreOp>(current_core_op);
- assert(nullptr != current_core_op_bundle);
- auto vdma_core_op = current_core_op_bundle->get_core_op_by_device_index(device_info->device_id);
- if (!vdma_core_op) {
- LOGGER__ERROR("Error retrieving core-op in scheduler destructor");
- } else {
- static const auto RESUME_PENDING_STREAM_TRANSFERS = true;
- if (HAILO_SUCCESS != VdmaConfigManager::switch_core_op(vdma_core_op.value(), nullptr, 0,
- RESUME_PENDING_STREAM_TRANSFERS)) {
- LOGGER__ERROR("Error deactivating core-op when destroying scheduler");
- }
- }
- }
- }
-
- if (m_should_monitor) {
- m_should_monitor = false;
- m_mon_shutdown_event->signal();
- if (m_mon_thread.joinable()) {
- m_mon_thread.join();
- }
- }
-}
-
-Expected<CoreOpsSchedulerPtr> CoreOpsScheduler::create_round_robin(uint32_t device_count, std::vector<std::string> &devices_bdf_id, std::vector<std::string> &devices_arch)
-{
- auto ptr = make_shared_nothrow<CoreOpsScheduler>(HAILO_SCHEDULING_ALGORITHM_ROUND_ROBIN, device_count, devices_bdf_id, devices_arch);
- CHECK_AS_EXPECTED(nullptr != ptr, HAILO_OUT_OF_HOST_MEMORY);
-
- return ptr;
-}
-
-std::string get_curr_pid_as_str()
-{
- return std::to_string(OsUtils::get_curr_pid());
-}
-
-hailo_status CoreOpsScheduler::start_mon()
-{
-#if defined(__GNUC__)
- m_last_measured_timestamp = std::chrono::steady_clock::now();
- m_mon_shutdown_event = Event::create_shared(Event::State::not_signalled);
- CHECK(nullptr != m_mon_shutdown_event, HAILO_OUT_OF_HOST_MEMORY);
- auto device_count = get_device_count();
- for (uint32_t i = 0; i < device_count; i++) {
- m_last_measured_utilization_timestamp[i] = {};
- m_device_has_drained_everything[i] = true;
- m_device_utilization[i] = 0;
- }
-
- auto tmp_file = open_temp_mon_file();
- CHECK_EXPECTED_AS_STATUS(tmp_file);
- m_mon_tmp_output = tmp_file.release();
-
- m_mon_thread = std::thread([this] ()
- {
- while (m_should_monitor) {
- auto status = m_mon_shutdown_event->wait(DEFAULT_SCHEDULER_MON_INTERVAL);
- if (HAILO_TIMEOUT == status) {
- dump_state();
- } else if (HAILO_SUCCESS != status) {
- LOGGER__ERROR("Scheduler monitor failed with status {}", status);
- return;
- }
- }
- return;
- });
-
- return HAILO_SUCCESS;
-#else
- return HAILO_NOT_IMPLEMENTED;
-#endif
-}
-
-#if defined(__GNUC__)
-Expected<std::shared_ptr<TempFile>> CoreOpsScheduler::open_temp_mon_file()
-{
- std::string file_name = get_curr_pid_as_str();
- auto tmp_file = TempFile::create(file_name, SCHEDULER_MON_TMP_DIR);
- CHECK_EXPECTED(tmp_file);
-
- auto tmp_file_ptr = make_shared_nothrow<TempFile>(tmp_file.release());
- CHECK_AS_EXPECTED(nullptr != tmp_file_ptr, HAILO_OUT_OF_HOST_MEMORY);
-
- return tmp_file_ptr;
-}
-
-void CoreOpsScheduler::dump_state()
-{
- auto file = LockedFile::create(m_mon_tmp_output->name(), "w");
- if (HAILO_SUCCESS != file.status()) {
- LOGGER__ERROR("Failed to open and lock file {}, with status: {}", m_mon_tmp_output->name(), file.status());
- return;
- }
-
- ProtoMon mon;
- mon.set_pid(get_curr_pid_as_str());
- time_dependent_events_cycle_calc();
- log_monitor_networks_infos(mon);
- log_monitor_device_infos(mon);
- log_monitor_frames_infos(mon);
-
- // Clear accumulators
- for (auto &handle_core_op_utilization_pair : m_core_op_utilization) {
- handle_core_op_utilization_pair.second = 0;
- }
- for (auto &handle_fps_pair : m_fps_accumulator) {
- handle_fps_pair.second = 0;
- }
- for (auto &handle_device_utilization_pair: m_device_utilization) {
- handle_device_utilization_pair.second = 0;
- }
-
- if (!mon.SerializeToFileDescriptor(file->get_fd())) {
- LOGGER__ERROR("Failed to SerializeToFileDescriptor(), with errno: {}", errno);
- }
-}
-#endif
-
-std::string CoreOpsScheduler::get_core_op_name(const scheduler_core_op_handle_t &core_op_handle)
-{
- assert(m_scheduled_core_ops.size() > core_op_handle);
- return m_scheduled_core_ops[core_op_handle]->get_core_op_name();
-}
-
-// TODO: HRT-9804 - Change monitor to use the tracer design mechanism (curently this functions uses private members)
-void CoreOpsScheduler::time_dependent_events_cycle_calc()
-{
- auto curr_time = std::chrono::steady_clock::now();
- m_last_measured_time_duration = std::chrono::duration_cast<std::chrono::duration<double>>(curr_time - m_last_measured_timestamp).count();
-
- for (auto device_info : m_devices) {
- if (!m_device_has_drained_everything[device_info->device_id]) {
- update_utilization_read_buffers_finished(device_info->device_id, device_info->current_core_op_handle, false);
- }
- }
-
- m_last_measured_timestamp = curr_time;
-}
-
-void CoreOpsScheduler::log_monitor_device_infos(ProtoMon &mon)
-{
- for (auto device_info : m_devices) {
- assert(contains(m_device_utilization, device_info->device_id));
- auto curr_device_utilization = m_device_utilization[device_info->device_id];
- auto utilization_precentage = ((curr_device_utilization * 100) / m_last_measured_time_duration);
-
- auto device_infos = mon.add_device_infos();
- device_infos->set_device_id(device_info->device_bdf_id);
- device_infos->set_utilization(utilization_precentage);
- device_infos->set_device_arch(device_info->device_arch);
- }
-}
-
-void CoreOpsScheduler::log_monitor_networks_infos(ProtoMon &mon)
-{
- for (uint32_t core_op_handle = 0; core_op_handle < m_core_op_utilization.size(); core_op_handle++) {
- assert(contains(m_core_op_utilization, core_op_handle));
- auto curr_core_op_utilization = m_core_op_utilization[core_op_handle];
- auto utilization = ((curr_core_op_utilization * 100) / m_last_measured_time_duration);
- auto outputs_count = static_cast<uint32_t>(m_scheduled_core_ops[core_op_handle]->get_outputs_names().size());
- auto fps = static_cast<double>((m_fps_accumulator[core_op_handle] / outputs_count) / m_last_measured_time_duration);
-
- auto net_info = mon.add_networks_infos();
- net_info->set_network_name(get_core_op_name(core_op_handle));
- net_info->set_utilization(utilization);
- net_info->set_fps(fps);
- }
-}
-
-void CoreOpsScheduler::log_monitor_frames_infos(ProtoMon &mon)
-{
- for (uint32_t core_op_handle = 0; core_op_handle < m_scheduled_core_ops.size(); core_op_handle++) {
- auto net_frames_info = mon.add_net_frames_infos();
- net_frames_info->set_network_name(get_core_op_name(core_op_handle));
-
- for (auto &stream_name : m_scheduled_core_ops[core_op_handle]->get_inputs_names()) {
- auto stream_frames_info = net_frames_info->add_streams_frames_infos();
- stream_frames_info->set_stream_name(stream_name);
- stream_frames_info->set_stream_direction(PROTO__STREAM_DIRECTION__HOST_TO_DEVICE);
- auto status = set_h2d_frames_counters(core_op_handle, stream_name, *stream_frames_info);
- if (HAILO_SUCCESS != status) {
- LOGGER__ERROR("Failed to set stream's {} frames count, status = {}", stream_name, status);
- continue;
- }
- }
-
- for (auto &stream_name : m_scheduled_core_ops[core_op_handle]->get_outputs_names()) {
- auto stream_frames_info = net_frames_info->add_streams_frames_infos();
- stream_frames_info->set_stream_name(stream_name);
- stream_frames_info->set_stream_direction(PROTO__STREAM_DIRECTION__DEVICE_TO_HOST);
- auto status = set_d2h_frames_counters(core_op_handle, stream_name, *stream_frames_info);
- if (HAILO_SUCCESS != status) {
- LOGGER__ERROR("Failed to set stream's {} frames count, status = {}", stream_name, status);
- continue;
- }
- }
- }
-}
-
-hailo_status CoreOpsScheduler::set_h2d_frames_counters(const scheduler_core_op_handle_t &core_op_handle, const std::string &stream_name,
- ProtoMonStreamFramesInfo &stream_frames_info)
-{
- assert(m_scheduled_core_ops.size() > core_op_handle);
- auto current_cng = m_scheduled_core_ops[core_op_handle]->get_core_op();
-
- auto input_stream = current_cng->get_input_stream_by_name(stream_name);
- CHECK_EXPECTED_AS_STATUS(input_stream);
-
- InputStreamBase &vdevice_input = static_cast<InputStreamBase&>(input_stream->get());
- auto buffer_frames_size = vdevice_input.get_buffer_frames_size();
- if (HAILO_SUCCESS == buffer_frames_size.status()) {
- stream_frames_info.set_buffer_frames_size(static_cast<int32_t>(buffer_frames_size.value()));
- } else {
- stream_frames_info.set_buffer_frames_size(SCHEDULER_MON_NAN_VAL);
- }
-
- auto pending_frames_count = vdevice_input.get_pending_frames_count();
- if (HAILO_SUCCESS == pending_frames_count.status()) {
- stream_frames_info.set_pending_frames_count(static_cast<int32_t>(pending_frames_count.value()));
- } else {
- stream_frames_info.set_pending_frames_count(SCHEDULER_MON_NAN_VAL);
- }
-
- return HAILO_SUCCESS;
-}
-
-hailo_status CoreOpsScheduler::set_d2h_frames_counters(const scheduler_core_op_handle_t &core_op_handle, const std::string &stream_name,
- ProtoMonStreamFramesInfo &stream_frames_info)
-{
- assert(m_scheduled_core_ops.size() > core_op_handle);
- auto current_cng = m_scheduled_core_ops[core_op_handle]->get_core_op();
-
- auto output_stream = current_cng->get_output_stream_by_name(stream_name);
- CHECK_EXPECTED_AS_STATUS(output_stream);
-
- OutputStreamBase &vdevice_output = static_cast<OutputStreamBase&>(output_stream->get());
- auto buffer_frames_size = vdevice_output.get_buffer_frames_size();
- if (HAILO_SUCCESS == buffer_frames_size.status()) {
- stream_frames_info.set_buffer_frames_size(static_cast<int32_t>(buffer_frames_size.value()));
- } else {
- stream_frames_info.set_buffer_frames_size(SCHEDULER_MON_NAN_VAL);
- }
-
- auto pending_frames_count = vdevice_output.get_pending_frames_count();
- if (HAILO_SUCCESS == pending_frames_count.status()) {
- stream_frames_info.set_pending_frames_count(static_cast<int32_t>(pending_frames_count.value()));
- } else {
- stream_frames_info.set_pending_frames_count(SCHEDULER_MON_NAN_VAL);
- }
-
- return HAILO_SUCCESS;
-}
-
-Expected<scheduler_core_op_handle_t > CoreOpsScheduler::add_core_op(std::shared_ptr<CoreOp> added_cng)
-{
- scheduler_core_op_handle_t core_op_handle = INVALID_CORE_OP_HANDLE;
- {
- std::unique_lock<std::mutex> lock(m_before_read_write_mutex);
-
- core_op_handle = static_cast<uint32_t>(m_scheduled_core_ops.size());
- TRACE(AddCoreOpTrace, "", added_cng->name(), DEFAULT_SCHEDULER_TIMEOUT.count(), DEFAULT_SCHEDULER_MIN_THRESHOLD, core_op_handle);
-
- auto stream_infos = added_cng->get_all_stream_infos();
- CHECK_EXPECTED(stream_infos);
-
- auto scheduled_core_op = ScheduledCoreOp::create(added_cng, stream_infos.value());
- CHECK_EXPECTED(scheduled_core_op);
-
- m_scheduled_core_ops.emplace_back(scheduled_core_op.release());
-
- m_changing_current_batch_size[core_op_handle] = false;
-
- for (const auto &stream_info : stream_infos.value()) {
- m_should_core_op_stop[core_op_handle][stream_info.name] = false;
- }
-
- for (auto& device_info : m_devices) {
- for (const auto &stream_info : stream_infos.value()) {
- if (HAILO_H2D_STREAM == stream_info.direction) {
- device_info->current_cycle_requested_transferred_frames_h2d[core_op_handle][stream_info.name] = 0;
- } else {
- device_info->current_cycle_finished_transferred_frames_d2h[core_op_handle][stream_info.name] = 0;
- device_info->current_cycle_finished_read_frames_d2h[core_op_handle][stream_info.name] = 0;
- }
- }
- }
-
- // Monitor members
- m_core_op_utilization[core_op_handle] = 0;
- m_fps_accumulator[core_op_handle] = 0;
-
- auto network_cvs = ScheduledCoreOpCV::create(added_cng);
- CHECK_EXPECTED(network_cvs);
- m_core_ops_cvs[core_op_handle] = network_cvs.release();
- m_core_op_priority[HAILO_SCHEDULER_PRIORITY_NORMAL].emplace_back(core_op_handle);
- }
-
- return core_op_handle;
-}
-
-bool CoreOpsScheduler::is_core_op_active(const scheduler_core_op_handle_t &core_op_handle)
-{
- for (auto device_info : m_devices) {
- if (core_op_handle == device_info->current_core_op_handle) {
- return true;
- }
- }
-
- return false;
-}
-
-bool CoreOpsScheduler::is_multi_device()
-{
- return m_devices.size() > 1;
-}
-
-hailo_status CoreOpsScheduler::wait_for_write(const scheduler_core_op_handle_t &core_op_handle, const std::string &stream_name,
- const std::chrono::milliseconds &timeout, const std::function<bool()> &should_cancel)
-{
- {
- std::unique_lock<std::mutex> lock(m_before_read_write_mutex);
-
- hailo_status status = HAILO_SUCCESS;
- auto wait_res = m_core_ops_cvs[core_op_handle]->wait_for(stream_name, lock, timeout, [this, core_op_handle, stream_name, &should_cancel, &status] {
-
- if (should_cancel()) {
- status = HAILO_STREAM_ABORTED_BY_USER;
- return true; // return true so that the wait will finish
- }
-
- if (should_core_op_stop(core_op_handle)) {
- status = HAILO_STREAM_ABORTED_BY_USER;
- return true; // return true so that the wait will finish
- }
-
- return m_scheduled_core_ops[core_op_handle]->can_stream_write(stream_name);
- });
- CHECK(wait_res, HAILO_TIMEOUT, "{} (H2D) failed with status={}, timeout={}ms", stream_name, HAILO_TIMEOUT, timeout.count());
- if (HAILO_STREAM_ABORTED_BY_USER == status) {
- return status;
- }
- CHECK_SUCCESS(status);
-
- m_scheduled_core_ops[core_op_handle]->mark_frame_sent();
- m_scheduled_core_ops[core_op_handle]->requested_write_frames().increase(stream_name);
- }
-
- return HAILO_SUCCESS;
-}
-
-hailo_status CoreOpsScheduler::signal_write_finish(const scheduler_core_op_handle_t &core_op_handle, const std::string &stream_name,
- bool did_write_fail)
-{
- {
- std::unique_lock<std::mutex> lock(m_before_read_write_mutex);
- assert(m_scheduled_core_ops.size() > core_op_handle);
- auto scheduled_core_op = m_scheduled_core_ops[core_op_handle];
-
- if (did_write_fail) {
- scheduled_core_op->requested_write_frames().decrease(stream_name);
- return HAILO_SUCCESS;
- }
-
- if (should_core_op_stop(core_op_handle)) {
- return HAILO_STREAM_ABORTED_BY_USER;
- }
-
- scheduled_core_op->finished_write_frames().increase(stream_name);
- scheduled_core_op->requested_write_frames().decrease(stream_name);
-
- auto device_id = CoreOpsSchedulerOracle::get_avail_device(*this, core_op_handle);
- if (INVALID_DEVICE_ID != device_id) {
- auto status = switch_core_op(core_op_handle, device_id);
- if (HAILO_STREAM_ABORTED_BY_USER == status) {
- LOGGER__INFO("switch_core_op has failed with status=HAILO_STREAM_ABORTED_BY_USER");
- return status;
- }
- CHECK_SUCCESS(status);
- }
-
- auto status = optimize_streaming_if_enabled(core_op_handle);
- if (HAILO_STREAM_ABORTED_BY_USER == status) {
- return status;
- }
- CHECK_SUCCESS(status);
- }
-
- return HAILO_SUCCESS;
-}
-
-hailo_status CoreOpsScheduler::switch_core_op(const scheduler_core_op_handle_t &core_op_handle, uint32_t device_id, bool /*keep_nn_config*/)
-{
- auto scheduled_core_op = m_scheduled_core_ops[core_op_handle];
- auto curr_device_info = m_devices[device_id];
-
- // initialize current cycle maps
- for (const auto &name : scheduled_core_op->get_inputs_names()) {
- curr_device_info->current_cycle_requested_transferred_frames_h2d[core_op_handle][name] = 0;
- }
-
- for (const auto &name : scheduled_core_op->get_outputs_names()) {
- curr_device_info->current_cycle_finished_transferred_frames_d2h[core_op_handle][name] = 0;
- curr_device_info->current_cycle_finished_read_frames_d2h[core_op_handle][name] = 0;
- }
-
- uint16_t batch_size = SINGLE_CONTEXT_BATCH_SIZE;
- uint16_t burst_size = static_cast<uint16_t>(scheduled_core_op->finished_write_frames_min_value());
- // In multi device finished write frame could be bigger then the vdma buffers we have, can be removed after dynamic desc binding.
- if (is_multi_device()) {
- burst_size = std::min(burst_size, get_min_avail_buffers_count(core_op_handle, device_id));
- // We limit the max burst size to (dev_count * max_batch) to keep former behavior (this was the buffer_pool size)
- // TODO: remove this limitation and work with user-controlled max_burst_size
- burst_size = std::min(burst_size, static_cast<uint16_t>(scheduled_core_op->get_max_batch_size() * get_device_count()));
- }
-
- if (scheduled_core_op->use_dynamic_batch_flow()) {
- batch_size = std::min(static_cast<uint16_t>(scheduled_core_op->finished_write_frames_min_value()), scheduled_core_op->get_max_batch_size());
- burst_size = batch_size;
- }
-
- bool has_same_batch_size_as_previous = (curr_device_info->current_batch_size == batch_size);
- curr_device_info->current_batch_size = batch_size;
-
- if (curr_device_info->current_core_op_handle != core_op_handle) {
- curr_device_info->is_switching_core_op = false;
- }
-
- if ((core_op_handle != curr_device_info->current_core_op_handle) || (!has_same_batch_size_as_previous)) {
- assert(m_scheduled_core_ops.size() > core_op_handle);
- auto next_active_cng = scheduled_core_op->get_core_op();
- auto next_active_cng_wrapper = std::dynamic_pointer_cast<VDeviceCoreOp>(next_active_cng);
- assert(nullptr != next_active_cng_wrapper);
- auto next_active_cng_expected = next_active_cng_wrapper->get_core_op_by_device_index(curr_device_info->device_id);
- CHECK_EXPECTED_AS_STATUS(next_active_cng_expected);
-
- std::shared_ptr<VdmaConfigCoreOp> current_active_vdma_cng = nullptr;
- if (curr_device_info->current_core_op_handle != INVALID_CORE_OP_HANDLE) {
- auto current_active_cng = m_scheduled_core_ops[curr_device_info->current_core_op_handle]->get_core_op();
- auto current_active_cng_bundle = std::dynamic_pointer_cast<VDeviceCoreOp>(current_active_cng);
- assert(nullptr != current_active_cng_bundle);
- auto current_active_cng_expected = current_active_cng_bundle->get_core_op_by_device_index(curr_device_info->device_id);
- CHECK_EXPECTED_AS_STATUS(current_active_cng_expected);
- current_active_vdma_cng = current_active_cng_expected.release();
- }
-
- TRACE(SwitchCoreOpTrace, "", core_op_handle);
- static const auto RESUME_PENDING_STREAM_TRANSFERS = true;
- auto status = VdmaConfigManager::switch_core_op(current_active_vdma_cng, next_active_cng_expected.value(), batch_size,
-
- RESUME_PENDING_STREAM_TRANSFERS);
- CHECK_SUCCESS(status, "Failed switching core-op");
- // Clear the ready_to_switch flag from old activation
- scheduled_core_op->mark_unready_to_switch();
-
- // Register to get interrupts - has to be after core-op is activated
- for (auto &output_stream : next_active_cng_expected.value()->get_output_streams()) {
- OutputStreamBase &vdevice_output = static_cast<OutputStreamBase&>(output_stream.get());
- status = vdevice_output.register_interrupt_callback(
- [this, name = output_stream.get().name(), format = vdevice_output.get_layer_info().format.order, scheduled_core_op, core_op_handle, device_id]
- (uint32_t frames) {
- auto should_notify_next = false;
- {
- std::unique_lock<std::mutex> lock(m_before_read_write_mutex);
- // In order to meet performance requirement we enable switch only after first frame is arrived.
- // TODO: remove this hack / move it to oracle and add another scheduling algorithm for it
- scheduled_core_op->mark_ready_to_switch();
- if (hailo_format_order_t::HAILO_FORMAT_ORDER_HAILO_NMS != format) {
- TRACE(OutputVdmaEnqueueTrace, "", core_op_handle, name, frames);
- // TODO: Remove d2h_finished_transferred_frames and use current_cycle_finished_transferred_frames_d2h instead
- scheduled_core_op->d2h_finished_transferred_frames(name) += frames;
- m_devices[device_id]->current_cycle_finished_transferred_frames_d2h[core_op_handle][name] += frames;
- }
-
- auto has_drained_everything = has_core_op_drained_everything(core_op_handle, device_id);
-
- if (m_should_monitor) {
- update_utilization_read_buffers_finished(device_id, core_op_handle, has_drained_everything);
- }
-
- // If ng finished and we didnt choose next lets choose without checking threshold
- if (!m_devices[device_id]->is_switching_core_op && has_drained_everything) {
- auto was_chosen = choose_next_core_op(device_id, true);
- if (!was_chosen) {
- choose_next_core_op(device_id, false);
- }
- }
-
- if (m_devices[device_id]->is_switching_core_op && has_drained_everything) {
- should_notify_next = true;
- }
- }
- // Notify stream that new frame was accepted (wait_for_read)
- m_core_ops_cvs[core_op_handle]->notify_one(name);
- if (should_notify_next) {
- auto next_core_op = m_devices[device_id]->next_core_op_handle;
- // Notify all the threads of the next ng (wait_for_read)
- m_core_ops_cvs[next_core_op]->notify_all();
- }
- });
- CHECK_SUCCESS(status);
- }
- }
-
- scheduled_core_op->set_last_run_timestamp(std::chrono::steady_clock::now()); // Mark timestamp on activation
- curr_device_info->current_core_op_handle = core_op_handle;
-
- // Finished switching batch size
- m_changing_current_batch_size[core_op_handle] = false;
-
- auto status = send_all_pending_buffers(core_op_handle, device_id, burst_size);
- if (HAILO_STREAM_ABORTED_BY_USER == status) {
- LOGGER__INFO("send_all_pending_buffers has failed with status=HAILO_STREAM_ABORTED_BY_USER");
- return status;
- }
- CHECK_SUCCESS(status);
-
- return HAILO_SUCCESS;
-}
-
-hailo_status CoreOpsScheduler::send_all_pending_buffers(const scheduler_core_op_handle_t &core_op_handle, uint32_t device_id, uint32_t burst_size)
-{
- auto current_device_info = m_devices[device_id];
- if ((INVALID_CORE_OP_HANDLE == current_device_info->current_core_op_handle) || (current_device_info->current_core_op_handle != core_op_handle)) {
- return HAILO_SUCCESS;
- }
-
- auto scheduled_core_op = m_scheduled_core_ops[core_op_handle];
-
- for (size_t i = 0; i < burst_size; i++) {
- auto finished_send = false;
- for (const auto &name : scheduled_core_op->get_inputs_names()) {
- if (scheduled_core_op->finished_write_frames(name) == 0) {
- finished_send = true;
- break;
- }
- }
- if (finished_send) {
- break;
- }
- for (const auto &name : scheduled_core_op->get_inputs_names()) {
- auto status = send_pending_buffer(core_op_handle, name, device_id);
- if (HAILO_STREAM_ABORTED_BY_USER == status) {
- LOGGER__INFO("send_pending_buffer has failed with status=HAILO_STREAM_ABORTED_BY_USER");
- return status;
- }
- CHECK_SUCCESS(status);
- }
- scheduled_core_op->push_device_index(device_id);
- scheduled_core_op->set_last_device_index(device_id);
-
- if (m_should_monitor) {
- update_utilization_send_started(device_id);
- }
- }
-
- return HAILO_SUCCESS;
-}
-
-hailo_status CoreOpsScheduler::send_pending_buffer(const scheduler_core_op_handle_t &core_op_handle, const std::string &stream_name,
- uint32_t device_id)
-{
- assert(m_scheduled_core_ops.size() > core_op_handle);
- auto scheduled_core_op = m_scheduled_core_ops[core_op_handle];
-
- auto current_cng = scheduled_core_op->get_core_op();
- auto input_stream = current_cng->get_input_stream_by_name(stream_name);
- CHECK_EXPECTED_AS_STATUS(input_stream);
-
- VDeviceInputStreamMultiplexerWrapper &vdevice_input = static_cast<VDeviceInputStreamMultiplexerWrapper&>(input_stream->get());
- TRACE(InputVdmaDequeueTrace, "", core_op_handle, stream_name);
- auto status = vdevice_input.send_pending_buffer(device_id);
- if (HAILO_STREAM_ABORTED_BY_USER == status) {
- LOGGER__INFO("send_pending_buffer has failed with status=HAILO_STREAM_ABORTED_BY_USER");
- return status;
- }
- CHECK_SUCCESS(status);
-
- m_devices[device_id]->current_cycle_requested_transferred_frames_h2d[core_op_handle][stream_name]++;
- scheduled_core_op->finished_write_frames().decrease(stream_name);
-
- scheduled_core_op->h2d_finished_transferred_frames().increase(stream_name);
-
- if (should_core_op_stop(core_op_handle)) {
- return HAILO_STREAM_ABORTED_BY_USER;
- }
-
- return HAILO_SUCCESS;
-}
-
-CoreOpsScheduler::ReadyInfo CoreOpsScheduler::is_core_op_ready(const scheduler_core_op_handle_t &core_op_handle, bool check_threshold)
-{
- ReadyInfo result;
- result.is_ready = false;
-
- if (should_core_op_stop(core_op_handle)) {
- // Do not switch to an aborted core-op
- return result;
- }
-
- auto scheduled_core_op = m_scheduled_core_ops[core_op_handle];
- // Check if there arent any write requests
- bool has_pending_writes = scheduled_core_op->finished_write_frames_min_value() > 0;
-
- // Check if there arent any read requests
- bool has_pending_user_reads = false;
- for (const auto &name : scheduled_core_op->get_outputs_names()) {
- if (scheduled_core_op->requested_read_frames(name) > 0) {
- has_pending_user_reads = true;
- break;
- }
- }
-
- std::vector<bool> over_threshold;
- over_threshold.reserve(scheduled_core_op->get_inputs_names().size());
- std::vector<bool> over_timeout;
- over_timeout.reserve(scheduled_core_op->get_inputs_names().size());
-
- if (check_threshold) {
- for (const auto &name : scheduled_core_op->get_inputs_names()) {
- auto threshold_exp = scheduled_core_op->get_threshold(name);
- if (!threshold_exp) {
- LOGGER__ERROR("Failed to get threshold for stream {}", name);
- return result;
- }
- auto threshold = (DEFAULT_SCHEDULER_MIN_THRESHOLD == threshold_exp.value()) ? 1 : threshold_exp.value();
- auto timeout_exp = scheduled_core_op->get_timeout();
- if (!timeout_exp) {
- LOGGER__ERROR("Failed to get timeout for stream {}", name);
- return result;
- }
- auto timeout = timeout_exp.release();
-
- // Check if there arent enough write requests to reach threshold and timeout didnt passed
- auto write_requests = scheduled_core_op->requested_write_frames(name) + scheduled_core_op->finished_write_frames(name);
- auto stream_over_threshold = write_requests >= threshold;
- auto stream_over_timeout = timeout <= (std::chrono::steady_clock::now() - scheduled_core_op->get_last_run_timestamp());
- over_threshold.push_back(stream_over_threshold);
- over_timeout.push_back(stream_over_timeout);
- if (stream_over_threshold || stream_over_timeout) {
- continue;
- } else {
- result.is_ready = false;
- return result;
- }
- }
- }
-
- result.threshold = std::all_of(over_threshold.begin(), over_threshold.end(), [](auto over) { return over; });
- result.timeout = std::all_of(over_timeout.begin(), over_timeout.end(), [](auto over) { return over; });
- result.is_ready = has_pending_writes && has_pending_user_reads;
-
- return result;
-}
-
-Expected<uint32_t> CoreOpsScheduler::wait_for_read(const scheduler_core_op_handle_t &core_op_handle, const std::string &stream_name,
- const std::chrono::milliseconds &timeout)
-{
- std::unique_lock<std::mutex> lock(m_before_read_write_mutex);
-
- auto scheduled_core_op = m_scheduled_core_ops[core_op_handle];
- scheduled_core_op->requested_read_frames().increase(stream_name);
-
- hailo_status status = HAILO_SUCCESS;
- auto wait_res = m_core_ops_cvs[core_op_handle]->wait_for(stream_name, lock, timeout, [this, core_op_handle, scheduled_core_op, stream_name, &status] {
-
- if (should_core_op_stop(core_op_handle)) {
- status = HAILO_STREAM_ABORTED_BY_USER;
- return true; // return true so that the wait will finish
- }
-
- auto device_id = CoreOpsSchedulerOracle::get_avail_device(*this, core_op_handle);
- if (INVALID_DEVICE_ID != device_id) {
- status = switch_core_op(core_op_handle, device_id);
- if (HAILO_SUCCESS != status) {
- return true; // return true so that the wait will finish
- }
- }
-
- return scheduled_core_op->can_stream_read(stream_name);
- });
- CHECK_AS_EXPECTED(wait_res, HAILO_TIMEOUT, "{} (D2H) failed with status={}, timeout={}ms", stream_name, HAILO_TIMEOUT, timeout.count());
- if (HAILO_STREAM_ABORTED_BY_USER == status) {
- return make_unexpected(status);
- }
- CHECK_SUCCESS_AS_EXPECTED(status);
-
- scheduled_core_op->requested_read_frames().decrease(stream_name);
-
- return scheduled_core_op->pop_device_index(stream_name);
-}
-
-hailo_status CoreOpsScheduler::signal_read_finish(const scheduler_core_op_handle_t &core_op_handle, const std::string &stream_name, uint32_t device_id)
-{
- auto should_notify_next = false;
- {
- std::unique_lock<std::mutex> lock(m_before_read_write_mutex);
-
- auto scheduled_core_op = m_scheduled_core_ops[core_op_handle];
-
- scheduled_core_op->finished_read_frames().increase(stream_name);
- m_devices[device_id]->current_cycle_finished_read_frames_d2h[core_op_handle][stream_name]++;
- scheduled_core_op->d2h_finished_transferred_frames().decrease(stream_name);
- m_fps_accumulator[core_op_handle]++;
-
- decrease_core_op_counters(core_op_handle);
-
- auto has_drained_everything = has_core_op_drained_everything(core_op_handle, device_id);
- if (scheduled_core_op->is_nms() && has_drained_everything) {
- // In NMS networks there is possibility that next wasn't choosen yet
- choose_next_core_op(device_id, true);
-
- // If we didnt choose with treshold or timeout lets choose without treshold
- if (!m_devices[device_id]->is_switching_core_op) {
- choose_next_core_op(device_id, false);
- }
-
- if (m_devices[device_id]->is_switching_core_op) {
- should_notify_next = true;
- }
-
- if (m_should_monitor) {
- update_utilization_read_buffers_finished(device_id, core_op_handle, has_drained_everything);
- }
- }
- }
-
- // Notify stream that frame was read and we have a space in the read buffers (wait_for_write)
- m_core_ops_cvs[core_op_handle]->notify_all();
-
- if (should_notify_next) {
- // Notify all the threads of the next ng, for nms networks this is the only place we know the network was finished (wait_for_read)
- m_core_ops_cvs[m_devices[device_id]->next_core_op_handle]->notify_all();
- }
-
- return HAILO_SUCCESS;
-}
-
-void CoreOpsScheduler::decrease_core_op_counters(const scheduler_core_op_handle_t &core_op_handle)
-{
- return m_scheduled_core_ops[core_op_handle]->decrease_current_core_op_counters();
-}
-
-bool CoreOpsScheduler::has_core_op_drained_everything(const scheduler_core_op_handle_t &core_op_handle, uint32_t device_id)
-{
- if (INVALID_CORE_OP_HANDLE == core_op_handle) {
- // If no core-op is running, consider it as drained
- return true;
- }
-
- if (core_op_all_streams_aborted(core_op_handle)) {
- // We treat core-op as drained only if all streams are aborted - to make sure there aren't any ongoing transfers
- return true;
- }
-
- if ((!m_scheduled_core_ops[core_op_handle]->is_nms()) && (is_multi_device() || m_scheduled_core_ops[core_op_handle]->use_dynamic_batch_flow())) {
- auto current_device_info = m_devices[device_id];
- auto max_transferred_h2d = get_max_value_of_unordered_map(current_device_info->current_cycle_requested_transferred_frames_h2d[core_op_handle]);
- auto min_transferred_d2h = get_min_value_of_unordered_map(current_device_info->current_cycle_finished_transferred_frames_d2h[core_op_handle]);
-
- return (max_transferred_h2d == min_transferred_d2h);
- }
-
- return m_scheduled_core_ops[core_op_handle]->has_core_op_drained_everything();
-}
-
-hailo_status CoreOpsScheduler::enable_stream(const scheduler_core_op_handle_t &core_op_handle, const std::string &stream_name)
-{
- {
- std::unique_lock<std::mutex> lock(m_before_read_write_mutex);
-
- if (!m_should_core_op_stop[core_op_handle][stream_name]) {
- return HAILO_SUCCESS;
- }
-
- m_should_core_op_stop[core_op_handle][stream_name] = false;
- }
- m_core_ops_cvs[core_op_handle]->notify_all();
-
- return HAILO_SUCCESS;
-}
-
-hailo_status CoreOpsScheduler::disable_stream(const scheduler_core_op_handle_t &core_op_handle, const std::string &stream_name)
-{
- {
- std::unique_lock<std::mutex> lock(m_before_read_write_mutex);
-
- if (m_should_core_op_stop[core_op_handle][stream_name]) {
- return HAILO_SUCCESS;
- }
-
- m_should_core_op_stop[core_op_handle][stream_name] = true;
- }
- m_core_ops_cvs[core_op_handle]->notify_all();
-
- return HAILO_SUCCESS;
-}
-
-hailo_status CoreOpsScheduler::set_timeout(const scheduler_core_op_handle_t &core_op_handle, const std::chrono::milliseconds &timeout, const std::string &/*network_name*/)
-{
- // TODO: call in loop for set_timeout with the relevant stream-names (of the given network)
- return m_scheduled_core_ops[core_op_handle]->set_timeout(timeout);
-}
-
-hailo_status CoreOpsScheduler::set_threshold(const scheduler_core_op_handle_t &core_op_handle, uint32_t threshold, const std::string &/*network_name*/)
-{
- // TODO: call in loop for set_timeout with the relevant stream-names (of the given network)
- return m_scheduled_core_ops[core_op_handle]->set_threshold(threshold);
-}
-
-hailo_status CoreOpsScheduler::set_priority(const scheduler_core_op_handle_t &core_op_handle, core_op_priority_t priority, const std::string &/*network_name*/)
-{
- CHECK(priority <= HAILO_SCHEDULER_PRIORITY_MAX, HAILO_INVALID_ARGUMENT);
- std::unique_lock<std::mutex> lock(m_before_read_write_mutex);
- auto old_priority = m_scheduled_core_ops[core_op_handle]->get_priority();
- auto &priority_vector = m_core_op_priority[old_priority];
- auto it = std::find(priority_vector.begin(), priority_vector.end(), core_op_handle);
- CHECK(it != priority_vector.end(), HAILO_INTERNAL_FAILURE);
-
- priority_vector.erase(it);
- m_scheduled_core_ops[core_op_handle]->set_priority(priority);
- m_core_op_priority[priority].push_back(core_op_handle);
-
- return HAILO_SUCCESS;
-}
-
-bool CoreOpsScheduler::choose_next_core_op(size_t device_id, bool check_threshold)
-{
- if (!m_devices[device_id]->is_switching_core_op) {
- return CoreOpsSchedulerOracle::choose_next_model(*this, m_devices[device_id]->device_id, check_threshold);
- }
- return false;
-}
-
-bool CoreOpsScheduler::should_core_op_stop(const scheduler_core_op_handle_t &core_op_handle)
-{
- for (const auto &name_flag_pair : m_should_core_op_stop[core_op_handle]) {
- if (name_flag_pair.second) {
- return true;
- }
- }
-
- return false;
-}
-
-bool CoreOpsScheduler::core_op_all_streams_aborted(const scheduler_core_op_handle_t &core_op_handle)
-{
- for (const auto &name_flag_pair : m_should_core_op_stop[core_op_handle]) {
- if (!name_flag_pair.second) {
- return false;
- }
- }
- return true;
-}
-
-void CoreOpsScheduler::notify_all()
-{
- {
- // Acquire mutex to make sure the notify_all will wake the blocking threads on the cv
- std::unique_lock<std::mutex> lock(m_before_read_write_mutex);
- }
- // TODO: consider notify only the relevant ng or stream
- for (auto &cng_cvs : m_core_ops_cvs) {
- cng_cvs.second->notify_all();
- }
-}
-
-hailo_status CoreOpsScheduler::optimize_streaming_if_enabled(const scheduler_core_op_handle_t &core_op_handle)
-{
- auto scheduled_core_op = m_scheduled_core_ops[core_op_handle];
-
- if ((!scheduled_core_op->use_dynamic_batch_flow()) && !(scheduled_core_op->is_ready_to_switch() &&
- CoreOpsSchedulerOracle::should_stop_streaming(*this, scheduled_core_op->get_priority()))) {
- for (uint32_t i = 0; i < m_devices.size(); i++) {
- uint32_t index = scheduled_core_op->get_last_device_index() + i + 1;
- index %= static_cast<uint32_t>(m_devices.size());
- auto device_info = m_devices[index];
- // If multi device check for space in the vdma buffers, the send pending buffer is waitable in the current implementation.
- // can be removed after dynamic descriptor binding support
- if (device_info->current_core_op_handle == core_op_handle &&
- (!is_multi_device() || (get_min_avail_buffers_count(core_op_handle, device_info->device_id) >= DEFAULT_BURST_SIZE))) {
- auto status = send_all_pending_buffers(core_op_handle, device_info->device_id, DEFAULT_BURST_SIZE);
- if (HAILO_STREAM_ABORTED_BY_USER == status) {
- LOGGER__INFO("send_all_pending_buffers has failed with status=HAILO_STREAM_ABORTED_BY_USER");
- return status;
- }
- CHECK_SUCCESS(status);
- }
- }
- }
-
- return HAILO_SUCCESS;
-}
-
-uint16_t CoreOpsScheduler::get_min_avail_buffers_count(const scheduler_core_op_handle_t &core_op_handle, uint32_t device_id)
-{
- auto device_info = m_devices[device_id];
- auto scheduled_core_op = m_scheduled_core_ops[core_op_handle];
-
- auto max_transferred_h2d = get_max_value_of_unordered_map(device_info->current_cycle_requested_transferred_frames_h2d[core_op_handle]);
- auto min_d2h_frames = scheduled_core_op->is_nms() ? get_min_value_of_unordered_map(device_info->current_cycle_finished_read_frames_d2h[core_op_handle]) :
- get_min_value_of_unordered_map(device_info->current_cycle_finished_transferred_frames_d2h[core_op_handle]);
- auto ongoing_frames = static_cast<uint16_t>(max_transferred_h2d - min_d2h_frames);
-
- uint16_t avail_buffers = static_cast<uint16_t>(scheduled_core_op->get_min_input_buffers_count(get_device_count()) - ongoing_frames);
-
- return avail_buffers;
-}
-
-void CoreOpsScheduler::update_utilization_timers(scheduler_device_idx_t device_id, scheduler_core_op_handle_t core_op_handle)
-{
- assert(contains(m_core_op_utilization, core_op_handle));
-
- auto time_diff = std::chrono::duration_cast<std::chrono::duration<double>>(
- std::chrono::steady_clock::now() - m_last_measured_utilization_timestamp[device_id]).count();
-
- m_device_utilization[device_id] += time_diff;
- m_core_op_utilization[core_op_handle] += time_diff;
-}
-
-void CoreOpsScheduler::update_utilization_timestamp(scheduler_device_idx_t device_id)
-{
- m_last_measured_utilization_timestamp[device_id] = std::chrono::steady_clock::now();
-}
-
-void CoreOpsScheduler::update_utilization_send_started(scheduler_device_idx_t device_id)
-{
- if (m_device_has_drained_everything[device_id]) {
- update_device_drained_state(device_id, false);
- update_utilization_timestamp(device_id);
- }
-}
-
-void CoreOpsScheduler::update_device_drained_state(scheduler_device_idx_t device_id, bool state)
-{
- m_device_has_drained_everything[device_id] = state;
-}
-
-void CoreOpsScheduler::update_utilization_read_buffers_finished(scheduler_device_idx_t device_id,
- scheduler_core_op_handle_t core_op_handle, bool is_drained_everything)
-{
- update_utilization_timers(device_id, core_op_handle);
- update_device_drained_state(device_id, is_drained_everything);
- if (!is_drained_everything) {
- update_utilization_timestamp(device_id);
- }
-}
-
-} /* namespace hailort */
\ No newline at end of file
+++ /dev/null
-/**
- * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
- * Distributed under the MIT license (https://opensource.org/licenses/MIT)
- **/
-/**
- * @file network_group_scheduler.hpp
- * @brief Class declaration for CoreOpsScheduler that schedules core-ops to be active depending on the scheduling algorithm.
- **/
-
-#ifndef _HAILO_NETWORK_GROUP_SCHEDULER_HPP_
-#define _HAILO_NETWORK_GROUP_SCHEDULER_HPP_
-
-#include "hailo/hailort.h"
-#include "hailo/expected.hpp"
-
-#include "common/utils.hpp"
-#include "common/filesystem.hpp"
-
-#include "vdevice/scheduler/scheduler_mon.hpp"
-#include "vdevice/scheduler/scheduled_core_op_state.hpp"
-#include "vdevice/scheduler/scheduled_core_op_cv.hpp"
-#include "vdevice/scheduler/scheduler_base.hpp"
-
-
-namespace hailort
-{
-
-#define INVALID_CORE_OP_HANDLE (UINT32_MAX)
-#define INVALID_DEVICE_ID (UINT32_MAX)
-
-using scheduler_core_op_handle_t = uint32_t;
-using core_op_priority_t = uint8_t;
-using scheduler_device_idx_t = uint32_t;
-
-class CoreOpsScheduler;
-using CoreOpsSchedulerPtr = std::shared_ptr<CoreOpsScheduler>;
-
-// We use mostly weak pointer for the scheduler to prevent circular dependency of the pointers
-using CoreOpsSchedulerWeakPtr = std::weak_ptr<CoreOpsScheduler>;
-
-using stream_name_t = std::string;
-
-class CoreOpsScheduler : public SchedulerBase
-{
-public:
- static Expected<CoreOpsSchedulerPtr> create_round_robin(uint32_t device_count, std::vector<std::string> &devices_bdf_id,
- std::vector<std::string> &devices_arch);
- CoreOpsScheduler(hailo_scheduling_algorithm_t algorithm, uint32_t device_count, std::vector<std::string> &devices_bdf_id,
- std::vector<std::string> &devices_arch);
-
- virtual ~CoreOpsScheduler();
- CoreOpsScheduler(const CoreOpsScheduler &other) = delete;
- CoreOpsScheduler &operator=(const CoreOpsScheduler &other) = delete;
- CoreOpsScheduler &operator=(CoreOpsScheduler &&other) = delete;
- CoreOpsScheduler(CoreOpsScheduler &&other) noexcept = delete;
-
- Expected<scheduler_core_op_handle_t> add_core_op(std::shared_ptr<CoreOp> added_core_op);
-
- hailo_status wait_for_write(const scheduler_core_op_handle_t &core_op_handle, const std::string &stream_name,
- const std::chrono::milliseconds &timeout, const std::function<bool()> &should_cancel);
- hailo_status signal_write_finish(const scheduler_core_op_handle_t &core_op_handle, const std::string &stream_name, bool did_write_fail);
- Expected<uint32_t> wait_for_read(const scheduler_core_op_handle_t &core_op_handle, const std::string &stream_name,
- const std::chrono::milliseconds &timeout);
- hailo_status signal_read_finish(const scheduler_core_op_handle_t &core_op_handle, const std::string &stream_name, uint32_t device_id);
-
- hailo_status enable_stream(const scheduler_core_op_handle_t &core_op_handle, const std::string &stream_name);
- hailo_status disable_stream(const scheduler_core_op_handle_t &core_op_handle, const std::string &stream_name);
-
- hailo_status set_timeout(const scheduler_core_op_handle_t &core_op_handle, const std::chrono::milliseconds &timeout, const std::string &network_name);
- hailo_status set_threshold(const scheduler_core_op_handle_t &core_op_handle, uint32_t threshold, const std::string &network_name);
- hailo_status set_priority(const scheduler_core_op_handle_t &core_op_handle, core_op_priority_t priority, const std::string &network_name);
-
- virtual ReadyInfo is_core_op_ready(const scheduler_core_op_handle_t &core_op_handle, bool check_threshold) override;
- virtual bool has_core_op_drained_everything(const scheduler_core_op_handle_t &core_op_handle, uint32_t device_id) override;
-
- void notify_all();
-
-protected:
- bool choose_next_core_op(size_t device_id, bool check_threshold);
-
- std::unordered_map<scheduler_core_op_handle_t, std::atomic_bool> m_changing_current_batch_size;
- std::unordered_map<scheduler_core_op_handle_t, std::map<stream_name_t, std::atomic_bool>> m_should_core_op_stop;
-
-private:
- hailo_status switch_core_op(const scheduler_core_op_handle_t &core_op_handle, uint32_t device_id,
- bool keep_nn_config = false);
- void reset_current_core_op_timestamps(uint32_t device_id);
-
- hailo_status send_all_pending_buffers(const scheduler_core_op_handle_t &core_op_handle, uint32_t device_id, uint32_t burst_size);
- hailo_status send_pending_buffer(const scheduler_core_op_handle_t &core_op_handle, const std::string &stream_name, uint32_t device_id);
-
- void decrease_core_op_counters(const scheduler_core_op_handle_t &core_op_handle);
- bool should_core_op_stop(const scheduler_core_op_handle_t &core_op_handle);
- bool core_op_all_streams_aborted(const scheduler_core_op_handle_t &core_op_handle);
-
- std::string get_core_op_name(const scheduler_core_op_handle_t &core_op_handle);
- bool is_core_op_active(const scheduler_core_op_handle_t &core_op_handle);
- bool is_multi_device();
- hailo_status optimize_streaming_if_enabled(const scheduler_core_op_handle_t &network_group_handle);
- uint16_t get_min_avail_buffers_count(const scheduler_core_op_handle_t &network_group_handle, uint32_t device_id);
-
- hailo_status start_mon();
- void time_dependent_events_cycle_calc();
- void log_monitor_device_infos(ProtoMon &mon);
- void log_monitor_networks_infos(ProtoMon &mon);
- void log_monitor_frames_infos(ProtoMon &mon);
- void update_utilization_timers(scheduler_device_idx_t device_id, scheduler_core_op_handle_t core_op_handle);
- void update_utilization_timestamp(scheduler_device_idx_t device_id);
- void update_utilization_send_started(scheduler_device_idx_t device_id);
- void update_device_drained_state(scheduler_device_idx_t device_id, bool state);
- void update_utilization_read_buffers_finished(scheduler_device_idx_t device_id, scheduler_core_op_handle_t core_op_hanle, bool is_drained_everything);
- hailo_status set_h2d_frames_counters(const scheduler_core_op_handle_t &core_op_handle, const std::string &stream_name,
- ProtoMonStreamFramesInfo &stream_frames_info);
- hailo_status set_d2h_frames_counters(const scheduler_core_op_handle_t &core_op_handle, const std::string &stream_name,
- ProtoMonStreamFramesInfo &stream_frames_info);
-#if defined(__GNUC__)
- Expected<std::shared_ptr<TempFile>> open_temp_mon_file();
- void dump_state();
-#endif
-
- std::vector<std::shared_ptr<ScheduledCoreOp>> m_scheduled_core_ops;
- std::mutex m_before_read_write_mutex;
- std::unordered_map<scheduler_core_op_handle_t, std::shared_ptr<ScheduledCoreOpCV>> m_core_ops_cvs;
-
- // Params for the scheduler MON
- std::atomic_bool m_should_monitor;
- std::thread m_mon_thread;
- EventPtr m_mon_shutdown_event;
-#if defined(__GNUC__)
- std::shared_ptr<TempFile> m_mon_tmp_output;
-#endif
- std::chrono::time_point<std::chrono::steady_clock> m_last_measured_timestamp;
- double m_last_measured_time_duration;
- std::unordered_map<scheduler_device_idx_t, double> m_device_utilization;
- std::unordered_map<scheduler_device_idx_t, std::atomic_bool> m_device_has_drained_everything;
- std::unordered_map<scheduler_device_idx_t, std::chrono::time_point<std::chrono::steady_clock>> m_last_measured_utilization_timestamp;
- // TODO: Consider adding Accumulator classes for more info (min, max, mean, etc..)
- std::unordered_map<scheduler_core_op_handle_t, double> m_core_op_utilization;
- std::unordered_map<scheduler_core_op_handle_t, std::atomic_uint32_t> m_fps_accumulator;
-};
-
-} /* namespace hailort */
-
-#endif /* _HAILO_NETWORK_GROUP_SCHEDULER_HPP_ */
#include "common/utils.hpp"
-#include "vdevice/scheduler/scheduler_mon.hpp"
-
#include <condition_variable>
#define SINGLE_CONTEXT_BATCH_SIZE (1)
ScheduledCoreOp::ScheduledCoreOp(std::shared_ptr<CoreOp> core_op, std::chrono::milliseconds timeout,
- uint16_t max_batch_size, StreamInfoVector &stream_infos, std::string core_op_name) :
+ uint16_t max_batch_size, bool use_dynamic_batch_flow, StreamInfoVector &stream_infos, std::string core_op_name) :
m_core_op(core_op),
m_last_run_time_stamp(std::chrono::steady_clock::now()),
m_timeout(std::move(timeout)),
m_frame_was_sent(false),
m_max_batch_size(max_batch_size),
+ m_use_dynamic_batch_flow(use_dynamic_batch_flow),
m_priority(HAILO_SCHEDULER_PRIORITY_NORMAL),
- m_last_device_index(INVALID_DEVICE_ID),
+ m_last_device_id(INVALID_DEVICE_ID),
m_core_op_name(core_op_name),
m_inputs_names(),
m_outputs_names(),
- m_is_nms(false),
- m_ready_to_switch(false)
+ m_is_nms(false)
{
// Prepare empty counters for the added core-op
for (const auto &stream_info : stream_infos) {
m_min_threshold_per_stream[stream_info.name] = DEFAULT_SCHEDULER_MIN_THRESHOLD;
if (HAILO_H2D_STREAM == stream_info.direction) {
- m_requested_write_frames.insert(stream_info.name);
- m_finished_write_frames.insert(stream_info.name);
+ m_pending_to_send_frames.insert(stream_info.name);
m_h2d_finished_transferred_frames.insert(stream_info.name);
m_inputs_names.push_back(stream_info.name);
} else {
m_finished_read_frames.insert(stream_info.name);
m_d2h_finished_transferred_frames.insert(stream_info.name);
m_outputs_names.push_back(stream_info.name);
- m_output_streams_read_orders[stream_info.name] = std::queue<uint32_t>();
+
if (HAILO_FORMAT_ORDER_HAILO_NMS == stream_info.format.order) {
m_is_nms = true;
}
{
auto timeout = DEFAULT_SCHEDULER_TIMEOUT;
- uint16_t max_batch_size = CONTROL_PROTOCOL__IGNORE_DYNAMIC_BATCH_SIZE;
- if (added_core_op->get_supported_features().multi_context) {
- auto batch_size = added_core_op->get_stream_batch_size(stream_infos[0].name);
- CHECK_EXPECTED(batch_size);
- if (batch_size.value() > SINGLE_CONTEXT_BATCH_SIZE) {
- max_batch_size = batch_size.release();
- }
- }
-
- return make_shared_nothrow<ScheduledCoreOp>(added_core_op, timeout, max_batch_size, stream_infos, added_core_op->name());
-}
+ auto batch_size_expected = added_core_op->get_stream_batch_size(stream_infos[0].name);
+ CHECK_EXPECTED(batch_size_expected);
+ auto max_batch_size = batch_size_expected.release();
-bool ScheduledCoreOp::has_enough_space_in_read_buffers(uint32_t ongoing_frames)
-{
- auto output_streams = m_core_op->get_output_streams();
- for (auto &output_stream : output_streams) {
- OutputStreamBase &vdevice_output = static_cast<OutputStreamBase&>(output_stream.get());
- if (auto pending_frames_size = vdevice_output.get_buffer_frames_size()) {
- if (pending_frames_size.value() <= ongoing_frames) {
- return false;
- }
- // If couldnt get pending frames size and count (e.g. NMS layer), assume we have space - scheduler switch will prevent deadlocks here
- }
- }
- return true;
+    // DEFAULT_BATCH_SIZE and SINGLE_CONTEXT_BATCH_SIZE support streaming, therefore we don't use the dynamic batch flow
+ auto use_dynamic_batch_flow = added_core_op->get_supported_features().multi_context && (max_batch_size > SINGLE_CONTEXT_BATCH_SIZE);
+ return make_shared_nothrow<ScheduledCoreOp>(added_core_op, timeout, max_batch_size, use_dynamic_batch_flow, stream_infos, added_core_op->name());
}
-uint16_t ScheduledCoreOp::get_min_input_buffers_count(uint32_t device_count)
+uint16_t ScheduledCoreOp::get_min_input_buffers_count()
{
auto input_streams = m_core_op->get_input_streams();
uint16_t buffers_count = UINT16_MAX;
for (auto &input_stream : input_streams) {
InputStreamBase &vdevice_input = static_cast<InputStreamBase&>(input_stream.get());
if (auto pending_frames_size = vdevice_input.get_buffer_frames_size()) {
- buffers_count = std::min(buffers_count, static_cast<uint16_t>(pending_frames_size.value() / device_count));
+ buffers_count = std::min(buffers_count, static_cast<uint16_t>(pending_frames_size.value()));
}
}
return buffers_count;
}
-bool ScheduledCoreOp::has_input_written_most_frames(const std::string &stream_name)
-{
- auto total_writes = total_written_frames_count();
- return total_writes[stream_name] == get_max_value_of_unordered_map(total_writes);
-}
-
-// TODO: Use get_pre_transfer_h2d_frames_count + get_h2d_transferred_frames_count
-// TODO: Avoid returning map (malloc)
-std::unordered_map<stream_name_t, uint32_t> ScheduledCoreOp::total_written_frames_count()
-{
- std::unordered_map<stream_name_t, uint32_t> write_sum;
- for (const auto &name : get_inputs_names()) {
- write_sum[name] = m_requested_write_frames[name] + m_finished_write_frames[name]
- + m_h2d_finished_transferred_frames[name];
- }
- return write_sum;
-}
-
-// TODO: Use max(m_d2h_finished_transferred_frames) == 0 instead
-bool ScheduledCoreOp::has_pending_frames()
+uint16_t ScheduledCoreOp::get_min_output_buffers_count()
{
- auto h2d_transferred_frames_count = m_h2d_finished_transferred_frames.get_max_value();
- for (const auto &name : get_outputs_names()) {
- if (m_finished_read_frames[name] < h2d_transferred_frames_count) {
- return true;
+ auto output_streams = m_core_op->get_output_streams();
+ uint16_t buffers_count = UINT16_MAX;
+ for (auto &output_stream : output_streams) {
+        OutputStreamBase &vdevice_output = static_cast<OutputStreamBase&>(output_stream.get());
+        if (auto pending_frames_size = vdevice_output.get_buffer_frames_size()) {
+ buffers_count = std::min(buffers_count, static_cast<uint16_t>(pending_frames_size.value()));
}
}
- return false;
-}
-
-bool ScheduledCoreOp::can_stream_read(const std::string &stream_name)
-{
- return !m_output_streams_read_orders[stream_name].empty();
-}
-
-bool ScheduledCoreOp::can_stream_write(const std::string &stream_name)
-{
- auto total_written_frames = total_written_frames_count()[stream_name];
- auto min_finished_read = finished_read_frames_min_value();
- auto ongoing_frames = (min_finished_read < total_written_frames) ? (total_written_frames - min_finished_read) : 0;
- return has_enough_space_in_read_buffers(ongoing_frames);
+ return buffers_count;
}
-
bool ScheduledCoreOp::use_dynamic_batch_flow()
{
- return (CONTROL_PROTOCOL__IGNORE_DYNAMIC_BATCH_SIZE != m_max_batch_size);
+ return m_use_dynamic_batch_flow;
}
bool ScheduledCoreOp::has_core_op_drained_everything()
void ScheduledCoreOp::decrease_current_core_op_counters()
{
- // Decrease only if counter is 2 or bigger because reaching 0 can cause states to change
- if (!m_h2d_finished_transferred_frames.all_values_bigger_or_equal(2)) {
+ if (!m_h2d_finished_transferred_frames.all_values_bigger_or_equal(1)) {
return;
}
- if (!m_finished_read_frames.all_values_bigger_or_equal(2)) {
+ if (!m_finished_read_frames.all_values_bigger_or_equal(1)) {
return;
}
}
}
-uint32_t ScheduledCoreOp::get_pre_transfer_h2d_frames_count()
-{
- std::unordered_map<stream_name_t, uint32_t> write_sum;
- for (const auto &name : get_inputs_names()) {
- write_sum[name] = m_requested_write_frames[name] + m_finished_write_frames[name];
- }
- return get_max_value_of_unordered_map(write_sum);
-}
-
hailo_status ScheduledCoreOp::set_timeout(const std::chrono::milliseconds &timeout, const stream_name_t &stream_name)
{
CHECK(!m_frame_was_sent, HAILO_INVALID_OPERATION,
hailo_status ScheduledCoreOp::set_threshold(uint32_t threshold, const stream_name_t &stream_name)
{
- CHECK((CONTROL_PROTOCOL__IGNORE_DYNAMIC_BATCH_SIZE == m_max_batch_size) ||
+ CHECK(!use_dynamic_batch_flow() ||
(threshold <= m_max_batch_size), HAILO_INVALID_ARGUMENT, "Threshold must be equal or lower than the maximum batch size!");
CHECK(!m_frame_was_sent, HAILO_INVALID_OPERATION,
m_priority = priority;
}
-uint32_t ScheduledCoreOp::get_last_device_index()
+device_id_t ScheduledCoreOp::get_last_device()
{
- return m_last_device_index;
+ return m_last_device_id;
}
-void ScheduledCoreOp::set_last_device_index(uint32_t device_index)
+void ScheduledCoreOp::set_last_device(const device_id_t &device_id)
{
- m_last_device_index = device_index;
+ m_last_device_id = device_id;
}
std::string ScheduledCoreOp::get_core_op_name()
uint16_t ScheduledCoreOp::get_max_batch_size()
{
- if (!use_dynamic_batch_flow()) {
- return SINGLE_CONTEXT_BATCH_SIZE;
+ if (CONTROL_PROTOCOL__IGNORE_DYNAMIC_BATCH_SIZE == m_max_batch_size) {
+        // In NMS networks we don't know the output buffer count, therefore we use the input buffer count
+ return is_nms() ? get_min_input_buffers_count() : get_min_output_buffers_count();
}
return m_max_batch_size;
}
-Counter &ScheduledCoreOp::requested_write_frames()
-{
- return m_requested_write_frames;
-}
-
-std::atomic_uint32_t &ScheduledCoreOp::requested_write_frames(const stream_name_t &stream_name)
-{
- return m_requested_write_frames[stream_name];
-}
-
-Counter &ScheduledCoreOp::finished_write_frames()
+Counter &ScheduledCoreOp::pending_to_send_frames()
{
- return m_finished_write_frames;
+ return m_pending_to_send_frames;
}
-std::atomic_uint32_t &ScheduledCoreOp::finished_write_frames(const stream_name_t &stream_name)
+std::atomic_uint32_t &ScheduledCoreOp::pending_to_send_frames(const stream_name_t &stream_name)
{
- return m_finished_write_frames[stream_name];
+ return m_pending_to_send_frames[stream_name];
}
-uint32_t ScheduledCoreOp::finished_write_frames_min_value()
+uint32_t ScheduledCoreOp::pending_to_send_frames_min_value()
{
- return m_finished_write_frames.get_min_value();
+ return m_pending_to_send_frames.get_min_value();
}
Counter &ScheduledCoreOp::h2d_finished_transferred_frames()
return m_h2d_finished_transferred_frames[stream_name];
}
+uint32_t ScheduledCoreOp::h2d_finished_transferred_frames_max_value()
+{
+ return m_h2d_finished_transferred_frames.get_max_value();
+}
+
Counter &ScheduledCoreOp::requested_read_frames()
{
return m_requested_read_frames;
return m_outputs_names;
}
-void ScheduledCoreOp::push_device_index(uint32_t device_index)
-{
- for (auto& stream_name : get_outputs_names()) {
- m_output_streams_read_orders[stream_name].push(device_index);
- }
-}
-
-uint32_t ScheduledCoreOp::pop_device_index(const stream_name_t &stream_name)
-{
- assert(contains(m_output_streams_read_orders, stream_name));
- assert(!m_output_streams_read_orders[stream_name].empty());
- auto device_index = m_output_streams_read_orders[stream_name].front();
- m_output_streams_read_orders[stream_name].pop();
-
- return device_index;
-}
-
-bool ScheduledCoreOp::is_ready_to_switch()
-{
- return m_ready_to_switch;
-}
-
-void ScheduledCoreOp::mark_ready_to_switch()
-{
- m_ready_to_switch = true;
-}
-
-void ScheduledCoreOp::mark_unready_to_switch()
-{
- m_ready_to_switch = false;
-}
-
} /* namespace hailort */
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
**/
/**
- * @file network_group_scheduler.hpp
+ * @file scheduler.hpp
* @brief Class declaration for CoreOpsScheduler that schedules core-ops to be active depending on the scheduling algorithm.
**/
#include "core_op/core_op.hpp"
+#include "scheduler_base.hpp"
+
#include <condition_variable>
#include <queue>
#define DEFAULT_SCHEDULER_TIMEOUT (std::chrono::milliseconds(0))
#define DEFAULT_SCHEDULER_MIN_THRESHOLD (0)
-#define INVALID_DEVICE_ID (UINT32_MAX)
+#define INVALID_DEVICE_ID (std::to_string(UINT32_MAX))
using stream_name_t = std::string;
using core_op_priority_t = uint8_t;
ScheduledCoreOp &operator=(ScheduledCoreOp &&other) = delete;
ScheduledCoreOp(ScheduledCoreOp &&other) noexcept = delete;
- bool has_enough_space_in_read_buffers(uint32_t ongoing_frames);
- uint16_t get_min_input_buffers_count(uint32_t device_count);
- bool has_input_written_most_frames(const std::string &stream_name);
- std::unordered_map<stream_name_t, uint32_t> total_written_frames_count();
- bool has_pending_frames();
- bool can_stream_read(const std::string &stream_name);
- bool can_stream_write(const std::string &stream_name);
- bool use_dynamic_batch_flow();
- bool has_core_op_drained_everything();
- void decrease_current_core_op_counters();
- uint32_t get_pre_transfer_h2d_frames_count();
-
- bool is_ready_to_switch();
- void mark_ready_to_switch();
- void mark_unready_to_switch();
-
std::string get_core_op_name();
-
std::shared_ptr<CoreOp> get_core_op();
+ const std::vector<stream_name_t> &get_outputs_names();
+ const std::vector<stream_name_t> &get_inputs_names();
- void mark_frame_sent();
+ uint16_t get_min_input_buffers_count();
+ uint16_t get_min_output_buffers_count();
- std::chrono::time_point<std::chrono::steady_clock> get_last_run_timestamp();
- void set_last_run_timestamp(const std::chrono::time_point<std::chrono::steady_clock> ×tamp);
+ uint16_t get_max_batch_size();
+ bool use_dynamic_batch_flow();
+ bool has_core_op_drained_everything();
+
+ device_id_t get_last_device();
+ void set_last_device(const device_id_t &device_id);
Expected<std::chrono::milliseconds> get_timeout(const stream_name_t &stream_name = "");
hailo_status set_timeout(const std::chrono::milliseconds &timeout, const stream_name_t &stream_name = "");
Expected<uint32_t> get_threshold(const stream_name_t &stream_name);
hailo_status set_threshold(uint32_t threshold, const stream_name_t &stream_name = "");
-
core_op_priority_t get_priority();
void set_priority(core_op_priority_t priority);
- uint32_t get_last_device_index();
- void set_last_device_index(uint32_t device_index);
+ std::chrono::time_point<std::chrono::steady_clock> get_last_run_timestamp();
+ void set_last_run_timestamp(const std::chrono::time_point<std::chrono::steady_clock> ×tamp);
- uint16_t get_max_batch_size();
+ void mark_frame_sent();
+ void decrease_current_core_op_counters();
- Counter &requested_write_frames();
- std::atomic_uint32_t &requested_write_frames(const stream_name_t &stream_name);
- Counter &finished_write_frames();
- std::atomic_uint32_t &finished_write_frames(const stream_name_t &stream_name);
- uint32_t finished_write_frames_min_value();
+ Counter &pending_to_send_frames();
+ std::atomic_uint32_t &pending_to_send_frames(const stream_name_t &stream_name);
+ uint32_t pending_to_send_frames_min_value();
Counter &h2d_finished_transferred_frames();
std::atomic_uint32_t &h2d_finished_transferred_frames(const stream_name_t &stream_name);
+ uint32_t h2d_finished_transferred_frames_max_value();
Counter &requested_read_frames();
std::atomic_uint32_t &requested_read_frames(const stream_name_t &stream_name);
Counter &d2h_finished_transferred_frames();
std::atomic_uint32_t &d2h_finished_transferred_frames(const stream_name_t &stream_name);
+
Counter &finished_read_frames();
std::atomic_uint32_t &finished_read_frames(const stream_name_t &stream_name);
uint32_t finished_read_frames_min_value();
- const std::vector<stream_name_t> &get_outputs_names();
- const std::vector<stream_name_t> &get_inputs_names();
bool is_nms()
{
return m_is_nms;
}
- void push_device_index(uint32_t device_index);
- uint32_t pop_device_index(const stream_name_t &stream_name);
-
ScheduledCoreOp(std::shared_ptr<CoreOp> core_op, std::chrono::milliseconds timeout,
- uint16_t max_batch_size, StreamInfoVector &stream_infos, std::string core_op_name);
+ uint16_t max_batch_size, bool use_dynamic_batch_flow, StreamInfoVector &stream_infos, std::string core_op_name);
private:
std::shared_ptr<CoreOp> m_core_op;
-
std::chrono::time_point<std::chrono::steady_clock> m_last_run_time_stamp;
std::chrono::milliseconds m_timeout;
-
std::atomic_bool m_frame_was_sent;
uint16_t m_max_batch_size;
+ bool m_use_dynamic_batch_flow;
- Counter m_requested_write_frames; // 'wait_for_write()' has been called
- Counter m_finished_write_frames; // 'signal_finished_write()' has been called - frame is written in buffer (writes are a-sync)
+    Counter m_pending_to_send_frames; // 'signal_frame_pending_to_send()' has been called - the frame is written to the buffer (writes are async)
Counter m_h2d_finished_transferred_frames; // Frame has been transferred to device (intrpt was raised)
core_op_priority_t m_priority;
- std::atomic_uint32_t m_last_device_index;
+ device_id_t m_last_device_id;
std::string m_core_op_name;
std::vector<stream_name_t> m_inputs_names;
std::vector<stream_name_t> m_outputs_names;
- std::unordered_map<stream_name_t, std::queue<uint32_t>> m_output_streams_read_orders;
-
bool m_is_nms;
-
- // TODO: Remove this flag when the old scheduling mode will be deprecated
- std::atomic_bool m_ready_to_switch;
};
} /* namespace hailort */
--- /dev/null
+/**
+ * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+**/
+/**
+ * @file scheduled_stream.cpp
+ * @brief Internal stream implementation for scheduled streams
+ *
+ **/
+
+#include "scheduled_stream.hpp"
+
+#include "utils/profiler/tracer_macros.hpp"
+
+namespace hailort
+{
+
+/** Input stream **/
+Expected<std::unique_ptr<ScheduledInputStream>> ScheduledInputStream::create(
+ std::map<device_id_t, std::reference_wrapper<VdmaInputStreamBase>> &&streams,
+ const scheduler_core_op_handle_t &core_op_handle,
+ EventPtr &&core_op_activated_event,
+ const LayerInfo &layer_info,
+ CoreOpsSchedulerWeakPtr core_ops_scheduler)
+{
+ auto status = HAILO_UNINITIALIZED;
+ auto local_vdevice_stream = make_unique_nothrow<ScheduledInputStream>(std::move(streams),
+ core_op_handle, std::move(core_op_activated_event), layer_info,
+ core_ops_scheduler, status);
+ CHECK_NOT_NULL_AS_EXPECTED(local_vdevice_stream, HAILO_OUT_OF_HOST_MEMORY);
+ CHECK_SUCCESS_AS_EXPECTED(status);
+
+ return local_vdevice_stream;
+}
+
+hailo_status ScheduledInputStreamBase::abort()
+{
+ return abort_impl(m_core_op_handle);
+}
+
+hailo_status ScheduledInputStreamBase::abort_impl(scheduler_core_op_handle_t core_op_handle)
+{
+ auto status = HAILO_SUCCESS; // Best effort
+ assert(1 == m_streams.size());
+ auto abort_status = m_streams.begin()->second.get().abort();
+    if (HAILO_SUCCESS != abort_status) {
+        LOGGER__ERROR("Failed to abort input stream. (status: {} device: {})", abort_status, m_streams.begin()->second.get().get_dev_id());
+ status = abort_status;
+ }
+
+ auto core_ops_scheduler = m_core_ops_scheduler.lock();
+ CHECK(core_ops_scheduler, HAILO_INTERNAL_FAILURE);
+
+ auto disable_status = core_ops_scheduler->disable_stream(core_op_handle, name());
+ if (HAILO_SUCCESS != disable_status) {
+ LOGGER__ERROR("Failed to disable stream in the core-op scheduler. (status: {})", disable_status);
+ status = disable_status;
+ }
+
+ return status;
+}
+
+hailo_status ScheduledInputStreamBase::clear_abort()
+{
+ return clear_abort_impl(m_core_op_handle);
+}
+
+hailo_status ScheduledInputStreamBase::flush()
+{
+ auto core_ops_scheduler = m_core_ops_scheduler.lock();
+ CHECK(core_ops_scheduler, HAILO_INTERNAL_FAILURE);
+
+ auto status = core_ops_scheduler->flush_pending_buffers(m_core_op_handle, name(), get_timeout());
+ if (HAILO_STREAM_ABORTED_BY_USER == status) {
+ LOGGER__INFO("Got HAILO_STREAM_ABORTED_BY_USER in flush of stream {}", name());
+ return status;
+ }
+ CHECK_SUCCESS(status);
+
+ return VDeviceInputStreamBase::flush();
+}
+
+hailo_status ScheduledInputStreamBase::clear_abort_impl(scheduler_core_op_handle_t core_op_handle)
+{
+ auto status = HAILO_SUCCESS; // Best effort
+ assert(1 == m_streams.size());
+ auto clear_abort_status = m_streams.begin()->second.get().clear_abort();
+ if ((HAILO_SUCCESS != clear_abort_status) && (HAILO_STREAM_NOT_ACTIVATED != clear_abort_status)) {
+ LOGGER__ERROR("Failed to clear abort input stream. (status: {} device: {})", clear_abort_status, m_streams.begin()->second.get().get_dev_id());
+ status = clear_abort_status;
+ }
+
+ auto core_ops_scheduler = m_core_ops_scheduler.lock();
+ CHECK(core_ops_scheduler, HAILO_INTERNAL_FAILURE);
+
+ auto enable_status = core_ops_scheduler->enable_stream(core_op_handle, name());
+ if (HAILO_SUCCESS != enable_status) {
+ LOGGER__ERROR("Failed to enable stream in the core-op scheduler. (status: {})", enable_status);
+ status = enable_status;
+ }
+
+ return status;
+}
+
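+// Sync write path: the buffer is first written to the underlying stream's queue
+// (without triggering a transfer), and only then reported to the scheduler as
+// pending, so the scheduler never dispatches a frame that isn't fully written.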
+hailo_status ScheduledInputStream::write_impl(const MemoryView &buffer, const std::function<bool()> &should_cancel)
+{
+ auto core_ops_scheduler = m_core_ops_scheduler.lock();
+ CHECK(core_ops_scheduler, HAILO_INTERNAL_FAILURE);
+
+ assert(1 == m_streams.size());
+ auto status = m_streams.begin()->second.get().write_buffer_only(buffer, should_cancel);
+ if (HAILO_SUCCESS != status) {
+ LOGGER__INFO("Write to stream has failed! status = {}", status);
+ return status;
+ }
+
+ auto write_finish_status = core_ops_scheduler->signal_frame_pending_to_send(m_core_op_handle, name());
+ if (HAILO_STREAM_ABORTED_BY_USER == write_finish_status) {
+ return write_finish_status;
+ }
+ CHECK_SUCCESS(write_finish_status);
+
+ return HAILO_SUCCESS;
+}
+
+Expected<std::unique_ptr<ScheduledAsyncInputStream>> ScheduledAsyncInputStream::create(
+ std::map<device_id_t, std::reference_wrapper<VdmaInputStreamBase>> &&streams,
+ const scheduler_core_op_handle_t &core_op_handle,
+ EventPtr &&core_op_activated_event,
+ const LayerInfo &layer_info,
+ CoreOpsSchedulerWeakPtr core_ops_scheduler)
+{
+ auto max_queue_size_per_stream = streams.begin()->second.get().get_buffer_frames_size();
+ CHECK_EXPECTED(max_queue_size_per_stream);
+ const auto max_queue_size = max_queue_size_per_stream.value() * streams.size();
+
+ auto status = HAILO_UNINITIALIZED;
+ auto local_vdevice_stream = make_unique_nothrow<ScheduledAsyncInputStream>(std::move(streams),
+ core_op_handle, std::move(core_op_activated_event), layer_info,
+ core_ops_scheduler, max_queue_size, status);
+ CHECK_NOT_NULL_AS_EXPECTED(local_vdevice_stream, HAILO_OUT_OF_HOST_MEMORY);
+ CHECK_SUCCESS_AS_EXPECTED(status);
+
+ return local_vdevice_stream;
+}
+
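+// Pops the oldest buffer from the pending queue and forwards it to the vdma stream
+// of the device chosen by the scheduler. The completion callback is wrapped by the
+// reorder queue so user callbacks fire in the order the frames were written.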
+hailo_status ScheduledAsyncInputStream::send_pending_buffer(const device_id_t &device_id)
+{
+ // TODO HRT-10583 - allow option to remove reorder queue
+ auto pending_buffer = m_pending_buffers.dequeue();
+ CHECK_EXPECTED_AS_STATUS(pending_buffer);
+
+ pending_buffer->callback = m_callback_reorder_queue.wrap_callback(pending_buffer->callback);
+ assert(contains(m_streams, device_id));
+ auto status = m_streams.at(device_id).get().write_async(pending_buffer.release());
+ if (HAILO_SUCCESS != status) {
+ m_callback_reorder_queue.cancel_last_callback();
+ }
+ return status;
+}
+
+hailo_status ScheduledAsyncInputStream::wait_for_async_ready(size_t transfer_size, std::chrono::milliseconds timeout)
+{
+ (void)transfer_size;
+ return m_pending_buffers.wait_for_room(timeout);
+}
+
+hailo_status ScheduledAsyncInputStream::write_async(TransferRequest &&transfer_request)
+{
+ auto core_ops_scheduler = m_core_ops_scheduler.lock();
+ CHECK(core_ops_scheduler, HAILO_INTERNAL_FAILURE);
+
+ auto status = m_pending_buffers.enqueue(std::move(transfer_request));
+ CHECK_SUCCESS(status);
+
+ auto write_finish_status = core_ops_scheduler->signal_frame_pending_to_send(m_core_op_handle, name());
+ if (HAILO_STREAM_ABORTED_BY_USER == write_finish_status) {
+ return write_finish_status;
+ }
+ CHECK_SUCCESS(write_finish_status);
+
+ return HAILO_SUCCESS;
+}
+
+Expected<size_t> ScheduledAsyncInputStream::get_async_max_queue_size() const
+{
+ return m_pending_buffers.max_size();
+}
+
+
+hailo_status ScheduledAsyncInputStream::abort()
+{
+ m_pending_buffers.abort();
+ return ScheduledInputStreamBase::abort();
+}
+
+hailo_status ScheduledAsyncInputStream::clear_abort()
+{
+ m_pending_buffers.clear_abort();
+ return ScheduledInputStreamBase::clear_abort();
+}
+
+hailo_status ScheduledAsyncInputStream::write_impl(const MemoryView &, const std::function<bool()> &)
+{
+ LOGGER__ERROR("Sync write is not supported by async streams");
+ return HAILO_NOT_SUPPORTED;
+}
+
+/** Output stream **/
+Expected<std::unique_ptr<ScheduledOutputStream>> ScheduledOutputStream::create(
+ std::map<device_id_t, std::reference_wrapper<VdmaOutputStreamBase>> &&streams,
+ const scheduler_core_op_handle_t &core_op_handle,
+ const LayerInfo &layer_info,
+ EventPtr &&core_op_activated_event,
+ CoreOpsSchedulerWeakPtr core_ops_scheduler)
+{
+ auto status = HAILO_UNINITIALIZED;
+ auto stream = make_unique_nothrow<ScheduledOutputStream>(std::move(streams), core_op_handle,
+ layer_info, std::move(core_op_activated_event), core_ops_scheduler, status);
+ CHECK_NOT_NULL_AS_EXPECTED(stream, HAILO_OUT_OF_HOST_MEMORY);
+ CHECK_SUCCESS_AS_EXPECTED(status);
+ return stream;
+}
+
+ScheduledOutputStream::ScheduledOutputStream(
+ std::map<device_id_t, std::reference_wrapper<VdmaOutputStreamBase>> &&streams,
+ const scheduler_core_op_handle_t &core_op_handle,
+ const LayerInfo &layer_info,
+ EventPtr &&core_op_activated_event,
+ CoreOpsSchedulerWeakPtr core_ops_scheduler,
+ hailo_status &status) : ScheduledOutputStreamBase(std::move(streams), core_op_handle, layer_info,
+ std::move(core_op_activated_event), core_ops_scheduler, status)
+ {
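+        // Register a D2H transfer-complete callback on each underlying vdma stream,
+        // so the scheduler is notified per-device whenever a frame finishes.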
+ for (auto &stream_pair : m_streams) {
+ stream_pair.second.get().register_interrupt_callback(
+ [scheduler_weak=m_core_ops_scheduler, core_op_handle=m_core_op_handle, name=name(), device_id=stream_pair.first]() {
+ auto scheduler = scheduler_weak.lock();
+ assert(scheduler);
+ scheduler->signal_frame_transferred_d2h(core_op_handle, name, device_id);
+ }
+ );
+ }
+ }
+
+hailo_status ScheduledOutputStream::set_next_device_to_read(const device_id_t &device_id)
+{
+ std::lock_guard<std::mutex> lock(m_device_read_order_mutex);
+ m_device_read_order.push(device_id);
+ return HAILO_SUCCESS;
+}
+
+hailo_status ScheduledOutputStreamBase::abort()
+{
+ return abort_impl(m_core_op_handle);
+}
+
+hailo_status ScheduledOutputStreamBase::abort_impl(scheduler_core_op_handle_t core_op_handle)
+{
+ auto status = HAILO_SUCCESS; // Best effort
+ for (const auto &pair : m_streams) {
+ auto &stream = pair.second;
+ auto abort_status = stream.get().abort();
+        if (HAILO_SUCCESS != abort_status) {
+            LOGGER__ERROR("Failed to abort output stream. (status: {} device: {})", abort_status, stream.get().get_dev_id());
+ status = abort_status;
+ }
+ }
+ auto core_ops_scheduler = m_core_ops_scheduler.lock();
+ CHECK(core_ops_scheduler, HAILO_INTERNAL_FAILURE);
+
+ auto disable_status = core_ops_scheduler->disable_stream(core_op_handle, name());
+ if (HAILO_SUCCESS != disable_status) {
+ LOGGER__ERROR("Failed to disable stream in the core-op scheduler. (status: {})", disable_status);
+ status = disable_status;
+ }
+
+ return status;
+}
+
+hailo_status ScheduledOutputStreamBase::clear_abort()
+{
+ return clear_abort_impl(m_core_op_handle);
+}
+
+hailo_status ScheduledOutputStreamBase::clear_abort_impl(scheduler_core_op_handle_t core_op_handle)
+{
+ auto status = HAILO_SUCCESS; // Best effort
+ for (const auto &pair : m_streams) {
+ auto &stream = pair.second;
+ auto clear_abort_status = stream.get().clear_abort();
+ if ((HAILO_SUCCESS != clear_abort_status) && (HAILO_STREAM_NOT_ACTIVATED != clear_abort_status)) {
+ LOGGER__ERROR("Failed to clear abort output stream. (status: {} device: {})", clear_abort_status, stream.get().get_dev_id());
+ status = clear_abort_status;
+ }
+ }
+
+ auto core_ops_scheduler = m_core_ops_scheduler.lock();
+ CHECK(core_ops_scheduler, HAILO_INTERNAL_FAILURE);
+
+ auto enable_status = core_ops_scheduler->enable_stream(core_op_handle, name());
+ if (HAILO_SUCCESS != enable_status) {
+ LOGGER__ERROR("Failed to enable stream in the core-op scheduler. (status: {})", enable_status);
+ status = enable_status;
+ }
+
+ return status;
+}
+
+hailo_status ScheduledOutputStream::read(MemoryView buffer)
+{
+ auto core_ops_scheduler = m_core_ops_scheduler.lock();
+ CHECK(core_ops_scheduler, HAILO_INTERNAL_FAILURE);
+
+ auto status = core_ops_scheduler->signal_frame_pending_to_read(m_core_op_handle, name());
+ CHECK_SUCCESS(status);
+
+ auto device_id = wait_for_read();
+ if (HAILO_STREAM_ABORTED_BY_USER == device_id.status()) {
+ LOGGER__INFO("Read from stream was aborted.");
+ return device_id.status();
+ }
+ CHECK_EXPECTED_AS_STATUS(device_id);
+
+ assert(contains(m_streams, device_id.value()));
+ status = m_streams.at(device_id.value()).get().read(buffer);
+ if (HAILO_SUCCESS != status) {
+ LOGGER__INFO("Read from stream has failed! status = {}", status);
+ return status;
+ }
+
+ status = core_ops_scheduler->signal_read_finish(m_core_op_handle, name(), device_id.value());
+ if (HAILO_STREAM_ABORTED_BY_USER == status) {
+ return status;
+ }
+ CHECK_SUCCESS(status);
+
+ return HAILO_SUCCESS;
+}
+
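+// Blocks until the scheduler has queued a device to read from (pushed via
+// set_next_device_to_read), then pops and returns that device id.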
+Expected<device_id_t> ScheduledOutputStream::wait_for_read()
+{
+ auto core_ops_scheduler = m_core_ops_scheduler.lock();
+ CHECK_AS_EXPECTED(core_ops_scheduler, HAILO_INTERNAL_FAILURE);
+
+ auto status = core_ops_scheduler->wait_for_read(m_core_op_handle, name(), get_timeout(), [this]() {
+ std::lock_guard<std::mutex> lock(m_device_read_order_mutex);
+ return !m_device_read_order.empty();
+ });
+ if (HAILO_STREAM_ABORTED_BY_USER == status) {
+ LOGGER__INFO("Read from stream was aborted.");
+ return make_unexpected(status);
+ }
+ CHECK_SUCCESS_AS_EXPECTED(status);
+
+ std::lock_guard<std::mutex> lock(m_device_read_order_mutex);
+ auto device_id = m_device_read_order.front();
+ m_device_read_order.pop();
+ return device_id;
+}
+
+} /* namespace hailort */
#include "stream_common/stream_internal.hpp"
#include "vdevice/vdevice_internal.hpp"
#include "vdevice/vdevice_stream.hpp"
+#include "vdevice/callback_reorder_queue.hpp"
#include "vdma/vdma_device.hpp"
namespace hailort
{
-class ScheduledInputStream : public InputVDeviceBaseStream {
+
+class ScheduledInputStreamBase : public VDeviceInputStreamBase {
public:
- ScheduledInputStream(
- std::vector<std::reference_wrapper<VdmaInputStream>> &&streams,
+ ScheduledInputStreamBase(
+ std::map<device_id_t, std::reference_wrapper<VdmaInputStreamBase>> &&streams,
const scheduler_core_op_handle_t &core_op_handle,
EventPtr &&core_op_activated_event,
const LayerInfo &layer_info,
CoreOpsSchedulerWeakPtr core_ops_scheduler,
hailo_status &status) :
- InputVDeviceBaseStream(std::move(streams), std::move(core_op_activated_event), layer_info, status),
+ VDeviceInputStreamBase(std::move(streams), std::move(core_op_activated_event), layer_info, status),
m_core_op_handle(core_op_handle),
m_core_ops_scheduler(core_ops_scheduler)
{}
- virtual hailo_status abort() override;
- virtual hailo_status clear_abort() override;
- virtual bool is_scheduled() override { return true; };
+ virtual bool is_scheduled() override final { return true; };
virtual void notify_all() override
{
}
scheduler->notify_all();
- for (auto &stream : m_streams) {
+ for (const auto &pair : m_streams) {
+ auto &stream = pair.second;
stream.get().notify_all();
}
}
-protected:
- virtual Expected<size_t> sync_write_raw_buffer(const MemoryView &buffer,
- const std::function<bool()> &should_cancel = []() { return false; });
-
- Expected<size_t> sync_write_raw_buffer_impl(const MemoryView &buffer, scheduler_core_op_handle_t core_op_handle,
- const std::function<bool()> &should_cancel);
+ virtual hailo_status abort() override;
+ virtual hailo_status clear_abort() override;
+ virtual hailo_status flush() override;
+protected:
scheduler_core_op_handle_t m_core_op_handle;
CoreOpsSchedulerWeakPtr m_core_ops_scheduler;
hailo_status clear_abort_impl(scheduler_core_op_handle_t core_op_handle);
};
-class ScheduledOutputStream : public OutputVDeviceBaseStream {
+class ScheduledInputStream : public ScheduledInputStreamBase {
public:
- ScheduledOutputStream(
- std::vector<std::reference_wrapper<VdmaOutputStream>> &&streams,
+ static Expected<std::unique_ptr<ScheduledInputStream>> create(
+ std::map<device_id_t, std::reference_wrapper<VdmaInputStreamBase>> &&streams,
const scheduler_core_op_handle_t &core_op_handle,
+ EventPtr &&core_op_activated_event,
const LayerInfo &layer_info,
+ CoreOpsSchedulerWeakPtr core_ops_scheduler);
+
+ ScheduledInputStream(
+ std::map<device_id_t, std::reference_wrapper<VdmaInputStreamBase>> &&streams,
+ const scheduler_core_op_handle_t &core_op_handle,
EventPtr &&core_op_activated_event,
+ const LayerInfo &layer_info,
CoreOpsSchedulerWeakPtr core_ops_scheduler,
hailo_status &status) :
- OutputVDeviceBaseStream(std::move(streams), layer_info, std::move(core_op_activated_event), status),
+ ScheduledInputStreamBase(std::move(streams), core_op_handle, std::move(core_op_activated_event), layer_info,
+ core_ops_scheduler, status)
+ {}
+
+protected:
+ virtual hailo_status write_impl(const MemoryView &buffer, const std::function<bool()> &should_cancel) override;
+};
+
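+// Bounded, abortable producer/consumer queue for user transfer requests. A short
+// usage sketch of the flow implemented by the async input stream below:
+//   TransferRequestsQueue queue(max_queue_size);
+//   queue.wait_for_room(timeout);        // producer: block until there is room (or abort)
+//   queue.enqueue(std::move(request));   // producer: store the user's transfer request
+//   auto request = queue.dequeue();      // consumer: take the oldest request, wake one producer
+//   queue.abort();                       // release all waiters with HAILO_STREAM_ABORTED_BY_USER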
+class TransferRequestsQueue final {
+public:
+ TransferRequestsQueue(size_t max_size) :
+ m_max_size(max_size)
+ {}
+
+ ~TransferRequestsQueue()
+ {
+ while (!m_queue.empty()) {
+ auto &request = m_queue.front();
+ request.callback(HAILO_STREAM_ABORTED_BY_USER);
+ m_queue.pop();
+ }
+ }
+
+ TransferRequestsQueue(const TransferRequestsQueue &) = delete;
+ TransferRequestsQueue &operator=(const TransferRequestsQueue &) = delete;
+
+ hailo_status wait_for_room(std::chrono::milliseconds timeout)
+ {
+ std::unique_lock<std::mutex> lock(m_mutex);
+ auto result = m_dequeue_cv.wait_for(lock, timeout,
+ [&] {
+ return m_is_aborted || (m_queue.size() < m_max_size);
+ });
+ if (!result) {
+ return HAILO_TIMEOUT;
+ }
+ if (m_is_aborted) {
+ return HAILO_STREAM_ABORTED_BY_USER;
+ }
+ return HAILO_SUCCESS;
+ }
+
+ hailo_status enqueue(TransferRequest &&transfer_request)
+ {
+ std::unique_lock<std::mutex> lock(m_mutex);
+ if (m_is_aborted) {
+ return HAILO_STREAM_ABORTED_BY_USER;
+ }
+ CHECK(m_queue.size() < m_max_size, HAILO_QUEUE_IS_FULL, "No space left in stream queue");
+ m_queue.emplace(std::move(transfer_request));
+ return HAILO_SUCCESS;
+ }
+
+ Expected<TransferRequest> dequeue()
+ {
+ TransferRequest transfer_request{};
+ {
+ std::unique_lock<std::mutex> lock(m_mutex);
+ if (m_is_aborted) {
+ return make_unexpected(HAILO_STREAM_ABORTED_BY_USER);
+ }
+ CHECK_AS_EXPECTED(!m_queue.empty(), HAILO_INTERNAL_FAILURE, "Queue should not be empty");
+ transfer_request = m_queue.front();
+ m_queue.pop();
+ }
+ m_dequeue_cv.notify_one();
+ return transfer_request;
+ }
+
+ void abort()
+ {
+ {
+ std::unique_lock<std::mutex> lock(m_mutex);
+ m_is_aborted = true;
+ }
+
+ m_dequeue_cv.notify_all();
+ }
+
+ void clear_abort()
+ {
+ std::unique_lock<std::mutex> lock(m_mutex);
+ m_is_aborted = false;
+ }
+
+ size_t max_size() const { return m_max_size; }
+
+private:
+ // TODO: use SpscQueue (HRT-10554)
+ const size_t m_max_size;
+ std::mutex m_mutex;
+ bool m_is_aborted = false;
+ std::condition_variable m_dequeue_cv;
+ std::queue<TransferRequest> m_queue;
+};
+
+class ScheduledAsyncInputStream : public ScheduledInputStreamBase {
+public:
+
+ static Expected<std::unique_ptr<ScheduledAsyncInputStream>> create(
+ std::map<device_id_t, std::reference_wrapper<VdmaInputStreamBase>> &&streams,
+ const scheduler_core_op_handle_t &core_op_handle,
+ EventPtr &&core_op_activated_event,
+ const LayerInfo &layer_info,
+ CoreOpsSchedulerWeakPtr core_ops_scheduler);
+
+ ScheduledAsyncInputStream(
+ std::map<device_id_t, std::reference_wrapper<VdmaInputStreamBase>> &&streams,
+ const scheduler_core_op_handle_t &core_op_handle,
+ EventPtr &&core_op_activated_event,
+ const LayerInfo &layer_info,
+ CoreOpsSchedulerWeakPtr core_ops_scheduler,
+ size_t max_queue_size,
+ hailo_status &status) :
+ ScheduledInputStreamBase(std::move(streams), core_op_handle, std::move(core_op_activated_event), layer_info,
+ core_ops_scheduler, status),
+ m_pending_buffers(max_queue_size),
+        m_callback_reorder_queue(max_queue_size) // TODO HRT-10583 - use reorder queue only when needed
+ {}
+
+ virtual hailo_status send_pending_buffer(const device_id_t &device_id) override;
+ virtual hailo_status wait_for_async_ready(size_t transfer_size, std::chrono::milliseconds timeout) override;
+ virtual hailo_status write_async(TransferRequest &&transfer_request) override;
+ virtual Expected<size_t> get_async_max_queue_size() const override;
+ virtual hailo_status abort() override;
+ virtual hailo_status clear_abort() override;
+
+protected:
+ virtual hailo_status write_impl(const MemoryView &, const std::function<bool()> &) override;
+
+ // All buffers written by the user using write_async are first stored in this queue.
+ // When the scheduler decides to activate the network on a specific device, send_pending_buffer is called, and
+ // the buffers are sent to the underlying stream.
+ TransferRequestsQueue m_pending_buffers;
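+    // Wraps the callbacks of dispatched transfers so they are invoked in the same
+    // order the frames were written, even if devices complete out of order.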
+ CallbackReorderQueue m_callback_reorder_queue;
+};
+
+class ScheduledOutputStreamBase : public VDeviceOutputStreamBase {
+public:
+ ScheduledOutputStreamBase(
+ std::map<device_id_t, std::reference_wrapper<VdmaOutputStreamBase>> &&streams,
+ const scheduler_core_op_handle_t &core_op_handle,
+ const LayerInfo &layer_info,
+ EventPtr &&core_op_activated_event,
+ CoreOpsSchedulerWeakPtr core_ops_scheduler,
+ hailo_status &status) :
+ VDeviceOutputStreamBase(std::move(streams), layer_info, std::move(core_op_activated_event), status),
m_core_op_handle(core_op_handle),
m_core_ops_scheduler(core_ops_scheduler)
{}
+ virtual bool is_scheduled() override { return true; };
+
virtual hailo_status abort() override;
virtual hailo_status clear_abort() override;
- virtual bool is_scheduled() override { return true; };
protected:
- virtual hailo_status read(MemoryView buffer) override;
- hailo_status read_impl(MemoryView buffer, scheduler_core_op_handle_t core_op_handle);
scheduler_core_op_handle_t m_core_op_handle;
CoreOpsSchedulerWeakPtr m_core_ops_scheduler;
hailo_status clear_abort_impl(scheduler_core_op_handle_t core_op_handle);
};
+
+class ScheduledOutputStream : public ScheduledOutputStreamBase {
+public:
+ static Expected<std::unique_ptr<ScheduledOutputStream>> create(
+ std::map<device_id_t, std::reference_wrapper<VdmaOutputStreamBase>> &&streams,
+ const scheduler_core_op_handle_t &core_op_handle,
+ const LayerInfo &layer_info,
+ EventPtr &&core_op_activated_event,
+ CoreOpsSchedulerWeakPtr core_ops_scheduler);
+
+ ScheduledOutputStream(
+ std::map<device_id_t, std::reference_wrapper<VdmaOutputStreamBase>> &&streams,
+ const scheduler_core_op_handle_t &core_op_handle,
+ const LayerInfo &layer_info,
+ EventPtr &&core_op_activated_event,
+ CoreOpsSchedulerWeakPtr core_ops_scheduler,
+ hailo_status &status);
+
+ virtual hailo_status set_next_device_to_read(const device_id_t &device_id) override;
+
+protected:
+ virtual hailo_status read(MemoryView buffer) override;
+
+private:
+
+ // Returns device id to read from
+ Expected<device_id_t> wait_for_read();
+
+ std::queue<device_id_t> m_device_read_order;
+ std::mutex m_device_read_order_mutex;
+};
+
} /* namespace hailort */
#endif /* HAILO_SCHEDULED_STREAM_HPP_ */
--- /dev/null
+/**
+ * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+ **/
+/**
+ * @file scheduler.cpp
+ * @brief Network scheduler
+ **/
+
+#include "common/os_utils.hpp"
+
+
+#include "vdevice/scheduler/scheduler.hpp"
+#include "vdevice/vdevice_core_op.hpp"
+#include "vdevice/scheduler/scheduler_oracle.hpp"
+#include "vdevice/vdevice_stream_multiplexer_wrapper.hpp"
+#include "hef/hef_internal.hpp"
+#include "utils/profiler/tracer_macros.hpp"
+
+#include <fstream>
+
+
+namespace hailort
+{
+
+#define SINGLE_CONTEXT_BATCH_SIZE (1)
+#define DEFAULT_BURST_SIZE (1)
+
+// TODO: use device handles instead of device count
+CoreOpsScheduler::CoreOpsScheduler(hailo_scheduling_algorithm_t algorithm, std::vector<std::string> &devices_ids,
+ std::vector<std::string> &devices_arch) :
+ SchedulerBase(algorithm, devices_ids, devices_arch),
+ m_should_core_op_stop(),
+ m_before_read_write_mutex(),
+ m_core_ops_cvs(),
+ m_scheduler_cv()
+{
+ TRACE(SchedulerStartTrace, get_device_count());
+ for (const auto &pair : m_devices) {
+ auto &device_info = pair.second;
+ TRACE(AddDeviceTrace, device_info->device_id, device_info->device_arch);
+ }
+
+ m_is_running = true;
+ m_scheduler_thread = std::thread(&CoreOpsScheduler::worker_thread_main, this);
+ m_execute_worker_thread = true;
+}
+
+CoreOpsScheduler::~CoreOpsScheduler()
+{
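+    // Best effort: deactivate whatever core-op is still active on each device, so
+    // the devices are left idle before stopping the scheduler thread.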
+ for (const auto &pair : m_devices) {
+ auto &device_info = pair.second;
+ if (INVALID_CORE_OP_HANDLE != device_info->current_core_op_handle) {
+ auto current_core_op = m_scheduled_core_ops[device_info->current_core_op_handle]->get_core_op();
+ auto current_core_op_bundle = std::dynamic_pointer_cast<VDeviceCoreOp>(current_core_op);
+ assert(nullptr != current_core_op_bundle);
+ auto vdma_core_op = current_core_op_bundle->get_core_op_by_device_id(device_info->device_id);
+ if (!vdma_core_op) {
+ LOGGER__ERROR("Error retrieving core-op in scheduler destructor");
+ } else {
+ if (HAILO_SUCCESS != VdmaConfigManager::deactivate_core_op(vdma_core_op.value())) {
+ LOGGER__ERROR("Error deactivating core-op when destroying scheduler");
+ }
+ }
+ }
+ }
+
+ // signal scheduler thread to stop and join
+ {
+ std::unique_lock<std::mutex> lock(m_before_read_write_mutex);
+ m_is_running = false;
+ m_execute_worker_thread = true;
+ }
+ m_scheduler_cv.notify_one();
+ if (m_scheduler_thread.joinable()) {
+ m_scheduler_thread.join();
+ }
+}
+
+Expected<CoreOpsSchedulerPtr> CoreOpsScheduler::create_round_robin(std::vector<std::string> &devices_bdf_id, std::vector<std::string> &devices_arch)
+{
+ auto ptr = make_shared_nothrow<CoreOpsScheduler>(HAILO_SCHEDULING_ALGORITHM_ROUND_ROBIN, devices_bdf_id, devices_arch);
+ CHECK_AS_EXPECTED(nullptr != ptr, HAILO_OUT_OF_HOST_MEMORY);
+
+ return ptr;
+}
+
+std::string CoreOpsScheduler::get_core_op_name(const scheduler_core_op_handle_t &core_op_handle)
+{
+ assert(m_scheduled_core_ops.size() > core_op_handle);
+ return m_scheduled_core_ops[core_op_handle]->get_core_op_name();
+}
+
+Expected<scheduler_core_op_handle_t> CoreOpsScheduler::add_core_op(std::shared_ptr<CoreOp> added_cng)
+{
+ scheduler_core_op_handle_t core_op_handle = INVALID_CORE_OP_HANDLE;
+ {
+ std::unique_lock<std::mutex> lock(m_before_read_write_mutex);
+ core_op_handle = static_cast<uint32_t>(m_scheduled_core_ops.size());
+
+ auto stream_infos = added_cng->get_all_stream_infos();
+ CHECK_EXPECTED(stream_infos);
+
+ auto scheduled_core_op = ScheduledCoreOp::create(added_cng, stream_infos.value());
+ CHECK_EXPECTED(scheduled_core_op);
+
+ bool is_nms = scheduled_core_op->get()->is_nms();
+ TRACE(AddCoreOpTrace, "", added_cng->name(), DEFAULT_SCHEDULER_TIMEOUT.count(), DEFAULT_SCHEDULER_MIN_THRESHOLD,
+ core_op_handle, is_nms);
+
+ m_scheduled_core_ops.emplace_back(scheduled_core_op.release());
+
+ for (const auto &stream_info : stream_infos.value()) {
+ m_should_core_op_stop[core_op_handle][stream_info.name] = false;
+ }
+
+ for (const auto &pair : m_devices) {
+ auto &device_info = pair.second;
+ for (const auto &stream_info : stream_infos.value()) {
+ if (HAILO_H2D_STREAM == stream_info.direction) {
+ device_info->current_cycle_requested_transferred_frames_h2d[core_op_handle][stream_info.name] = 0;
+ } else {
+ device_info->current_cycle_finished_transferred_frames_d2h[core_op_handle][stream_info.name] = 0;
+ device_info->pending_to_read_frames[core_op_handle][stream_info.name] = 0;
+ }
+ }
+ }
+
+ auto network_cvs = ScheduledCoreOpCV::create(added_cng);
+ CHECK_EXPECTED(network_cvs);
+ m_core_ops_cvs[core_op_handle] = network_cvs.release();
+ m_core_op_priority[HAILO_SCHEDULER_PRIORITY_NORMAL].emplace_back(core_op_handle);
+ }
+
+ return core_op_handle;
+}
+
+bool CoreOpsScheduler::is_core_op_active(const scheduler_core_op_handle_t &core_op_handle)
+{
+ for (const auto &pair : m_devices) {
+ auto &device_info = pair.second;
+ if (core_op_handle == device_info->current_core_op_handle) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool CoreOpsScheduler::is_multi_device()
+{
+ return m_devices.size() > 1;
+}
+
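+// Called from the stream write path: records a new pending frame and wakes the
+// scheduler worker thread, which decides when to actually dispatch it to a device.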
+hailo_status CoreOpsScheduler::signal_frame_pending_to_send(const scheduler_core_op_handle_t &core_op_handle, const std::string &stream_name)
+{
+ {
+ std::unique_lock<std::mutex> lock(m_before_read_write_mutex);
+ assert(m_scheduled_core_ops.size() > core_op_handle);
+ auto scheduled_core_op = m_scheduled_core_ops[core_op_handle];
+
+ if (should_core_op_stop(core_op_handle)) {
+ return HAILO_STREAM_ABORTED_BY_USER;
+ }
+
+ TRACE(WriteFrameTrace, "", core_op_handle, stream_name);
+
+ m_scheduled_core_ops[core_op_handle]->mark_frame_sent();
+ scheduled_core_op->pending_to_send_frames().increase(stream_name);
+ m_execute_worker_thread = true;
+ }
+ m_scheduler_cv.notify_one();
+
+ return HAILO_SUCCESS;
+}
+
+hailo_status CoreOpsScheduler::switch_core_op(const scheduler_core_op_handle_t &core_op_handle, const device_id_t &device_id, bool /*keep_nn_config*/)
+{
+ auto scheduled_core_op = m_scheduled_core_ops[core_op_handle];
+ assert(contains(m_devices, device_id));
+ auto curr_device_info = m_devices[device_id];
+ curr_device_info->is_switching_core_op = false;
+
+ // initialize current cycle maps
+ for (const auto &name : scheduled_core_op->get_inputs_names()) {
+ curr_device_info->current_cycle_requested_transferred_frames_h2d[core_op_handle][name] = 0;
+ }
+
+ for (const auto &name : scheduled_core_op->get_outputs_names()) {
+ curr_device_info->current_cycle_finished_transferred_frames_d2h[core_op_handle][name] = 0;
+ }
+
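+    // The batch actually sent now is bounded by the available vdma buffers; in the
+    // dynamic batch flow it is also bounded by the frames already pending to send,
+    // and the hw batch size is configured to match.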
+ uint16_t batch_size = std::min(scheduled_core_op->get_max_batch_size(), get_min_avail_buffers_count(core_op_handle, device_id));
+ uint16_t hw_batch_size = SINGLE_CONTEXT_BATCH_SIZE;
+
+ if (scheduled_core_op->use_dynamic_batch_flow()) {
+ batch_size = std::min(static_cast<uint16_t>(scheduled_core_op->pending_to_send_frames_min_value()), batch_size);
+ hw_batch_size = batch_size;
+ }
+
+ if (batch_size == 0) {
+ return HAILO_SUCCESS;
+ }
+
+ bool has_same_hw_batch_size_as_previous = scheduled_core_op->use_dynamic_batch_flow() ? (curr_device_info->current_batch_size == batch_size) : true;
+ curr_device_info->current_batch_size = batch_size;
+
+ if ((core_op_handle != curr_device_info->current_core_op_handle) || (!has_same_hw_batch_size_as_previous)) {
+ assert(m_scheduled_core_ops.size() > core_op_handle);
+ auto next_active_cng = scheduled_core_op->get_core_op();
+ auto next_active_cng_wrapper = std::dynamic_pointer_cast<VDeviceCoreOp>(next_active_cng);
+ assert(nullptr != next_active_cng_wrapper);
+ auto next_active_cng_expected = next_active_cng_wrapper->get_core_op_by_device_id(curr_device_info->device_id);
+ CHECK_EXPECTED_AS_STATUS(next_active_cng_expected);
+
+ std::shared_ptr<VdmaConfigCoreOp> current_active_vdma_cng = nullptr;
+ if (curr_device_info->current_core_op_handle != INVALID_CORE_OP_HANDLE) {
+ auto current_active_cng = m_scheduled_core_ops[curr_device_info->current_core_op_handle]->get_core_op();
+ auto current_active_cng_bundle = std::dynamic_pointer_cast<VDeviceCoreOp>(current_active_cng);
+ assert(nullptr != current_active_cng_bundle);
+ auto current_active_cng_expected = current_active_cng_bundle->get_core_op_by_device_id(curr_device_info->device_id);
+ CHECK_EXPECTED_AS_STATUS(current_active_cng_expected);
+ current_active_vdma_cng = current_active_cng_expected.release();
+
+ // Flushing h2d channel in order to make sure we got all interrupts before switching the network.
+ for (auto &stream : current_active_vdma_cng->get_input_streams()) {
+ auto status = stream.get().flush();
+ if (HAILO_STREAM_ABORTED_BY_USER == status) {
+ continue;
+ }
+ CHECK_SUCCESS(status);
+ }
+ }
+
+ TRACE(SwitchCoreOpTrace, device_id, core_op_handle);
+ static const auto RESUME_PENDING_STREAM_TRANSFERS = true;
+ auto status = VdmaConfigManager::switch_core_op(current_active_vdma_cng, next_active_cng_expected.value(), hw_batch_size,
+ RESUME_PENDING_STREAM_TRANSFERS);
+ CHECK_SUCCESS(status, "Failed switching core-op");
+ }
+
+ scheduled_core_op->set_last_run_timestamp(std::chrono::steady_clock::now()); // Mark timestamp on activation
+ curr_device_info->current_core_op_handle = core_op_handle;
+
+ auto status = send_all_pending_buffers(core_op_handle, device_id, batch_size);
+ if (HAILO_STREAM_ABORTED_BY_USER == status) {
+ LOGGER__INFO("send_all_pending_buffers has failed with status=HAILO_STREAM_ABORTED_BY_USER");
+ return status;
+ }
+ CHECK_SUCCESS(status);
+
+ return HAILO_SUCCESS;
+}
+
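+// Updates the frame counters once the user has finished reading a frame. For NMS
+// core-ops, also re-evaluates which core-op the device should switch to next.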
+void CoreOpsScheduler::signal_read_finish_impl(const scheduler_core_op_handle_t &core_op_handle,
+ const std::string &stream_name, const device_id_t &device_id)
+{
+ TRACE(ReadFrameTrace, "", core_op_handle, stream_name);
+
+ auto scheduled_core_op = m_scheduled_core_ops[core_op_handle];
+ scheduled_core_op->requested_read_frames().decrease(stream_name);
+ scheduled_core_op->finished_read_frames().increase(stream_name);
+ scheduled_core_op->d2h_finished_transferred_frames().decrease(stream_name);
+
+ if (m_devices[device_id]->pending_to_read_frames[core_op_handle][stream_name] > 0) {
+ m_devices[device_id]->pending_to_read_frames[core_op_handle][stream_name]--;
+ }
+
+ decrease_core_op_counters(core_op_handle);
+
+ auto has_drained_everything = has_core_op_drained_everything(core_op_handle, device_id);
+ if (scheduled_core_op->is_nms() && has_drained_everything) {
+        // In NMS networks there is a possibility that the next core-op wasn't chosen yet
+ choose_next_core_op(device_id, true);
+
+        // If nothing was chosen based on threshold or timeout, choose again without checking the threshold
+ if (!m_devices[device_id]->is_switching_core_op) {
+ choose_next_core_op(device_id, false);
+ }
+
+        TRACE(CoreOpIdleTrace, device_id, core_op_handle);
+ }
+
+ m_execute_worker_thread = true;
+}
+
+hailo_status CoreOpsScheduler::send_all_pending_buffers(const scheduler_core_op_handle_t &core_op_handle, const device_id_t &device_id, uint32_t burst_size)
+{
+ auto current_device_info = m_devices[device_id];
+ if ((INVALID_CORE_OP_HANDLE == current_device_info->current_core_op_handle) || (current_device_info->current_core_op_handle != core_op_handle)) {
+ return HAILO_SUCCESS;
+ }
+
+ auto scheduled_core_op = m_scheduled_core_ops[core_op_handle];
+
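+    // Each iteration sends one frame on every input stream; stop early once any input has no pending frames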
+ for (size_t i = 0; i < burst_size; i++) {
+ auto finished_send = false;
+ for (const auto &name : scheduled_core_op->get_inputs_names()) {
+ if (scheduled_core_op->pending_to_send_frames(name) == 0) {
+ finished_send = true;
+ break;
+ }
+ }
+ if (finished_send) {
+ break;
+ }
+
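+        // Mark which device the next frame on each output stream should be read from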
+ for (const auto &name : scheduled_core_op->get_outputs_names()) {
+ auto output_stream = scheduled_core_op->get_core_op()->get_output_stream_by_name(name);
+ CHECK_EXPECTED_AS_STATUS(output_stream);
+
+ auto &output_stream_base = static_cast<OutputStreamBase&>(output_stream->get());
+ auto status = output_stream_base.set_next_device_to_read(device_id);
+ CHECK_SUCCESS(status);
+ }
+
+ for (const auto &name : scheduled_core_op->get_inputs_names()) {
+ auto status = send_pending_buffer(core_op_handle, name, device_id);
+ if (HAILO_STREAM_ABORTED_BY_USER == status) {
+ LOGGER__INFO("send_pending_buffer has failed with status=HAILO_STREAM_ABORTED_BY_USER");
+ return status;
+ }
+ CHECK_SUCCESS(status);
+ }
+ scheduled_core_op->set_last_device(device_id);
+ }
+
+ return HAILO_SUCCESS;
+}
+
+hailo_status CoreOpsScheduler::send_pending_buffer(const scheduler_core_op_handle_t &core_op_handle, const std::string &stream_name,
+ const device_id_t &device_id)
+{
+ assert(m_scheduled_core_ops.size() > core_op_handle);
+ auto scheduled_core_op = m_scheduled_core_ops[core_op_handle];
+
+ auto current_cng = scheduled_core_op->get_core_op();
+ auto input_stream = current_cng->get_input_stream_by_name(stream_name);
+ CHECK_EXPECTED_AS_STATUS(input_stream);
+
+ auto &input_stream_base = static_cast<InputStreamBase&>(input_stream->get());
+ auto status = input_stream_base.send_pending_buffer(device_id);
+ if (HAILO_STREAM_ABORTED_BY_USER == status) {
+ LOGGER__INFO("send_pending_buffer has failed with status=HAILO_STREAM_ABORTED_BY_USER");
+ return status;
+ }
+ CHECK_SUCCESS(status);
+
+ TRACE(InputVdmaDequeueTrace, device_id, core_op_handle, stream_name);
+
+ m_devices[device_id]->current_cycle_requested_transferred_frames_h2d[core_op_handle][stream_name]++;
+ scheduled_core_op->pending_to_send_frames().decrease(stream_name);
+    // Notify threads waiting for flush to complete
+ m_core_ops_cvs[core_op_handle]->notify_one(stream_name);
+
+ scheduled_core_op->h2d_finished_transferred_frames().increase(stream_name);
+
+ if (should_core_op_stop(core_op_handle)) {
+ return HAILO_STREAM_ABORTED_BY_USER;
+ }
+
+ return HAILO_SUCCESS;
+}
+
+CoreOpsScheduler::ReadyInfo CoreOpsScheduler::is_core_op_ready(const scheduler_core_op_handle_t &core_op_handle, bool check_threshold)
+{
+ ReadyInfo result;
+ result.is_ready = false;
+
+ if (should_core_op_stop(core_op_handle)) {
+ // Do not switch to an aborted core-op
+ return result;
+ }
+
+ auto scheduled_core_op = m_scheduled_core_ops[core_op_handle];
+    // Check that every input stream has at least one pending write request
+ const bool has_pending_writes = scheduled_core_op->pending_to_send_frames_min_value() > 0;
+
+    // Check that every output stream has room for at least one more frame
+ const bool has_avail_pending_to_read_buffers = get_min_avail_output_buffers(core_op_handle) > 0;
+
+ std::vector<bool> over_threshold;
+ over_threshold.reserve(scheduled_core_op->get_inputs_names().size());
+ std::vector<bool> over_timeout;
+ over_timeout.reserve(scheduled_core_op->get_inputs_names().size());
+
+ if (check_threshold) {
+ for (const auto &name : scheduled_core_op->get_inputs_names()) {
+ auto threshold_exp = scheduled_core_op->get_threshold(name);
+ if (!threshold_exp) {
+ LOGGER__ERROR("Failed to get threshold for stream {}", name);
+ return result;
+ }
+ auto threshold = (DEFAULT_SCHEDULER_MIN_THRESHOLD == threshold_exp.value()) ? 1 : threshold_exp.value();
+ auto timeout_exp = scheduled_core_op->get_timeout();
+ if (!timeout_exp) {
+ LOGGER__ERROR("Failed to get timeout for stream {}", name);
+ return result;
+ }
+ auto timeout = timeout_exp.release();
+
+            // A stream blocks readiness if it has neither reached its threshold nor timed out
+ uint32_t write_requests = scheduled_core_op->pending_to_send_frames(name);
+ auto stream_over_threshold = write_requests >= threshold;
+ auto stream_over_timeout = timeout <= (std::chrono::steady_clock::now() - scheduled_core_op->get_last_run_timestamp());
+ over_threshold.push_back(stream_over_threshold);
+ over_timeout.push_back(stream_over_timeout);
+            if (!stream_over_threshold && !stream_over_timeout) {
+                result.is_ready = false;
+                return result;
+            }
+ }
+ result.over_threshold = std::all_of(over_threshold.begin(), over_threshold.end(), [](auto over) { return over; });
+ result.over_timeout = std::all_of(over_timeout.begin(), over_timeout.end(), [](auto over) { return over; });
+ }
+
+ result.is_ready = has_pending_writes && has_avail_pending_to_read_buffers;
+
+ return result;
+}
+
+hailo_status CoreOpsScheduler::wait_for_read(const scheduler_core_op_handle_t &core_op_handle,
+ const std::string &stream_name, const std::chrono::milliseconds &timeout, const std::function<bool()> &predicate)
+{
+ std::unique_lock<std::mutex> lock(m_before_read_write_mutex);
+
+ hailo_status status = HAILO_SUCCESS;
+ auto wait_res = m_core_ops_cvs[core_op_handle]->wait_for(stream_name, lock, timeout,
+ [this, core_op_handle, predicate, &stream_name, &status] {
+ if (m_should_core_op_stop[core_op_handle][stream_name]) {
+ status = HAILO_STREAM_ABORTED_BY_USER;
+ return true; // return true so that the wait will finish
+ }
+
+ return predicate();
+ });
+ CHECK(wait_res, HAILO_TIMEOUT, "{} (D2H) failed with status={}, timeout={}ms", stream_name, HAILO_TIMEOUT, timeout.count());
+ if (HAILO_STREAM_ABORTED_BY_USER == status) {
+ return status;
+ }
+ CHECK_SUCCESS(status);
+
+ return HAILO_SUCCESS;
+}
+
+hailo_status CoreOpsScheduler::signal_frame_pending_to_read(const scheduler_core_op_handle_t &core_op_handle, const std::string &stream_name)
+{
+ {
+ std::unique_lock<std::mutex> lock(m_before_read_write_mutex);
+
+ auto scheduled_core_op = m_scheduled_core_ops[core_op_handle];
+ scheduled_core_op->requested_read_frames().increase(stream_name);
+ m_execute_worker_thread = true;
+ }
+ m_scheduler_cv.notify_one();
+
+ return HAILO_SUCCESS;
+}
+
+void CoreOpsScheduler::signal_frame_transferred_d2h(const scheduler_core_op_handle_t &core_op_handle,
+ const std::string &stream_name, const device_id_t &device_id)
+{
+ {
+ std::unique_lock<std::mutex> lock(m_before_read_write_mutex);
+
+ auto scheduled_core_op = m_scheduled_core_ops[core_op_handle];
+ if (!scheduled_core_op->is_nms()) {
+ TRACE(OutputVdmaEnqueueTrace, "", core_op_handle, stream_name, 1);
+ // TODO: Remove d2h_finished_transferred_frames and use current_cycle_finished_transferred_frames_d2h instead
+ scheduled_core_op->d2h_finished_transferred_frames().increase(stream_name);
+ m_devices[device_id]->pending_to_read_frames[core_op_handle][stream_name] += 1;
+ m_devices[device_id]->current_cycle_finished_transferred_frames_d2h[core_op_handle][stream_name] += 1;
+ }
+
+ auto has_drained_everything = has_core_op_drained_everything(core_op_handle, device_id);
+
+ if (has_drained_everything) {
+ TRACE(CoreOpIdleTrace, device_id, core_op_handle);
+ }
+
+        // If the core-op has drained everything and the next one wasn't chosen yet, try choosing with the threshold first and then without it
+ if (!m_devices[device_id]->is_switching_core_op && has_drained_everything) {
+ auto was_chosen = choose_next_core_op(device_id, true);
+ if (!was_chosen) {
+ choose_next_core_op(device_id, false);
+ }
+ }
+
+ if (m_devices[device_id]->is_switching_core_op) {
+ m_execute_worker_thread = true;
+ }
+ }
+
+    // Notify the stream that a new frame was accepted (wakes up wait_for_read)
+ m_core_ops_cvs[core_op_handle]->notify_one(stream_name);
+ m_scheduler_cv.notify_one();
+}
+
+hailo_status CoreOpsScheduler::signal_read_finish(const scheduler_core_op_handle_t &core_op_handle,
+ const std::string &stream_name, const device_id_t &device_id)
+{
+ {
+ std::unique_lock<std::mutex> lock(m_before_read_write_mutex);
+ signal_read_finish_impl(core_op_handle, stream_name, device_id);
+ }
+ m_scheduler_cv.notify_one();
+ return HAILO_SUCCESS;
+}
+
+void CoreOpsScheduler::decrease_core_op_counters(const scheduler_core_op_handle_t &core_op_handle)
+{
+ return m_scheduled_core_ops[core_op_handle]->decrease_current_core_op_counters();
+}
+
+bool CoreOpsScheduler::has_core_op_drained_everything(const scheduler_core_op_handle_t &core_op_handle, const device_id_t &device_id)
+{
+ if (core_op_all_streams_aborted(core_op_handle)) {
+        // If all streams are aborted we treat the core-op as drained, to make sure there aren't any ongoing transfers
+ return true;
+ }
+
+ if (INVALID_CORE_OP_HANDLE == core_op_handle) {
+ // If no core-op is running, consider it as drained
+ return true;
+ }
+
+ if ((!m_scheduled_core_ops[core_op_handle]->is_nms()) && (is_multi_device() || m_scheduled_core_ops[core_op_handle]->use_dynamic_batch_flow())) {
+ auto current_device_info = m_devices[device_id];
+ auto max_transferred_h2d = get_max_value_of_unordered_map(current_device_info->current_cycle_requested_transferred_frames_h2d[core_op_handle]);
+ auto min_transferred_d2h = get_min_value_of_unordered_map(current_device_info->current_cycle_finished_transferred_frames_d2h[core_op_handle]);
+
+ return (max_transferred_h2d == min_transferred_d2h);
+ }
+
+ return m_scheduled_core_ops[core_op_handle]->has_core_op_drained_everything();
+}
+
+hailo_status CoreOpsScheduler::flush_pending_buffers(const scheduler_core_op_handle_t &core_op_handle, const std::string &stream_name,
+ const std::chrono::milliseconds &timeout)
+{
+ std::unique_lock<std::mutex> lock(m_before_read_write_mutex);
+
+ hailo_status status = HAILO_SUCCESS;
+ auto wait_res = m_core_ops_cvs[core_op_handle]->wait_for(stream_name, lock, timeout,
+ [this, core_op_handle, &stream_name, &status] {
+ if (should_core_op_stop(core_op_handle)) {
+ status = HAILO_STREAM_ABORTED_BY_USER;
+ return true; // return true so that the wait will finish
+ }
+
+ assert(m_scheduled_core_ops.size() > core_op_handle);
+ auto scheduled_core_op = m_scheduled_core_ops[core_op_handle];
+ auto pending = scheduled_core_op->pending_to_send_frames(stream_name).load();
+ return (pending == 0);
+ });
+ CHECK(wait_res, HAILO_TIMEOUT, "{} (H2D) failed with status={}, timeout={}ms", stream_name, HAILO_TIMEOUT, timeout.count());
+ if (HAILO_STREAM_ABORTED_BY_USER == status) {
+ LOGGER__INFO("flush pending buffers was aborted in stream ={}", stream_name);
+ return status;
+ }
+ CHECK_SUCCESS(status);
+
+ return HAILO_SUCCESS;
+}
+
+hailo_status CoreOpsScheduler::enable_stream(const scheduler_core_op_handle_t &core_op_handle, const std::string &stream_name)
+{
+ {
+ std::unique_lock<std::mutex> lock(m_before_read_write_mutex);
+
+ if (!m_should_core_op_stop[core_op_handle][stream_name]) {
+ return HAILO_SUCCESS;
+ }
+
+ m_should_core_op_stop[core_op_handle][stream_name] = false;
+ }
+ m_core_ops_cvs[core_op_handle]->notify_all();
+
+ return HAILO_SUCCESS;
+}
+
+hailo_status CoreOpsScheduler::disable_stream(const scheduler_core_op_handle_t &core_op_handle, const std::string &stream_name)
+{
+ {
+ std::unique_lock<std::mutex> lock(m_before_read_write_mutex);
+
+ if (m_should_core_op_stop[core_op_handle][stream_name]) {
+ return HAILO_SUCCESS;
+ }
+
+ m_should_core_op_stop[core_op_handle][stream_name] = true;
+ }
+ m_core_ops_cvs[core_op_handle]->notify_all();
+
+ return HAILO_SUCCESS;
+}
+
+hailo_status CoreOpsScheduler::set_timeout(const scheduler_core_op_handle_t &core_op_handle, const std::chrono::milliseconds &timeout, const std::string &/*network_name*/)
+{
+ // TODO: call in loop for set_timeout with the relevant stream-names (of the given network)
+ return m_scheduled_core_ops[core_op_handle]->set_timeout(timeout);
+}
+
+hailo_status CoreOpsScheduler::set_threshold(const scheduler_core_op_handle_t &core_op_handle, uint32_t threshold, const std::string &/*network_name*/)
+{
+    // TODO: call in loop for set_threshold with the relevant stream-names (of the given network)
+ return m_scheduled_core_ops[core_op_handle]->set_threshold(threshold);
+}
+
+hailo_status CoreOpsScheduler::set_priority(const scheduler_core_op_handle_t &core_op_handle, core_op_priority_t priority, const std::string &/*network_name*/)
+{
+ CHECK(priority <= HAILO_SCHEDULER_PRIORITY_MAX, HAILO_INVALID_ARGUMENT);
+ std::unique_lock<std::mutex> lock(m_before_read_write_mutex);
+ auto old_priority = m_scheduled_core_ops[core_op_handle]->get_priority();
+ auto &priority_vector = m_core_op_priority[old_priority];
+ auto it = std::find(priority_vector.begin(), priority_vector.end(), core_op_handle);
+ CHECK(it != priority_vector.end(), HAILO_INTERNAL_FAILURE);
+
+ priority_vector.erase(it);
+ m_scheduled_core_ops[core_op_handle]->set_priority(priority);
+ m_core_op_priority[priority].push_back(core_op_handle);
+
+ return HAILO_SUCCESS;
+}
+
+bool CoreOpsScheduler::choose_next_core_op(const device_id_t &device_id, bool check_threshold)
+{
+ if (!m_devices[device_id]->is_switching_core_op) {
+ return CoreOpsSchedulerOracle::choose_next_model(*this, m_devices[device_id]->device_id, check_threshold) != INVALID_CORE_OP_HANDLE;
+ }
+ return false;
+}
+
+bool CoreOpsScheduler::should_core_op_stop(const scheduler_core_op_handle_t &core_op_handle)
+{
+ for (const auto &name_flag_pair : m_should_core_op_stop[core_op_handle]) {
+ if (name_flag_pair.second) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool CoreOpsScheduler::core_op_all_streams_aborted(const scheduler_core_op_handle_t &core_op_handle)
+{
+ for (const auto &name_flag_pair : m_should_core_op_stop[core_op_handle]) {
+ if (!name_flag_pair.second) {
+ return false;
+ }
+ }
+ return true;
+}
+
+void CoreOpsScheduler::notify_all()
+{
+ {
+        // Acquire the mutex to make sure notify_all will wake the threads blocked on the cv
+ std::unique_lock<std::mutex> lock(m_before_read_write_mutex);
+ }
+    // TODO: consider notifying only the relevant core-op or stream
+ for (auto &cng_cvs : m_core_ops_cvs) {
+ cng_cvs.second->notify_all();
+ }
+}
+
+hailo_status CoreOpsScheduler::optimize_streaming_if_enabled(const scheduler_core_op_handle_t &core_op_handle)
+{
+ auto scheduled_core_op = m_scheduled_core_ops[core_op_handle];
+ if (!scheduled_core_op->use_dynamic_batch_flow()) {
+        auto next_pair = m_devices.upper_bound(scheduled_core_op->get_last_device()); // Get the last device and move to the next device in the map
+        if (m_devices.end() == next_pair) { // If we reached the end of the map - start from the beginning
+ next_pair = m_devices.begin();
+ }
+ auto &device_info = next_pair->second;
+ if (device_info->current_core_op_handle == core_op_handle && !device_info->is_switching_core_op &&
+ !CoreOpsSchedulerOracle::should_stop_streaming(*this, scheduled_core_op->get_priority(), device_info->device_id) &&
+ (get_min_avail_buffers_count(core_op_handle, device_info->device_id) >= DEFAULT_BURST_SIZE)) {
+ auto status = send_all_pending_buffers(core_op_handle, device_info->device_id, DEFAULT_BURST_SIZE);
+ if (HAILO_STREAM_ABORTED_BY_USER == status) {
+ LOGGER__INFO("send_all_pending_buffers has failed with status=HAILO_STREAM_ABORTED_BY_USER");
+ return status;
+ }
+ CHECK_SUCCESS(status);
+ }
+ }
+ return HAILO_SUCCESS;
+}
+
+uint16_t CoreOpsScheduler::get_min_avail_buffers_count(const scheduler_core_op_handle_t &core_op_handle, const device_id_t &device_id)
+{
+ auto scheduled_core_op = m_scheduled_core_ops[core_op_handle];
+ auto device_info = m_devices[device_id];
+
+ uint16_t avail_buffer_count = UINT16_MAX;
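+    // The available count is limited by the most occupied output buffer: frames pending to be read plus in-flight frames take up space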
+ for (auto &output_stream : scheduled_core_op->get_core_op()->get_output_streams()) {
+ auto &vdevice_output = static_cast<OutputStreamBase&>(output_stream.get());
+ if (auto buffer_size_in_frames = vdevice_output.get_buffer_frames_size()) {
+ auto &pending_frames_in_buffer = device_info->pending_to_read_frames[core_op_handle][vdevice_output.name()];
+ auto ongoing_frames = get_max_value_of_unordered_map(device_info->current_cycle_requested_transferred_frames_h2d[core_op_handle]) -
+ device_info->current_cycle_finished_transferred_frames_d2h[core_op_handle][vdevice_output.name()];
+ assert(*buffer_size_in_frames >= (pending_frames_in_buffer + ongoing_frames));
+ avail_buffer_count = std::min(avail_buffer_count, static_cast<uint16_t>(*buffer_size_in_frames - pending_frames_in_buffer - ongoing_frames));
+ }
+ }
+
+ auto transferred_frames = get_max_value_of_unordered_map(device_info->current_cycle_requested_transferred_frames_h2d[core_op_handle]) -
+ get_min_value_of_unordered_map(device_info->current_cycle_finished_transferred_frames_d2h[core_op_handle]);
+ if (is_multi_device()) {
+ auto avail_input_buffer_count = static_cast<uint16_t>((scheduled_core_op->get_min_input_buffers_count()) - transferred_frames);
+ avail_buffer_count = std::min(avail_input_buffer_count, avail_buffer_count);
+ }
+
+ return avail_buffer_count;
+}
+
+uint16_t CoreOpsScheduler::get_min_avail_output_buffers(const scheduler_core_op_handle_t &core_op_handle)
+{
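+    // Frames sent to the device but not yet read back by the user still occupy output buffer space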
+ auto scheduled_core_op = m_scheduled_core_ops[core_op_handle];
+ auto sent_frames = scheduled_core_op->h2d_finished_transferred_frames_max_value() -
+ scheduled_core_op->finished_read_frames_min_value();
+
+ return static_cast<uint16_t>((scheduled_core_op->get_min_output_buffers_count()) - sent_frames);
+}
+
+void CoreOpsScheduler::worker_thread_main()
+{
+ OsUtils::set_current_thread_name("SCHEDULER");
+ std::unique_lock<std::mutex> lock(m_before_read_write_mutex);
+ while (m_is_running) {
+
+ m_scheduler_cv.wait(lock, [this]() {
+ return m_execute_worker_thread.load();
+ });
+ m_execute_worker_thread = false;
+
+ if (!m_is_running) {
+ break;
+ }
+
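+        // First, keep active core-ops streaming by pushing their pending frames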
+ for (uint32_t core_op_handle = 0; core_op_handle < m_scheduled_core_ops.size(); core_op_handle++) {
+ auto status = optimize_streaming_if_enabled(core_op_handle);
+ if (HAILO_STREAM_ABORTED_BY_USER == status) {
+ continue;
+ }
+
+ if (HAILO_SUCCESS != status) {
+ if (m_is_running) {
+ LOGGER__ERROR("Scheduler thread failed with status={}", status);
+ }
+ break;
+ }
+ }
+
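+        // Then apply the oracle's switch decisions (pending switches and idle devices)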
+ auto oracle_decisions = CoreOpsSchedulerOracle::get_oracle_decisions(*this);
+
+ for (const auto &run_params : oracle_decisions) {
+ auto status = switch_core_op(run_params.core_op_handle, run_params.device_id);
+ if (HAILO_STREAM_ABORTED_BY_USER == status) {
+ continue;
+ }
+
+ if (HAILO_SUCCESS != status) {
+ if (m_is_running) {
+ LOGGER__ERROR("Scheduler thread failed with status={}", status);
+ }
+ break;
+ }
+ }
+ }
+}
+
+} /* namespace hailort */
\ No newline at end of file
--- /dev/null
+/**
+ * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+ **/
+/**
+ * @file scheduler.hpp
+ * @brief Class declaration for CoreOpsScheduler that schedules core-ops to be active depending on the scheduling algorithm.
+ **/
+
+#ifndef _HAILO_SCHEDULER_HPP_
+#define _HAILO_SCHEDULER_HPP_
+
+#include "hailo/hailort.h"
+#include "hailo/expected.hpp"
+
+#include "common/utils.hpp"
+#include "common/filesystem.hpp"
+
+#include "vdevice/scheduler/scheduled_core_op_state.hpp"
+#include "vdevice/scheduler/scheduled_core_op_cv.hpp"
+#include "vdevice/scheduler/scheduler_base.hpp"
+
+
+namespace hailort
+{
+
+#define INVALID_CORE_OP_HANDLE (UINT32_MAX)
+
+using scheduler_core_op_handle_t = uint32_t;
+using core_op_priority_t = uint8_t;
+
+class CoreOpsScheduler;
+using CoreOpsSchedulerPtr = std::shared_ptr<CoreOpsScheduler>;
+
+// We mostly use weak pointers to the scheduler to prevent circular dependencies
+using CoreOpsSchedulerWeakPtr = std::weak_ptr<CoreOpsScheduler>;
+
+using stream_name_t = std::string;
+
+class CoreOpsScheduler : public SchedulerBase
+{
+public:
+ static Expected<CoreOpsSchedulerPtr> create_round_robin(std::vector<std::string> &devices_ids,
+ std::vector<std::string> &devices_arch);
+ CoreOpsScheduler(hailo_scheduling_algorithm_t algorithm, std::vector<std::string> &devices_ids,
+ std::vector<std::string> &devices_arch);
+
+ virtual ~CoreOpsScheduler();
+ CoreOpsScheduler(const CoreOpsScheduler &other) = delete;
+ CoreOpsScheduler &operator=(const CoreOpsScheduler &other) = delete;
+ CoreOpsScheduler &operator=(CoreOpsScheduler &&other) = delete;
+ CoreOpsScheduler(CoreOpsScheduler &&other) noexcept = delete;
+
+ Expected<scheduler_core_op_handle_t> add_core_op(std::shared_ptr<CoreOp> added_core_op);
+
+ hailo_status signal_frame_pending_to_send(const scheduler_core_op_handle_t &core_op_handle, const std::string &stream_name);
+
+ hailo_status wait_for_read(const scheduler_core_op_handle_t &core_op_handle, const std::string &stream_name,
+ const std::chrono::milliseconds &timeout, const std::function<bool()> &predicate);
+
+ hailo_status signal_frame_pending_to_read(const scheduler_core_op_handle_t &core_op_handle, const std::string &stream_name);
+
+ void signal_frame_transferred_d2h(const scheduler_core_op_handle_t &core_op_handle,
+ const std::string &stream_name, const device_id_t &device_id);
+ hailo_status signal_read_finish(const scheduler_core_op_handle_t &core_op_handle, const std::string &stream_name,
+ const device_id_t &device_id);
+
+ hailo_status enable_stream(const scheduler_core_op_handle_t &core_op_handle, const std::string &stream_name);
+ hailo_status disable_stream(const scheduler_core_op_handle_t &core_op_handle, const std::string &stream_name);
+
+ hailo_status set_timeout(const scheduler_core_op_handle_t &core_op_handle, const std::chrono::milliseconds &timeout, const std::string &network_name);
+ hailo_status set_threshold(const scheduler_core_op_handle_t &core_op_handle, uint32_t threshold, const std::string &network_name);
+ hailo_status set_priority(const scheduler_core_op_handle_t &core_op_handle, core_op_priority_t priority, const std::string &network_name);
+
+ virtual ReadyInfo is_core_op_ready(const scheduler_core_op_handle_t &core_op_handle, bool check_threshold) override;
+ virtual bool has_core_op_drained_everything(const scheduler_core_op_handle_t &core_op_handle, const device_id_t &device_id) override;
+ hailo_status flush_pending_buffers(const scheduler_core_op_handle_t &core_op_handle, const std::string &stream_name, const std::chrono::milliseconds &timeout);
+
+ void notify_all();
+
+protected:
+ bool choose_next_core_op(const device_id_t &device_id, bool check_threshold);
+
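+    // Per core-op and per stream - set when the stream is disabled (aborted) and cleared on enable_stream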
+ std::unordered_map<scheduler_core_op_handle_t, std::map<stream_name_t, std::atomic_bool>> m_should_core_op_stop;
+
+private:
+ hailo_status switch_core_op(const scheduler_core_op_handle_t &core_op_handle, const device_id_t &device_id,
+ bool keep_nn_config = false);
+ // Needs to be called with m_before_read_write_mutex held.
+ void signal_read_finish_impl(const scheduler_core_op_handle_t &core_op_handle, const std::string &stream_name,
+ const device_id_t &device_id);
+
+ hailo_status send_all_pending_buffers(const scheduler_core_op_handle_t &core_op_handle, const device_id_t &device_id, uint32_t burst_size);
+ hailo_status send_pending_buffer(const scheduler_core_op_handle_t &core_op_handle, const std::string &stream_name, const device_id_t &device_id);
+
+ void decrease_core_op_counters(const scheduler_core_op_handle_t &core_op_handle);
+ bool should_core_op_stop(const scheduler_core_op_handle_t &core_op_handle);
+ bool core_op_all_streams_aborted(const scheduler_core_op_handle_t &core_op_handle);
+
+ std::string get_core_op_name(const scheduler_core_op_handle_t &core_op_handle);
+ bool is_core_op_active(const scheduler_core_op_handle_t &core_op_handle);
+ bool is_multi_device();
+
+ hailo_status optimize_streaming_if_enabled(const scheduler_core_op_handle_t &core_op_handle);
+ uint16_t get_min_avail_buffers_count(const scheduler_core_op_handle_t &core_op_handle, const device_id_t &device_id);
+ uint16_t get_min_avail_output_buffers(const scheduler_core_op_handle_t &core_op_handle);
+
+ void worker_thread_main();
+
+ std::vector<std::shared_ptr<ScheduledCoreOp>> m_scheduled_core_ops;
+ std::mutex m_before_read_write_mutex;
+ std::unordered_map<scheduler_core_op_handle_t, std::shared_ptr<ScheduledCoreOpCV>> m_core_ops_cvs;
+
+ std::atomic_bool m_is_running;
+ std::atomic_bool m_execute_worker_thread;
+ std::thread m_scheduler_thread;
+ std::condition_variable m_scheduler_cv;
+};
+} /* namespace hailort */
+
+#endif /* _HAILO_SCHEDULER_HPP_ */
#include "common/utils.hpp"
#include "common/filesystem.hpp"
+#include "stream_common/stream_internal.hpp"
+
#include <condition_variable>
#define DEFAULT_SCHEDULER_MIN_THRESHOLD (0)
#define INVALID_CORE_OP_HANDLE (UINT32_MAX)
-#define INVALID_DEVICE_ID (UINT32_MAX)
using scheduler_core_op_handle_t = uint32_t;
using core_op_priority_t = uint8_t;
using stream_name_t = std::string;
struct ActiveDeviceInfo {
- ActiveDeviceInfo(uint32_t device_id, const std::string &device_bdf_id, const std::string &device_arch) :
+ ActiveDeviceInfo(const device_id_t &device_id, const std::string &device_arch) :
current_core_op_handle(INVALID_CORE_OP_HANDLE), next_core_op_handle(INVALID_CORE_OP_HANDLE), is_switching_core_op(false),
current_batch_size(0), current_cycle_requested_transferred_frames_h2d(), current_cycle_finished_transferred_frames_d2h(),
- current_cycle_finished_read_frames_d2h(), device_id(device_id), device_bdf_id(device_bdf_id), device_arch(device_arch)
+ pending_to_read_frames(), device_id(device_id), device_arch(device_arch)
{}
scheduler_core_op_handle_t current_core_op_handle;
scheduler_core_op_handle_t next_core_op_handle;
std::atomic_uint32_t current_batch_size;
std::unordered_map<scheduler_core_op_handle_t, std::unordered_map<stream_name_t, std::atomic_uint32_t>> current_cycle_requested_transferred_frames_h2d;
std::unordered_map<scheduler_core_op_handle_t, std::unordered_map<stream_name_t, std::atomic_uint32_t>> current_cycle_finished_transferred_frames_d2h;
- std::unordered_map<scheduler_core_op_handle_t, std::unordered_map<stream_name_t, std::atomic_uint32_t>> current_cycle_finished_read_frames_d2h;
- uint32_t device_id;
- std::string device_bdf_id;
+ std::unordered_map<scheduler_core_op_handle_t, std::unordered_map<stream_name_t, std::atomic_uint32_t>> pending_to_read_frames;
+ device_id_t device_id;
std::string device_arch;
};
}
struct ReadyInfo {
- bool threshold = false;
- bool timeout = false;
+ bool over_threshold = false;
+ bool over_timeout = false;
bool is_ready = false;
};
virtual ReadyInfo is_core_op_ready(const scheduler_core_op_handle_t &core_op_handle, bool check_threshold) = 0;
- virtual bool has_core_op_drained_everything(const scheduler_core_op_handle_t &core_op_handle, uint32_t device_id) = 0;
+ virtual bool has_core_op_drained_everything(const scheduler_core_op_handle_t &core_op_handle, const device_id_t &device_id) = 0;
virtual uint32_t get_device_count() const
{
return static_cast<uint32_t>(m_devices.size());
}
- virtual std::shared_ptr<ActiveDeviceInfo> get_devices_info(uint32_t device_id)
+ virtual std::shared_ptr<ActiveDeviceInfo> get_device_info(const device_id_t &device_id)
{
return m_devices[device_id];
}
+
+ virtual std::map<device_id_t, std::shared_ptr<ActiveDeviceInfo>> &get_device_infos()
+ {
+ return m_devices;
+ }
virtual std::map<core_op_priority_t, std::vector<scheduler_core_op_handle_t>> get_core_op_priority_map()
{
return m_core_op_priority;
}
- virtual scheduler_core_op_handle_t get_last_choosen_core_op(core_op_priority_t priority)
+ virtual scheduler_core_op_handle_t get_next_core_op(core_op_priority_t priority)
{
- return m_last_choosen_core_op[priority];
+ if (!contains(m_next_core_op, priority)) {
+ m_next_core_op[priority] = 0;
+ }
+ return m_next_core_op[priority];
}
- virtual void set_last_choosen_core_op(const core_op_priority_t priority, const scheduler_core_op_handle_t &core_op_handle)
+ virtual void set_next_core_op(const core_op_priority_t priority, const scheduler_core_op_handle_t &core_op_handle)
{
- m_last_choosen_core_op[priority] = core_op_handle;
+ m_next_core_op[priority] = core_op_handle;
}
protected:
- SchedulerBase(hailo_scheduling_algorithm_t algorithm, uint32_t device_count, std::vector<std::string> &devices_bdf_id,
+ SchedulerBase(hailo_scheduling_algorithm_t algorithm, std::vector<std::string> &devices_ids,
std::vector<std::string> &devices_arch) : m_algorithm(algorithm)
{
- for (uint32_t i = 0; i < device_count; i++) {
- m_devices.push_back(make_shared_nothrow<ActiveDeviceInfo>(i, devices_bdf_id[i], devices_arch[i]));
+ for (uint32_t i = 0; i < devices_ids.size(); i++) {
+ m_devices[devices_ids.at(i)] = make_shared_nothrow<ActiveDeviceInfo>(devices_ids[i], devices_arch[i]);
}
};
SchedulerBase &operator=(SchedulerBase &&other) = delete;
SchedulerBase(SchedulerBase &&other) noexcept = delete;
- std::vector<std::shared_ptr<ActiveDeviceInfo>> m_devices;
+ std::map<device_id_t, std::shared_ptr<ActiveDeviceInfo>> m_devices;
+
std::map<core_op_priority_t, std::vector<scheduler_core_op_handle_t>> m_core_op_priority;
hailo_scheduling_algorithm_t m_algorithm;
- std::unordered_map<core_op_priority_t, scheduler_core_op_handle_t> m_last_choosen_core_op;
+ std::unordered_map<core_op_priority_t, scheduler_core_op_handle_t> m_next_core_op;
};
} /* namespace hailort */
+++ /dev/null
-/**
- * Copyright (c) 2022 Hailo Technologies Ltd. All rights reserved.
- * Distributed under the MIT license (https://opensource.org/licenses/MIT)
-**/
-/**
- * @file scheduler_mon.hpp
- * @brief Defines for scheduler monitor of networks.
- **/
-
-#ifndef _HAILO_SCHEDULER_MON_HPP_
-#define _HAILO_SCHEDULER_MON_HPP_
-
-#include "hailo/hailort.h"
-
-#include "common/filesystem.hpp"
-
-#if defined(_MSC_VER)
-#pragma warning(push)
-#pragma warning(disable: 4244 4267 4127)
-#else
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wconversion"
-#endif
-#include "scheduler_mon.pb.h"
-#if defined(_MSC_VER)
-#pragma warning( pop )
-#else
-#pragma GCC diagnostic pop
-#endif
-
-#include <iostream>
-#include <string>
-
-
-namespace hailort
-{
-
-#define SCHEDULER_MON_TMP_DIR ("/tmp/hmon_files/")
-#define SCHEDULER_MON_ENV_VAR ("HAILO_MONITOR")
-#define DEFAULT_SCHEDULER_MON_INTERVAL (std::chrono::seconds(1))
-#define SCHEDULER_MON_NAN_VAL (-1)
-
-class SchedulerMon
-{
-public:
-
- static bool should_monitor()
- {
- #if defined(__GNUC__)
- auto mon_var = std::getenv(SCHEDULER_MON_ENV_VAR);
- return (mon_var != nullptr) && strncmp(mon_var, "1", 1) == 0;
- #else
- // TODO: HRT-7304 - Add support for windows
- return false;
- #endif
- }
-};
-
-} /* namespace hailort */
-
-#endif /* _HAILO_SCHEDULER_MON_HPP_ */
namespace hailort
{
-bool CoreOpsSchedulerOracle::choose_next_model(SchedulerBase &scheduler, uint32_t device_id, bool check_threshold)
+scheduler_core_op_handle_t CoreOpsSchedulerOracle::choose_next_model(SchedulerBase &scheduler, const device_id_t &device_id, bool check_threshold)
{
- auto device_info = scheduler.get_devices_info(device_id);
+ auto device_info = scheduler.get_device_info(device_id);
auto priority_map = scheduler.get_core_op_priority_map();
for (auto iter = priority_map.rbegin(); iter != priority_map.rend(); ++iter) {
auto priority_group_size = iter->second.size();
for (uint32_t i = 0; i < priority_group_size; i++) {
- uint32_t index = scheduler.get_last_choosen_core_op(iter->first) + i + 1;
+ uint32_t index = scheduler.get_next_core_op(iter->first) + i;
index %= static_cast<uint32_t>(priority_group_size);
auto core_op_handle = iter->second[index];
- if (!is_core_op_active(scheduler, core_op_handle)) {
- auto ready_info = scheduler.is_core_op_ready(core_op_handle, check_threshold);
- if (ready_info.is_ready) {
- TRACE(ChooseCoreOpTrace, "", core_op_handle, ready_info.threshold, ready_info.timeout, iter->first);
- device_info->is_switching_core_op = true;
- device_info->next_core_op_handle = core_op_handle;
- scheduler.set_last_choosen_core_op(iter->first, index);
-
- return true;
- }
+ auto ready_info = scheduler.is_core_op_ready(core_op_handle, check_threshold);
+ if (ready_info.is_ready) {
+ TRACE(ChooseCoreOpTrace, "", core_op_handle, ready_info.over_threshold, ready_info.over_timeout, iter->first);
+ device_info->is_switching_core_op = true;
+ device_info->next_core_op_handle = core_op_handle;
+            // Advance the round-robin index so the next search starts after the chosen core-op
+ index = ((index + 1) % static_cast<uint32_t>(priority_group_size));
+ scheduler.set_next_core_op(iter->first, index);
+ return core_op_handle;
}
}
}
- return false;
+ return INVALID_CORE_OP_HANDLE;
}
-// TODO: return device handle instead index
-uint32_t CoreOpsSchedulerOracle::get_avail_device(SchedulerBase &scheduler, scheduler_core_op_handle_t core_op_handle)
-{
- const bool check_threshold = false;
- auto device_count = scheduler.get_device_count();
-
- // Check if should be next
- /* Checking (INVALID_CORE_OP_HANDLE == m_current_core_op) for activating the first time the scheduler is running.
- In this case we don't want to check threshold. */
- for (uint32_t device_index = 0; device_index < device_count; device_index++) {
- auto active_device_info = scheduler.get_devices_info(device_index);
- if (active_device_info->is_switching_core_op && scheduler.has_core_op_drained_everything(active_device_info->current_core_op_handle, active_device_info->device_id) &&
- (((INVALID_CORE_OP_HANDLE == active_device_info->current_core_op_handle) &&
- scheduler.is_core_op_ready(core_op_handle, check_threshold).is_ready) ||
- (active_device_info->next_core_op_handle == core_op_handle))) {
- return active_device_info->device_id;
- }
- }
-
- // Check if device Idle
- // We dont need to check if the core op is ready, because the device is idle and if we arrive here frame is already sent and as a space in the output buffer.
- for (uint32_t device_index = 0; device_index < device_count; device_index++) {
- auto active_device_info = scheduler.get_devices_info(device_index);
- if (!active_device_info->is_switching_core_op && scheduler.has_core_op_drained_everything(active_device_info->current_core_op_handle, active_device_info->device_id)) {
- return active_device_info->device_id;
- }
- }
-
- return INVALID_DEVICE_ID;
-}
-
-bool CoreOpsSchedulerOracle::should_stop_streaming(SchedulerBase &scheduler, core_op_priority_t core_op_priority)
+bool CoreOpsSchedulerOracle::should_stop_streaming(SchedulerBase &scheduler, core_op_priority_t core_op_priority, const device_id_t &device_id)
{
auto priority_map = scheduler.get_core_op_priority_map();
for (auto iter = priority_map.rbegin(); (iter != priority_map.rend()) && (iter->first >= core_op_priority); ++iter) {
auto priority_group_size = iter->second.size();
for (uint32_t i = 0; i < priority_group_size; i++) {
- uint32_t index = scheduler.get_last_choosen_core_op(iter->first) + i + 1;
+ uint32_t index = scheduler.get_next_core_op(iter->first) + i;
index %= static_cast<uint32_t>(priority_group_size);
auto core_op_handle = iter->second[index];
            // We don't want to stay with the same network group if there is another qualified network group
- if ((!is_core_op_active(scheduler, core_op_handle)) && scheduler.is_core_op_ready(core_op_handle, true).is_ready) {
+ if ((!is_core_op_active(scheduler, core_op_handle)) && scheduler.is_core_op_ready(core_op_handle, true).is_ready &&
+ is_core_op_finished_batch(scheduler, device_id)) {
return true;
}
}
bool CoreOpsSchedulerOracle::is_core_op_active(SchedulerBase &scheduler, scheduler_core_op_handle_t core_op_handle)
{
- auto device_count = scheduler.get_device_count();
- for (uint32_t device_index = 0; device_index < device_count; device_index++) {
- auto active_device_info = scheduler.get_devices_info(device_index);
+ auto &devices = scheduler.get_device_infos();
+ for (const auto &pair : devices) {
+ auto &active_device_info = pair.second;
if (core_op_handle == active_device_info->current_core_op_handle) {
return true;
}
return false;
}
+bool CoreOpsSchedulerOracle::is_core_op_finished_batch(SchedulerBase &scheduler, const device_id_t &device_id)
+{
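+    // The batch is considered finished once the busiest input has sent at least current_batch_size frames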
+ auto device_info = scheduler.get_device_info(device_id);
+ auto max_transferred_h2d = get_max_value_of_unordered_map(device_info->current_cycle_requested_transferred_frames_h2d[device_info->current_core_op_handle]);
+
+ return device_info->current_batch_size <= max_transferred_h2d;
+}
+
+std::vector<RunParams> CoreOpsSchedulerOracle::get_oracle_decisions(SchedulerBase &scheduler)
+{
+ auto &devices = scheduler.get_device_infos();
+ std::vector<RunParams> oracle_decision;
+
+ for (const auto &pair : devices) {
+ auto &active_device_info = pair.second;
+
+        // Check if the device is already switching to another core-op
+ if (active_device_info->is_switching_core_op) {
+ oracle_decision.push_back({active_device_info->next_core_op_handle, active_device_info->device_id});
+ }
+
+ // Check if device is idle
+ if (!active_device_info->is_switching_core_op &&
+ scheduler.has_core_op_drained_everything(active_device_info->current_core_op_handle, active_device_info->device_id)) {
+ auto core_op_handle = choose_next_model(scheduler, active_device_info->device_id, false);
+ if (core_op_handle != INVALID_CORE_OP_HANDLE) {
+ oracle_decision.push_back({core_op_handle, active_device_info->device_id});
+ }
+ }
+ }
+
+ return oracle_decision;
+}
+
} /* namespace hailort */
namespace hailort
{
+struct RunParams {
+ scheduler_core_op_handle_t core_op_handle;
+ device_id_t device_id;
+};
+
class CoreOpsSchedulerOracle
{
public:
- static bool choose_next_model(SchedulerBase &scheduler, uint32_t device_id, bool check_threshold);
- static uint32_t get_avail_device(SchedulerBase &scheduler, scheduler_core_op_handle_t core_op_handle);
- static bool should_stop_streaming(SchedulerBase &scheduler, core_op_priority_t core_op_priority);
+ static scheduler_core_op_handle_t choose_next_model(SchedulerBase &scheduler, const device_id_t &device_id, bool check_threshold);
+ static std::vector<RunParams> get_oracle_decisions(SchedulerBase &scheduler);
+ static bool should_stop_streaming(SchedulerBase &scheduler, core_op_priority_t core_op_priority, const device_id_t &device_id);
private:
CoreOpsSchedulerOracle() {}
// TODO: Consider returning a vector of devices (we can use this function in other places)
static bool is_core_op_active(SchedulerBase &scheduler, scheduler_core_op_handle_t core_op_handle);
+ static bool is_core_op_finished_batch(SchedulerBase &scheduler, const device_id_t &device_id);
};
} /* namespace hailort */
static hailo_status validate_device_ids_match(const hailo_vdevice_params_t ¶ms,
const std::set<std::string> &old_ids)
{
- std::set<std::string> new_ids;
+ const auto group_id_name = (nullptr == params.group_id ? "NULL" : params.group_id);
+ CHECK(old_ids.size() == static_cast<size_t>(params.device_count), HAILO_INVALID_OPERATION,
+ "VDevice invalid device count for group_id {}", group_id_name);
+
for (uint32_t i = 0; i < params.device_count; i++) {
- // TODO: maybe needs to normalize domain?
- new_ids.insert(params.device_ids[i].id);
+ auto device_id_found = std::find_if(old_ids.begin(), old_ids.end(),
+ [&](const std::string &device_id) {
+ return Device::device_ids_equal(params.device_ids[i].id, device_id);
+ });
+ CHECK(device_id_found != old_ids.end(), HAILO_INVALID_OPERATION,
+ "Device {} not used by group_id {}", params.device_ids[i].id, group_id_name);
}
- CHECK(old_ids == new_ids, HAILO_INVALID_OPERATION, "Different VDevice ids used by group_id {}", (nullptr == params.group_id ? "NULL" : params.group_id));
return HAILO_SUCCESS;
}
-hailo_status validate_same_vdevice(const hailo_vdevice_params_t ¶ms, const VDevice &vdevice)
+static hailo_status validate_same_vdevice(const hailo_vdevice_params_t ¶ms, const VDevice &vdevice)
{
// Validate device ids
if (params.device_ids != nullptr) {
Expected<std::unique_ptr<VDevice>> VDeviceHandle::create(const hailo_vdevice_params_t ¶ms)
{
- auto status = VDeviceBase::validate_params(params);
- CHECK_SUCCESS_AS_EXPECTED(status);
-
auto &manager = SharedResourceManager<std::string, VDeviceBase>::get_instance();
auto create = [¶ms]() {
return VDeviceBase::create(params);
#ifdef HAILO_SUPPORT_MULTI_PROCESS
-VDeviceClient::VDeviceClient(std::unique_ptr<HailoRtRpcClient> client, uint32_t handle)
+VDeviceClient::VDeviceClient(std::unique_ptr<HailoRtRpcClient> client, uint32_t handle, std::vector<std::unique_ptr<Device>> &&devices)
: m_client(std::move(client))
, m_handle(handle)
+ , m_devices(std::move(devices))
{}
VDeviceClient::~VDeviceClient()
// The vdevice in the service will destruct the ConfiguredNetworkGroupBase,
// and then the ConfiguredNetworkGroupClient destructor will be called - causing double destruction on ConfiguredNetworkGroupBase.
m_network_groups.clear();
- auto reply = m_client->VDevice_release(m_handle);
+ auto reply = m_client->VDevice_release(m_handle, OsUtils::get_curr_pid());
if (reply != HAILO_SUCCESS) {
LOGGER__CRITICAL("VDevice_release failed!");
}
auto reply = client->VDevice_create(params, OsUtils::get_curr_pid());
CHECK_EXPECTED(reply);
- auto client_vdevice = std::unique_ptr<VDeviceClient>(new VDeviceClient(std::move(client), reply.value()));
+ auto handle = reply.value();
+ auto devices = client->VDevice_get_physical_devices(handle);
+ CHECK_EXPECTED(devices);
+
+ auto client_vdevice = std::unique_ptr<VDeviceClient>(new VDeviceClient(std::move(client), handle, devices.release()));
CHECK_AS_EXPECTED(client_vdevice != nullptr, HAILO_OUT_OF_HOST_MEMORY);
return std::unique_ptr<VDevice>(std::move(client_vdevice));
Expected<std::vector<std::reference_wrapper<Device>>> VDeviceClient::get_physical_devices() const
{
- LOGGER__ERROR("ConfiguredNetworkGroup::get_physical_devices function is not supported when using multi-process service");
- return make_unexpected(HAILO_INVALID_OPERATION);
+ std::vector<std::reference_wrapper<Device>> devices_refs;
+
+ for (auto &device : m_devices) {
+ devices_refs.push_back(*device);
+ }
+
+ return devices_refs;
}
Expected<std::vector<std::string>> VDeviceClient::get_physical_devices_ids() const
Expected<std::unique_ptr<VDevice>> VDevice::create(const hailo_vdevice_params_t ¶ms)
{
+ auto status = VDeviceBase::validate_params(params);
+ CHECK_SUCCESS_AS_EXPECTED(status);
+
std::unique_ptr<VDevice> vdevice;
+
if (params.multi_process_service) {
#ifdef HAILO_SUPPORT_MULTI_PROCESS
+ CHECK_AS_EXPECTED(params.scheduling_algorithm != HAILO_SCHEDULING_ALGORITHM_NONE, HAILO_INVALID_ARGUMENT,
+ "Multi-process service is supported only with HailoRT scheduler, please choose scheduling algorithm");
auto expected_vdevice = VDeviceClient::create(params);
CHECK_EXPECTED(expected_vdevice);
vdevice = expected_vdevice.release();
device_archs.reserve(params.device_count);
std::string vdevice_ids = "VDevice Infos:";
- for (const auto &device : devices) {
+ for (const auto &pair : devices) {
+ auto &device = pair.second;
auto id_info_str = device->get_dev_id();
device_ids.emplace_back(id_info_str);
auto device_arch = device->get_architecture();
CoreOpsSchedulerPtr scheduler_ptr;
if (HAILO_SCHEDULING_ALGORITHM_NONE != params.scheduling_algorithm) {
if (HAILO_SCHEDULING_ALGORITHM_ROUND_ROBIN == params.scheduling_algorithm) {
- auto core_ops_scheduler = CoreOpsScheduler::create_round_robin(params.device_count, device_ids, device_archs);
+ auto core_ops_scheduler = CoreOpsScheduler::create_round_robin(device_ids, device_archs);
CHECK_EXPECTED(core_ops_scheduler);
scheduler_ptr = core_ops_scheduler.release();
} else {
for (const auto &network_params_pair : local_config_params.value()) {
std::vector<std::shared_ptr<CoreOp>> core_ops;
+ const bool use_multiplexer = should_use_multiplexer(network_params_pair.second);
+
std::shared_ptr<VDeviceCoreOp> identical_core_op = nullptr;
- if (m_core_ops_scheduler && PipelineMultiplexer::should_use_multiplexer()) {
+ if (use_multiplexer) {
for (auto &network_group : m_vdevice_core_ops) {
- if ((network_group->equals(hef, network_params_pair)) && (1 == network_group->get_input_streams().size())) {
- // TODO (HRT-8634): Support multi-inputs NGs (multi networks)
+ if (network_group->multiplexer_supported() && network_group->equals(hef, network_params_pair)) {
identical_core_op = network_group;
break;
}
}
}
- std::shared_ptr<VDeviceCoreOp> vdevice_netwrok_group = nullptr;
+ std::shared_ptr<VDeviceCoreOp> vdevice_network_group = nullptr;
if (identical_core_op) {
- auto vdevice_netwrok_group_exp = VDeviceCoreOp::duplicate(identical_core_op);
- CHECK_EXPECTED(vdevice_netwrok_group_exp);
+ auto vdevice_network_group_exp = VDeviceCoreOp::duplicate(identical_core_op);
+ CHECK_EXPECTED(vdevice_network_group_exp);
- vdevice_netwrok_group = vdevice_netwrok_group_exp.release();
- vdevice_netwrok_group->set_core_op_handle(identical_core_op->core_op_handle());
- vdevice_netwrok_group->create_vdevice_streams_from_duplicate(identical_core_op);
+ vdevice_network_group = vdevice_network_group_exp.release();
+ vdevice_network_group->set_core_op_handle(identical_core_op->core_op_handle());
+ auto status = vdevice_network_group->create_vdevice_streams_from_duplicate(identical_core_op);
+ CHECK_SUCCESS_AS_EXPECTED(status);
} else {
- auto vdevice_netwrok_group_expected = create_vdevice_network_group(hef, network_params_pair);
- CHECK_EXPECTED(vdevice_netwrok_group_expected);
- vdevice_netwrok_group = vdevice_netwrok_group_expected.release();
- m_vdevice_core_ops.push_back(vdevice_netwrok_group);
+ auto vdevice_network_group_expected = create_vdevice_network_group(hef, network_params_pair, use_multiplexer);
+ CHECK_EXPECTED(vdevice_network_group_expected);
+ vdevice_network_group = vdevice_network_group_expected.release();
+ m_vdevice_core_ops.push_back(vdevice_network_group);
}
- core_ops.push_back(vdevice_netwrok_group);
- auto net_flow_ops = hef.pimpl->post_process_ops(vdevice_netwrok_group->name());
- auto net_group_expected = ConfiguredNetworkGroupBase::create(network_params_pair.second, std::move(core_ops), std::move(net_flow_ops));
+ core_ops.push_back(vdevice_network_group);
+ auto metadata = hef.pimpl->network_group_metadata(vdevice_network_group->name());
+ auto net_group_expected = ConfiguredNetworkGroupBase::create(network_params_pair.second, std::move(core_ops), std::move(metadata));
CHECK_EXPECTED(net_group_expected);
auto network_group_ptr = net_group_expected.release();
Expected<hailo_stream_interface_t> VDeviceBase::get_default_streams_interface() const
{
- auto stream_interface = m_devices[0]->get_default_streams_interface();
+ auto stream_interface = m_devices.begin()->second.get()->get_default_streams_interface();
CHECK_EXPECTED(stream_interface);
- for (auto &dev : m_devices) {
+ for (const auto &pair : m_devices) {
+ auto &dev = pair.second;
auto current_stream_interface = dev->get_default_streams_interface();
CHECK_EXPECTED(current_stream_interface);
CHECK_AS_EXPECTED(*current_stream_interface == *stream_interface, HAILO_INTERNAL_FAILURE,
return stream_interface.release();
}
-Expected<std::vector<std::unique_ptr<Device>>> VDeviceBase::create_devices(const hailo_vdevice_params_t ¶ms)
+Expected<std::map<device_id_t, std::unique_ptr<Device>>> VDeviceBase::create_devices(const hailo_vdevice_params_t ¶ms)
{
- std::vector<std::unique_ptr<Device>> devices;
- devices.reserve(params.device_count);
+ std::map<device_id_t, std::unique_ptr<Device>> devices;
const bool user_specific_devices = (params.device_ids != nullptr);
}
CHECK_SUCCESS_AS_EXPECTED(status);
}
- devices.emplace_back(device.release());
+ devices[device_id] = device.release();
}
CHECK_AS_EXPECTED(params.device_count == devices.size(), HAILO_OUT_OF_PHYSICAL_DEVICES,
"Failed to create vdevice. there are not enough free devices. requested: {}, found: {}",
Expected<NetworkGroupsParamsMap> VDeviceBase::create_local_config_params(Hef &hef, const NetworkGroupsParamsMap &configure_params)
{
- for (auto &device : m_devices) {
+ for (const auto &pair : m_devices) {
+ auto &device = pair.second;
auto status = dynamic_cast<DeviceBase&>(*device).check_hef_is_compatible(hef);
CHECK_SUCCESS_AS_EXPECTED(status);
}
auto local_config_params = configure_params;
if (local_config_params.empty()) {
// All stream iface should be the same
- auto config_params_exp = m_devices[0]->create_configure_params(hef);
+ auto config_params_exp = m_devices.begin()->second->create_configure_params(hef);
CHECK_EXPECTED(config_params_exp);
local_config_params = config_params_exp.release();
}
return local_config_params;
}
-Expected<std::shared_ptr<VDeviceCoreOp>> VDeviceBase::create_vdevice_network_group(Hef &hef, const std::pair<const std::string, ConfigureNetworkParams> ¶ms)
+Expected<std::shared_ptr<VDeviceCoreOp>> VDeviceBase::create_vdevice_network_group(Hef &hef,
+ const std::pair<const std::string, ConfigureNetworkParams> ¶ms, bool use_multiplexer)
{
- std::vector<std::shared_ptr<CoreOp>> core_ops_bundle; // bundle of the same CoreOps for all devices
- core_ops_bundle.reserve(m_devices.size());
+ std::map<device_id_t, std::vector<std::shared_ptr<CoreOp>>> core_ops_bundle;
    // Configure all the devices with this network group, then collect their core-ops into the bundle map
- for (auto &device : m_devices) {
+ for (const auto &pair : m_devices) {
+ auto &device = pair.second;
auto ng_vector = device->configure(hef, { std::make_pair(params.first, params.second) });
CHECK_EXPECTED(ng_vector);
assert(1 == ng_vector->size());
auto network_group_base = std::dynamic_pointer_cast<ConfiguredNetworkGroupBase>(ng_vector.value()[0]);
+
+ auto networks_info = network_group_base->get_network_infos();
+ CHECK_EXPECTED(networks_info);
+ if (m_core_ops_scheduler && 1 < networks_info->size()) {
+ LOGGER__WARNING("Configuring '{}' which is a multi-networks model with scheduler enabled."
+ " The model will be scheduled only when all inputs and outputs of the network group will be ready",
+ network_group_base->name());
+ }
+
auto ng_core_ops = network_group_base->get_core_ops();
+ auto &core_ops_vector = core_ops_bundle.emplace(device->get_dev_id(), std::vector<std::shared_ptr<CoreOp>>{}).first->second;
- core_ops_bundle.insert(core_ops_bundle.begin(), ng_core_ops.begin(), ng_core_ops.end());
+ core_ops_vector.insert(core_ops_vector.begin(), ng_core_ops.begin(), ng_core_ops.end());
}
- auto vdevice_netwrok_group_exp = VDeviceCoreOp::create(core_ops_bundle, m_core_ops_scheduler, hef.hash());
- CHECK_EXPECTED(vdevice_netwrok_group_exp);
- auto vdevice_netwrok_group = vdevice_netwrok_group_exp.release();
+
+ auto vdevice_network_group_exp = VDeviceCoreOp::create(core_ops_bundle, m_core_ops_scheduler, hef.hash());
+ CHECK_EXPECTED(vdevice_network_group_exp);
+ auto vdevice_network_group = vdevice_network_group_exp.release();
auto ng_handle = INVALID_CORE_OP_HANDLE;
if (m_core_ops_scheduler) {
- auto core_op_handle_exp = m_core_ops_scheduler->add_core_op(vdevice_netwrok_group);
+ auto core_op_handle_exp = m_core_ops_scheduler->add_core_op(vdevice_network_group);
CHECK_EXPECTED(core_op_handle_exp);
ng_handle = core_op_handle_exp.release();
}
- vdevice_netwrok_group->set_core_op_handle(ng_handle);
- auto status = vdevice_netwrok_group->create_vdevice_streams_from_config_params(make_shared_nothrow<PipelineMultiplexer>(), ng_handle);
+ vdevice_network_group->set_core_op_handle(ng_handle);
+
+ std::shared_ptr<PipelineMultiplexer> multiplexer = nullptr;
+ if (use_multiplexer) {
+ multiplexer = make_shared_nothrow<PipelineMultiplexer>();
+ CHECK_NOT_NULL_AS_EXPECTED(multiplexer, HAILO_OUT_OF_HOST_MEMORY);
+ }
+
+ auto status = vdevice_network_group->create_vdevice_streams_from_config_params(multiplexer, ng_handle);
CHECK_SUCCESS_AS_EXPECTED(status);
- return vdevice_netwrok_group;
+ return vdevice_network_group;
}
+bool VDeviceBase::should_use_multiplexer(const ConfigureNetworkParams &network_params)
+{
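+    // The multiplexer is only used under the scheduler, for synchronous single-input network groups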
+ const auto &stream_params_by_name = network_params.stream_params_by_name;
+ const auto input_counts = std::count_if(stream_params_by_name.begin(), stream_params_by_name.end(),
+ [](const std::pair<std::string, hailo_stream_parameters_t> &stream_params) {
+ return HAILO_H2D_STREAM == stream_params.second.direction;
+ });
+
+ const bool has_async_stream = std::any_of(stream_params_by_name.begin(), stream_params_by_name.end(),
+ [](const std::pair<std::string, hailo_stream_parameters_t> &stream_params) {
+ return 0 != (stream_params.second.flags & HAILO_STREAM_FLAGS_ASYNC);
+ });
+
+ return
+ PipelineMultiplexer::is_multiplexer_supported() &&
+ m_core_ops_scheduler &&
+ input_counts == 1 && // TODO (HRT-8634): Support multi-inputs NGs (multi networks)
+ !has_async_stream; // TODO (HRT-10557): Support async multiplexer
+}
} /* namespace hailort */
{
Expected<std::unique_ptr<ActivatedNetworkGroup>> VDeviceActivatedCoreOp::create(
- std::vector<std::shared_ptr<CoreOp>> &core_ops,
+ std::map<device_id_t, std::vector<std::shared_ptr<CoreOp>>> &core_ops,
std::map<std::string, std::shared_ptr<InputStream>> &input_streams,
std::map<std::string, std::shared_ptr<OutputStream>> &output_streams,
const hailo_activate_network_group_params_t &network_group_params,
auto status = HAILO_UNINITIALIZED;
std::vector<std::unique_ptr<ActivatedNetworkGroup>> activated_network_groups;
activated_network_groups.reserve(core_ops.size());
- for (auto core_op : core_ops) {
- auto ang = core_op->create_activated_network_group(network_group_params, dynamic_batch_size,
- resume_pending_stream_transfers);
- CHECK_EXPECTED(ang);
- activated_network_groups.emplace_back(ang.release());
+ for (const auto &pair : core_ops) {
+ auto &core_op_vector = pair.second;
+ for (auto &core_op : core_op_vector) {
+ auto ang = core_op->create_activated_network_group(network_group_params, dynamic_batch_size,
+ resume_pending_stream_transfers);
+ CHECK_EXPECTED(ang);
+ activated_network_groups.emplace_back(ang.release());
+ }
}
auto ang = VDeviceActivatedCoreOp(std::move(activated_network_groups), input_streams, output_streams,
network_group_params, core_op_activated_event, deactivation_time_accumulator, status);
}
-Expected<std::shared_ptr<VDeviceCoreOp>> VDeviceCoreOp::create(std::vector<std::shared_ptr<CoreOp>> core_ops,
+Expected<std::shared_ptr<VDeviceCoreOp>> VDeviceCoreOp::create(const std::map<device_id_t, std::vector<std::shared_ptr<CoreOp>>> &core_ops,
CoreOpsSchedulerWeakPtr core_ops_scheduler, const std::string &hef_hash)
{
auto status = HAILO_UNINITIALIZED;
}
-VDeviceCoreOp::VDeviceCoreOp(std::vector<std::shared_ptr<CoreOp>> core_ops,
+VDeviceCoreOp::VDeviceCoreOp(const std::map<device_id_t, std::vector<std::shared_ptr<CoreOp>>> &core_ops,
CoreOpsSchedulerWeakPtr core_ops_scheduler, const std::string &hef_hash, hailo_status &status) :
- CoreOp(core_ops[0]->m_config_params, core_ops[0]->m_metadata, status),
+ CoreOp((core_ops.begin()->second)[0]->m_config_params, (core_ops.begin()->second)[0]->m_metadata, status),
m_core_ops(std::move(core_ops)),
m_core_ops_scheduler(core_ops_scheduler),
m_scheduler_handle(INVALID_CORE_OP_HANDLE),
Expected<hailo_stream_interface_t> VDeviceCoreOp::get_default_streams_interface()
{
- auto first_streams_interface = m_core_ops[0]->get_default_streams_interface();
+ auto first_streams_interface = (m_core_ops.begin()->second)[0]->get_default_streams_interface();
CHECK_EXPECTED(first_streams_interface);
#ifndef NDEBUG
    // Check that all physical devices have the same interface
- for (auto &core_op : m_core_ops) {
- auto iface = core_op->get_default_streams_interface();
- CHECK_EXPECTED(iface);
- CHECK_AS_EXPECTED(iface.value() == first_streams_interface.value(), HAILO_INTERNAL_FAILURE,
- "Not all default stream interfaces are the same");
+ for (const auto &pair : m_core_ops) {
+ auto &core_op_vector = pair.second;
+ for (auto &core_op : core_op_vector) {
+ auto iface = core_op->get_default_streams_interface();
+ CHECK_EXPECTED(iface);
+ CHECK_AS_EXPECTED(iface.value() == first_streams_interface.value(), HAILO_INTERNAL_FAILURE,
+ "Not all default stream interfaces are the same");
+ }
}
#endif
return first_streams_interface;
}
-hailo_status VDeviceCoreOp::create_vdevice_streams_from_config_params(std::shared_ptr<PipelineMultiplexer> multiplexer, scheduler_core_op_handle_t scheduler_handle)
+hailo_status VDeviceCoreOp::create_vdevice_streams_from_config_params(std::shared_ptr<PipelineMultiplexer> multiplexer,
+ scheduler_core_op_handle_t scheduler_handle)
{
// TODO - HRT-6931 - raise an error in this case
if (((m_config_params.latency & HAILO_LATENCY_MEASURE) == HAILO_LATENCY_MEASURE) && (1 < m_core_ops.size())) {
TRACE(CreateCoreOpInputStreamsTrace, "", name(), input_stream.first, (uint32_t)expected_queue_size.value());
}
for (const auto &output_stream : m_output_streams) {
- if ((hailo_format_order_t::HAILO_FORMAT_ORDER_HAILO_NMS == (static_cast<OutputStreamBase&>(*output_stream.second).get_layer_info().format.order)) ||
- (HAILO_STREAM_INTERFACE_ETH == static_cast<OutputStreamBase&>(*output_stream.second).get_interface())) {
+ if (hailo_format_order_t::HAILO_FORMAT_ORDER_HAILO_NMS == (static_cast<OutputStreamBase&>(*output_stream.second).get_layer_info().format.order)) {
+ TRACE(CreateCoreOpOutputStreamsTrace, "", name(), output_stream.first, SCHEDULER_MON_NAN_VAL);
+ continue;
+ }
+ if (HAILO_STREAM_INTERFACE_ETH == static_cast<OutputStreamBase&>(*output_stream.second).get_interface()) {
continue;
}
auto expected_queue_size = static_cast<OutputStreamBase&>(*output_stream.second).get_buffer_frames_size();
TRACE(CreateCoreOpOutputStreamsTrace, "", name(), output_stream.first, (uint32_t)expected_queue_size.value());
}
- auto status = m_multiplexer->add_core_op_instance(m_multiplexer_handle, *this);
- CHECK_SUCCESS(status);
+ if (m_multiplexer) {
+ auto status = m_multiplexer->add_core_op_instance(m_multiplexer_handle, *this);
+ CHECK_SUCCESS(status);
+ }
return HAILO_SUCCESS;
}
auto edge_layer = get_layer_info(stream_name);
CHECK_EXPECTED_AS_STATUS(edge_layer);
- if (HailoRTCommon::is_vdma_stream_interface(stream_params.stream_interface)){
- std::vector<std::reference_wrapper<VdmaInputStream>> low_level_streams;
- low_level_streams.reserve(m_core_ops.size());
- for (auto &core_op : m_core_ops) {
- auto stream = core_op->get_input_stream_by_name(stream_name);
- CHECK(stream, HAILO_INTERNAL_FAILURE);
- low_level_streams.emplace_back(dynamic_cast<VdmaInputStream&>(stream.release().get()));
+ if (HailoRTCommon::is_vdma_stream_interface(stream_params.stream_interface)) {
+ std::map<device_id_t, std::reference_wrapper<VdmaInputStreamBase>> low_level_streams;
+ for (const auto &pair : m_core_ops) {
+ auto &device_id = pair.first;
+ auto &core_op_vector = pair.second;
+ for (auto &core_op : core_op_vector) {
+ auto stream = core_op->get_input_stream_by_name(stream_name);
+ CHECK(stream, HAILO_INTERNAL_FAILURE);
+ low_level_streams.emplace(device_id, dynamic_cast<VdmaInputStreamBase&>(stream.release().get()));
+ }
}
- auto input_stream = InputVDeviceBaseStream::create(std::move(low_level_streams), edge_layer.value(),
- scheduler_handle, m_core_op_activated_event, m_core_ops_scheduler);
+ auto input_stream = VDeviceInputStreamBase::create(std::move(low_level_streams), stream_params,
+ edge_layer.value(), scheduler_handle, m_core_op_activated_event, m_core_ops_scheduler);
CHECK_EXPECTED_AS_STATUS(input_stream);
- auto input_stream_wrapper = VDeviceInputStreamMultiplexerWrapper::create(input_stream.release(), edge_layer->network_name, multiplexer, scheduler_handle);
- CHECK_EXPECTED_AS_STATUS(input_stream_wrapper);
- m_input_streams.insert(make_pair(stream_name, input_stream_wrapper.release()));
+
+ if (multiplexer) {
+ auto input_stream_wrapper = VDeviceInputStreamMultiplexerWrapper::create(input_stream.release(), edge_layer->network_name, multiplexer, scheduler_handle);
+ CHECK_EXPECTED_AS_STATUS(input_stream_wrapper);
+ m_input_streams.insert(make_pair(stream_name, input_stream_wrapper.release()));
+ } else {
+ m_input_streams.insert(make_pair(stream_name, input_stream.release()));
+ }
+
} else {
assert(1 == m_core_ops.size());
- auto stream = m_core_ops[0]->get_input_stream_by_name(stream_name);
+ auto stream = (m_core_ops.begin()->second)[0]->get_input_stream_by_name(stream_name);
CHECK(stream, HAILO_INTERNAL_FAILURE);
assert(1 == m_core_ops.size());
- assert(contains(m_core_ops[0]->m_input_streams, stream_name));
- m_input_streams.insert(make_pair(stream_name, m_core_ops[0]->m_input_streams.at(stream_name)));
+ assert(contains((m_core_ops.begin()->second)[0]->m_input_streams, stream_name));
+ m_input_streams.insert(make_pair(stream_name, m_core_ops.begin()->second[0]->m_input_streams.at(stream_name)));
}
return HAILO_SUCCESS;
CHECK_EXPECTED_AS_STATUS(edge_layer);
if (HailoRTCommon::is_vdma_stream_interface(stream_params.stream_interface)) {
- std::vector<std::reference_wrapper<VdmaOutputStream>> low_level_streams;
- low_level_streams.reserve(m_core_ops.size());
- for (auto &core_op : m_core_ops) {
- auto stream = core_op->get_output_stream_by_name(stream_name);
- CHECK(stream, HAILO_INTERNAL_FAILURE);
- low_level_streams.emplace_back(dynamic_cast<VdmaOutputStream&>(stream.release().get()));
+ std::map<device_id_t, std::reference_wrapper<VdmaOutputStreamBase>> low_level_streams;
+ for (const auto &pair : m_core_ops) {
+ auto &device_id = pair.first;
+ auto &core_op_vector = pair.second;
+ for (auto &core_op : core_op_vector) {
+ auto stream = core_op->get_output_stream_by_name(stream_name);
+ CHECK(stream, HAILO_INTERNAL_FAILURE);
+ low_level_streams.emplace(device_id, dynamic_cast<VdmaOutputStreamBase&>(stream.release().get()));
+ }
}
- auto output_stream = OutputVDeviceBaseStream::create(std::move(low_level_streams), edge_layer.value(),
- scheduler_handle, m_core_op_activated_event, m_core_ops_scheduler);
+ auto output_stream = VDeviceOutputStreamBase::create(std::move(low_level_streams), stream_params,
+ edge_layer.value(), scheduler_handle, m_core_op_activated_event, m_core_ops_scheduler);
CHECK_EXPECTED_AS_STATUS(output_stream);
- auto output_stream_wrapper = VDeviceOutputStreamMultiplexerWrapper::create(output_stream.release(), edge_layer->network_name, multiplexer, scheduler_handle);
- CHECK_EXPECTED_AS_STATUS(output_stream_wrapper);
- m_output_streams.insert(make_pair(stream_name, output_stream_wrapper.release()));
+
+ if (multiplexer) {
+ // The multiplexer is only allowed on scheduled streams.
+ auto output_stream_wrapper = VDeviceOutputStreamMultiplexerWrapper::create(output_stream.release(), edge_layer->network_name, multiplexer, scheduler_handle);
+ CHECK_EXPECTED_AS_STATUS(output_stream_wrapper);
+ m_output_streams.insert(make_pair(stream_name, output_stream_wrapper.release()));
+ } else {
+ m_output_streams.insert(make_pair(stream_name, output_stream.release()));
+ }
} else {
assert(1 == m_core_ops.size());
- assert(contains(m_core_ops[0]->m_output_streams, stream_name));
- m_output_streams.insert(make_pair(stream_name, m_core_ops[0]->m_output_streams.at(stream_name)));
+ assert(contains((m_core_ops.begin()->second)[0]->m_output_streams, stream_name));
+ m_output_streams.insert(make_pair(stream_name, (m_core_ops.begin()->second)[0]->m_output_streams.at(stream_name)));
}
return HAILO_SUCCESS;
LOGGER__WARNING("Latency measurement is not supported on more than 1 physical device.");
}
+ assert(other->m_multiplexer != nullptr);
m_multiplexer = other->m_multiplexer;
m_multiplexer_handle = other->multiplexer_duplicates_count() + 1;
Expected<std::shared_ptr<LatencyMetersMap>> VDeviceCoreOp::get_latency_meters()
{
- return m_core_ops[0]->get_latency_meters();
+ return m_core_ops.begin()->second[0]->get_latency_meters();
}
Expected<vdma::BoundaryChannelPtr> VDeviceCoreOp::get_boundary_vdma_channel_by_stream_name(const std::string &stream_name)
CHECK_AS_EXPECTED(1 == m_core_ops.size(), HAILO_INVALID_OPERATION,
"get_boundary_vdma_channel_by_stream_name function is not supported on more than 1 physical device.");
- return m_core_ops[0]->get_boundary_vdma_channel_by_stream_name(stream_name);
+ return m_core_ops.begin()->second[0]->get_boundary_vdma_channel_by_stream_name(stream_name);
}
void VDeviceCoreOp::set_vstreams_multiplexer_callbacks(std::vector<OutputVStream> &output_vstreams)
}
}
-Expected<std::shared_ptr<VdmaConfigCoreOp>> VDeviceCoreOp::get_core_op_by_device_index(uint32_t device_index)
+Expected<std::shared_ptr<VdmaConfigCoreOp>> VDeviceCoreOp::get_core_op_by_device_id(const device_id_t &device_id)
{
- CHECK_AS_EXPECTED(device_index < m_core_ops.size(), HAILO_INVALID_ARGUMENT);
- auto core_op = std::dynamic_pointer_cast<VdmaConfigCoreOp>(m_core_ops[device_index]);
+ CHECK_AS_EXPECTED(m_core_ops.count(device_id), HAILO_INVALID_ARGUMENT);
+ auto core_op = std::dynamic_pointer_cast<VdmaConfigCoreOp>(m_core_ops[device_id][0]);
CHECK_NOT_NULL_AS_EXPECTED(core_op, HAILO_INTERNAL_FAILURE);
return core_op;
}
return res;
}
+Expected<HwInferResults> VDeviceCoreOp::run_hw_infer_estimator()
+{
+ CHECK_AS_EXPECTED(1 == m_core_ops.size(), HAILO_INVALID_OPERATION,
+ "run_hw_infer_estimator function is not supported on more than 1 physical device.");
+ return m_core_ops.begin()->second[0]->run_hw_infer_estimator();
+}
+
} /* namespace hailort */
#include "hailo/network_group.hpp"
#include "hailo/vstream.hpp"
-#include "vdevice/scheduler/network_group_scheduler.hpp"
+#include "vdevice/scheduler/scheduler.hpp"
#include "vdevice/pipeline_multiplexer.hpp"
#include <cstdint>
class VDeviceActivatedCoreOp : public ActivatedCoreOp
{
public:
- static Expected<std::unique_ptr<ActivatedNetworkGroup>> create(std::vector<std::shared_ptr<CoreOp>> &core_ops,
+ static Expected<std::unique_ptr<ActivatedNetworkGroup>> create(std::map<device_id_t, std::vector<std::shared_ptr<CoreOp>>> &core_ops,
std::map<std::string, std::shared_ptr<InputStream>> &input_streams,
std::map<std::string, std::shared_ptr<OutputStream>> &output_streams,
const hailo_activate_network_group_params_t &network_group_params, EventPtr core_op_activated_event,
class VDeviceCoreOp : public CoreOp
{
public:
- static Expected<std::shared_ptr<VDeviceCoreOp>> create(std::vector<std::shared_ptr<CoreOp>> core_ops,
+ static Expected<std::shared_ptr<VDeviceCoreOp>> create(const std::map<device_id_t, std::vector<std::shared_ptr<CoreOp>>> &core_ops,
CoreOpsSchedulerWeakPtr core_ops_scheduler, const std::string &hef_hash);
static Expected<std::shared_ptr<VDeviceCoreOp>> duplicate(std::shared_ptr<VDeviceCoreOp> other);
return false;
}
- uint32_t multiplexer_duplicates_count()
+ uint32_t multiplexer_duplicates_count() const
{
- assert(m_multiplexer->instances_count() > 0);
- return static_cast<uint32_t>(m_multiplexer->instances_count() - 1);
+ if (m_multiplexer) {
+ assert(m_multiplexer->instances_count() > 0);
+ return static_cast<uint32_t>(m_multiplexer->instances_count() - 1);
+ } else {
+ return 0;
+ }
+ }
+
+ bool multiplexer_supported() const
+ {
+ return nullptr != m_multiplexer;
}
virtual Expected<hailo_stream_interface_t> get_default_streams_interface() override;
const hailo_activate_network_group_params_t &network_group_params, uint16_t dynamic_batch_size,
bool resume_pending_stream_transfers) override;
- Expected<std::shared_ptr<VdmaConfigCoreOp>> get_core_op_by_device_index(uint32_t device_index);
+ Expected<std::shared_ptr<VdmaConfigCoreOp>> get_core_op_by_device_id(const device_id_t &device_id);
+
+ virtual Expected<HwInferResults> run_hw_infer_estimator() override;
private:
- VDeviceCoreOp(std::vector<std::shared_ptr<CoreOp>> core_ops, CoreOpsSchedulerWeakPtr core_ops_scheduler,
+ VDeviceCoreOp(const std::map<device_id_t, std::vector<std::shared_ptr<CoreOp>>> &core_ops, CoreOpsSchedulerWeakPtr core_ops_scheduler,
const std::string &hef_hash, hailo_status &status);
- std::vector<std::shared_ptr<CoreOp>> m_core_ops;
+ std::map<device_id_t, std::vector<std::shared_ptr<CoreOp>>> m_core_ops;
CoreOpsSchedulerWeakPtr m_core_ops_scheduler;
scheduler_core_op_handle_t m_scheduler_handle;
multiplexer_core_op_handle_t m_multiplexer_handle;
#include "vdma/vdma_device.hpp"
#include "vdma/vdma_config_manager.hpp"
#include "vdevice/vdevice_core_op.hpp"
-#include "vdevice/scheduler/network_group_scheduler.hpp"
+#include "vdevice/scheduler/scheduler.hpp"
#ifdef HAILO_SUPPORT_MULTI_PROCESS
#include "service/hailort_rpc_client.hpp"
{
// Return Expected for future functionality
std::vector<std::reference_wrapper<Device>> devices_refs;
- for (auto &device : m_devices) {
+ for (const auto &pair : m_devices) {
+ auto &device = pair.second;
devices_refs.push_back(*device);
}
return devices_refs;
{
std::vector<std::string> device_ids;
device_ids.reserve(m_devices.size());
- for (auto &device : m_devices) {
- device_ids.push_back(device.get()->get_dev_id());
+ for (const auto &pair : m_devices) {
+ auto &id = pair.first;
+ device_ids.push_back(id);
}
return device_ids;
}
static hailo_status validate_params(const hailo_vdevice_params_t &params);
private:
- VDeviceBase(std::vector<std::unique_ptr<Device>> &&devices, CoreOpsSchedulerPtr core_ops_scheduler) :
+ VDeviceBase(std::map<device_id_t, std::unique_ptr<Device>> &&devices, CoreOpsSchedulerPtr core_ops_scheduler) :
m_devices(std::move(devices)), m_core_ops_scheduler(core_ops_scheduler)
{}
- static Expected<std::vector<std::unique_ptr<Device>>> create_devices(const hailo_vdevice_params_t &params);
+ static Expected<std::map<device_id_t, std::unique_ptr<Device>>> create_devices(const hailo_vdevice_params_t &params);
static Expected<std::vector<std::string>> get_device_ids(const hailo_vdevice_params_t ¶ms);
Expected<NetworkGroupsParamsMap> create_local_config_params(Hef &hef, const NetworkGroupsParamsMap &configure_params);
- Expected<std::shared_ptr<VDeviceCoreOp>> create_vdevice_network_group(Hef &hef, const std::pair<const std::string, ConfigureNetworkParams> &params);
+ Expected<std::shared_ptr<VDeviceCoreOp>> create_vdevice_network_group(Hef &hef,
+ const std::pair<const std::string, ConfigureNetworkParams> &params, bool use_multiplexer);
+ bool should_use_multiplexer(const ConfigureNetworkParams &params);
- std::vector<std::unique_ptr<Device>> m_devices;
+ std::map<device_id_t, std::unique_ptr<Device>> m_devices;
CoreOpsSchedulerPtr m_core_ops_scheduler;
std::vector<std::shared_ptr<VDeviceCoreOp>> m_vdevice_core_ops;
std::vector<std::shared_ptr<ConfiguredNetworkGroup>> m_network_groups; // TODO: HRT-9547 - Remove when ConfiguredNetworkGroup will be kept in global context
virtual hailo_status after_fork_in_child() override;
private:
- VDeviceClient(std::unique_ptr<HailoRtRpcClient> client, uint32_t handle);
+ VDeviceClient(std::unique_ptr<HailoRtRpcClient> client, uint32_t handle, std::vector<std::unique_ptr<hailort::Device>> &&devices);
hailo_status create_client();
std::unique_ptr<HailoRtRpcClient> m_client;
uint32_t m_handle;
+ std::vector<std::unique_ptr<Device>> m_devices;
std::vector<std::shared_ptr<ConfiguredNetworkGroup>> m_network_groups;
};
--- /dev/null
+/**
+ * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+ **/
+/**
+ * @file vdevice_native_stream.cpp
+ * @brief Internal stream implementation for native streams
+ *
+ **/
+
+#include "vdevice_native_stream.hpp"
+
+namespace hailort {
+
+/** Input stream **/
+hailo_status VDeviceNativeInputStreamBase::abort()
+{
+ auto status = HAILO_SUCCESS; // Best effort
+ for (const auto &pair : m_streams) {
+ auto &stream = pair.second;
+ auto abort_status = stream.get().abort();
+ if (HAILO_SUCCESS != abort_status) {
+ LOGGER__ERROR("Failed to abort input stream. (status: {} device: {})", abort_status, stream.get().get_dev_id());
+ status = abort_status;
+ }
+ }
+ return status;
+}
+
+hailo_status VDeviceNativeInputStreamBase::clear_abort()
+{
+ auto status = HAILO_SUCCESS; // Best effort
+ for (const auto &pair : m_streams) {
+ auto &stream = pair.second;
+ auto clear_abort_status = stream.get().clear_abort();
+ if ((HAILO_SUCCESS != clear_abort_status) && (HAILO_STREAM_NOT_ACTIVATED != clear_abort_status)) {
+ LOGGER__ERROR("Failed to clear abort input stream. (status: {} device: {})", clear_abort_status, stream.get().get_dev_id());
+ status = clear_abort_status;
+ }
+ }
+
+ return status;
+}
+
+Expected<std::unique_ptr<VDeviceNativeInputStream>> VDeviceNativeInputStream::create(
+ std::map<device_id_t, std::reference_wrapper<VdmaInputStreamBase>> &&streams,
+ EventPtr &&core_op_activated_event,
+ const LayerInfo &layer_info)
+{
+ auto status = HAILO_UNINITIALIZED;
+ auto stream = make_unique_nothrow<VDeviceNativeInputStream>(std::move(streams),
+ std::move(core_op_activated_event), layer_info, status);
+ CHECK_AS_EXPECTED((nullptr != stream), HAILO_OUT_OF_HOST_MEMORY);
+ CHECK_SUCCESS_AS_EXPECTED(status);
+ return stream;
+}
+
+hailo_status VDeviceNativeInputStream::write_impl(const MemoryView &buffer, const std::function<bool()> &should_cancel)
+{
+ if (should_cancel()) {
+ return HAILO_STREAM_ABORTED_BY_USER;
+ }
+ auto status = m_streams.at(m_next_transfer_stream).get().write_impl(buffer);
+ if (HAILO_SUCCESS != status) {
+ LOGGER__INFO("Write to stream has failed! status = {}", status);
+ return status;
+ }
+
+ // Update m_next_transfer_stream only after 'batch' frames have been transferred
+ if (0 == (++m_acc_frames % m_streams.begin()->second.get().get_dynamic_batch_size())) {
+ auto it = m_streams.upper_bound(m_next_transfer_stream);
+ if (m_streams.end() == it) {
+ it = m_streams.begin();
+ }
+ m_next_transfer_stream = it->first;
+ m_acc_frames = 0;
+ }
+ return HAILO_SUCCESS;
+}
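+
+// Illustrative sketch (not part of this change): the rotation above walks the
+// device map in key order, advancing once every dynamic-batch-many frames and
+// wrapping at the end; write_async() below and the output streams' read()/
+// read_async() reuse the same pattern. A self-contained equivalent over any
+// ordered map keyed by device id:
+//
+// template <typename Map>
+// typename Map::key_type next_round_robin_key(const Map &streams, const typename Map::key_type &current)
+// {
+// auto it = streams.upper_bound(current); // first key strictly greater than current
+// if (streams.end() == it) {
+// it = streams.begin(); // wrap around to the first device
+// }
+// return it->first;
+// }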
+
+Expected<std::unique_ptr<VDeviceNativeAsyncInputStream>> VDeviceNativeAsyncInputStream::create(
+ std::map<device_id_t, std::reference_wrapper<VdmaInputStreamBase>> &&streams,
+ EventPtr &&core_op_activated_event,
+ const LayerInfo &layer_info)
+{
+ auto max_queue_size_per_stream = streams.begin()->second.get().get_buffer_frames_size();
+ CHECK_EXPECTED(max_queue_size_per_stream);
+ const auto max_queue_size = max_queue_size_per_stream.value() * streams.size();
+
+ auto status = HAILO_UNINITIALIZED;
+ auto stream = make_unique_nothrow<VDeviceNativeAsyncInputStream>(std::move(streams),
+ std::move(core_op_activated_event), layer_info, max_queue_size, status);
+ CHECK_AS_EXPECTED((nullptr != stream), HAILO_OUT_OF_HOST_MEMORY);
+ CHECK_SUCCESS_AS_EXPECTED(status);
+ return stream;
+}
+
+hailo_status VDeviceNativeAsyncInputStream::wait_for_async_ready(size_t transfer_size, std::chrono::milliseconds timeout)
+{
+ return m_streams.at(m_next_transfer_stream).get().wait_for_async_ready(transfer_size, timeout);
+}
+
+Expected<size_t> VDeviceNativeAsyncInputStream::get_async_max_queue_size() const
+{
+ return Expected<size_t>(m_max_queue_size);
+}
+
+hailo_status VDeviceNativeAsyncInputStream::write_async(TransferRequest &&transfer_request)
+{
+ // TODO HRT-10583 - allow option to remove reorder queue
+ transfer_request.callback = m_callback_reorder_queue.wrap_callback(transfer_request.callback);
+
+ auto status = m_streams.at(m_next_transfer_stream).get().write_async(std::move(transfer_request));
+ if (HAILO_SUCCESS != status) {
+ m_callback_reorder_queue.cancel_last_callback();
+ return status;
+ }
+
+ // Update m_next_transfer_stream only after 'batch' frames have been transferred
+ if (0 == (++m_acc_frames % m_streams.begin()->second.get().get_dynamic_batch_size())) {
+ auto it = m_streams.upper_bound(m_next_transfer_stream);
+ if (m_streams.end() == it) {
+ it = m_streams.begin();
+ }
+ m_next_transfer_stream = it->first;
+ m_acc_frames = 0;
+ }
+ return HAILO_SUCCESS;
+}
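+
+// Note (illustrative, under assumptions): wrap_callback() is understood here to
+// register the user callback with the reorder queue so completions are reported
+// in submission order even when transfers fan out across devices, and
+// cancel_last_callback() to drop that registration when submission fails. The
+// resulting submit-or-rollback shape, with submit() standing in for the
+// device-specific write_async() call:
+//
+// transfer_request.callback = m_callback_reorder_queue.wrap_callback(transfer_request.callback);
+// if (HAILO_SUCCESS != submit(std::move(transfer_request))) {
+// m_callback_reorder_queue.cancel_last_callback(); // roll back the registration
+// }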
+
+hailo_status VDeviceNativeAsyncInputStream::write_impl(const MemoryView &, const std::function<bool()> &)
+{
+ LOGGER__ERROR("Sync write is not supported by async streams");
+ return HAILO_INVALID_OPERATION;
+}
+
+/** Output stream **/
+hailo_status VDeviceNativeOutputStreamBase::abort()
+{
+ auto status = HAILO_SUCCESS; // Best effort
+ for (const auto &pair : m_streams) {
+ auto &stream = pair.second;
+ auto abort_status = stream.get().abort();
+ if (HAILO_SUCCESS != abort_status) {
+ LOGGER__ERROR("Failed to abort output stream. (status: {} device: {})", abort_status, stream.get().get_dev_id());
+ status = abort_status;
+ }
+ }
+
+ return status;
+}
+
+hailo_status VDeviceNativeOutputStreamBase::clear_abort()
+{
+ auto status = HAILO_SUCCESS; // Best effort
+ for (const auto &pair : m_streams) {
+ auto &stream = pair.second;
+ auto clear_abort_status = stream.get().clear_abort();
+ if ((HAILO_SUCCESS != clear_abort_status) && (HAILO_STREAM_NOT_ACTIVATED != clear_abort_status)) {
+ LOGGER__ERROR("Failed to clear abort output stream. (status: {} device: {})", clear_abort_status, stream.get().get_dev_id());
+ status = clear_abort_status;
+ }
+ }
+
+ return status;
+}
+
+Expected<std::unique_ptr<VDeviceNativeOutputStream>> VDeviceNativeOutputStream::create(
+ std::map<device_id_t, std::reference_wrapper<VdmaOutputStreamBase>> &&streams,
+ EventPtr &&core_op_activated_event,
+ const LayerInfo &layer_info)
+{
+ auto status = HAILO_UNINITIALIZED;
+ auto stream = make_unique_nothrow<VDeviceNativeOutputStream>(std::move(streams),
+ std::move(core_op_activated_event), layer_info, status);
+ CHECK_AS_EXPECTED((nullptr != stream), HAILO_OUT_OF_HOST_MEMORY);
+ CHECK_SUCCESS_AS_EXPECTED(status);
+ return stream;
+}
+
+hailo_status VDeviceNativeOutputStream::read(MemoryView buffer)
+{
+ auto status = m_streams.at(m_next_transfer_stream).get().read(buffer);
+ if (HAILO_STREAM_ABORTED_BY_USER == status) {
+ // If the user aborted the stream, don't report it as an inference error
+ LOGGER__INFO("Stream aborted by user (device: {})", m_streams.at(m_next_transfer_stream).get().get_dev_id());
+ return status;
+ }
+ CHECK_SUCCESS(status, "Read from stream has failed! status = {}", status);
+
+ // Update m_next_transfer_stream only after 'batch' frames have been transferred
+ if (0 == (++m_acc_frames % m_streams.begin()->second.get().get_dynamic_batch_size())) {
+ auto it = m_streams.upper_bound(m_next_transfer_stream);
+ if (m_streams.end() == it) {
+ it = m_streams.begin();
+ }
+ m_next_transfer_stream = it->first;
+ m_acc_frames = 0;
+ }
+
+ return HAILO_SUCCESS;
+}
+
+Expected<std::unique_ptr<VDeviceNativeAsyncOutputStream>> VDeviceNativeAsyncOutputStream::create(
+ std::map<device_id_t, std::reference_wrapper<VdmaOutputStreamBase>> &&streams,
+ EventPtr &&core_op_activated_event,
+ const LayerInfo &layer_info)
+{
+ auto max_queue_size_per_stream = streams.begin()->second.get().get_buffer_frames_size();
+ CHECK_EXPECTED(max_queue_size_per_stream);
+ const auto max_queue_size = max_queue_size_per_stream.value() * streams.size();
+
+ auto status = HAILO_UNINITIALIZED;
+ auto stream = make_unique_nothrow<VDeviceNativeAsyncOutputStream>(std::move(streams),
+ std::move(core_op_activated_event), layer_info, max_queue_size, status);
+ CHECK_AS_EXPECTED((nullptr != stream), HAILO_OUT_OF_HOST_MEMORY);
+ CHECK_SUCCESS_AS_EXPECTED(status);
+ return stream;
+}
+
+hailo_status VDeviceNativeAsyncOutputStream::wait_for_async_ready(size_t transfer_size, std::chrono::milliseconds timeout)
+{
+ return m_streams.at(m_next_transfer_stream).get().wait_for_async_ready(transfer_size, timeout);
+}
+
+Expected<size_t> VDeviceNativeAsyncOutputStream::get_async_max_queue_size() const
+{
+ return Expected<size_t>(m_max_queue_size);
+}
+
+hailo_status VDeviceNativeAsyncOutputStream::read_async(TransferRequest &&transfer_request)
+{
+ // TODO HRT-10583 - allow option to remove reorder queue
+ transfer_request.callback = m_callback_reorder_queue.wrap_callback(transfer_request.callback);
+ auto status = m_streams.at(m_next_transfer_stream).get().read_async(std::move(transfer_request));
+ if (HAILO_SUCCESS != status) {
+ m_callback_reorder_queue.cancel_last_callback();
+ return status;
+ }
+ // Update m_next_transfer_stream only after 'batch' frames have been transferred
+ if (0 == (++m_acc_frames % m_streams.begin()->second.get().get_dynamic_batch_size())) {
+ auto it = m_streams.upper_bound(m_next_transfer_stream);
+ if (m_streams.end() == it) {
+ it = m_streams.begin();
+ }
+ m_next_transfer_stream = it->first;
+ m_acc_frames = 0;
+ }
+
+ return HAILO_SUCCESS;
+}
+
+hailo_status VDeviceNativeAsyncOutputStream::read(MemoryView)
+{
+ LOGGER__ERROR("The read function is not supported by async streams");
+ return HAILO_INVALID_OPERATION;
+}
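+
+// Illustrative caller-side usage (a sketch, not part of this change; the
+// callback signature, the TransferRequest fields beyond `callback`, and
+// `frame_size` are assumptions):
+//
+// TransferRequest request{};
+// request.callback = [](hailo_status completion_status) {
+// (void)completion_status; // runs once the frame arrives, in submission order
+// };
+// if (HAILO_SUCCESS == stream.wait_for_async_ready(frame_size, std::chrono::milliseconds(1000))) {
+// auto status = stream.read_async(std::move(request));
+// }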
+
+} /* namespace hailort */
\ No newline at end of file
#include "stream_common/stream_internal.hpp"
#include "vdevice_stream.hpp"
+#include "vdevice/callback_reorder_queue.hpp"
namespace hailort
{
-class InputVDeviceNativeStream : public InputVDeviceBaseStream {
+
+class VDeviceNativeInputStreamBase : public VDeviceInputStreamBase {
public:
- InputVDeviceNativeStream(
- std::vector<std::reference_wrapper<VdmaInputStream>> &&streams,
+ static Expected<std::unique_ptr<VDeviceNativeInputStreamBase>> create(
+ std::map<device_id_t, std::reference_wrapper<VdmaInputStreamBase>> &&streams,
+ EventPtr &&core_op_activated_event,
+ const LayerInfo &layer_info);
+
+ VDeviceNativeInputStreamBase(
+ std::map<device_id_t, std::reference_wrapper<VdmaInputStreamBase>> &&streams,
EventPtr &&core_op_activated_event,
const LayerInfo &layer_info,
hailo_status &status) :
- InputVDeviceBaseStream(std::move(streams), std::move(core_op_activated_event), layer_info, status)
+ VDeviceInputStreamBase(std::move(streams), std::move(core_op_activated_event), layer_info, status)
{}
virtual hailo_status abort() override;
virtual hailo_status clear_abort() override;
virtual bool is_scheduled() override { return false; };
+};
+
+class VDeviceNativeInputStream : public VDeviceNativeInputStreamBase {
+public:
+ static Expected<std::unique_ptr<VDeviceNativeInputStream>> create(
+ std::map<device_id_t, std::reference_wrapper<VdmaInputStreamBase>> &&streams,
+ EventPtr &&core_op_activated_event,
+ const LayerInfo &layer_info);
+
+ using VDeviceNativeInputStreamBase::VDeviceNativeInputStreamBase;
protected:
- virtual Expected<size_t> sync_write_raw_buffer(const MemoryView &buffer,
- const std::function<bool()> &should_cancel = []() { return false; }) override;
+ virtual hailo_status write_impl(const MemoryView &buffer, const std::function<bool()> &should_cancel) override;
};
-class OutputVDeviceNativeStream : public OutputVDeviceBaseStream {
+class VDeviceNativeAsyncInputStream : public VDeviceNativeInputStreamBase {
public:
- OutputVDeviceNativeStream(
- std::vector<std::reference_wrapper<VdmaOutputStream>> &&streams,
+ static Expected<std::unique_ptr<VDeviceNativeAsyncInputStream>> create(
+ std::map<device_id_t, std::reference_wrapper<VdmaInputStreamBase>> &&streams,
+ EventPtr &&core_op_activated_event,
+ const LayerInfo &layer_info);
+
+ VDeviceNativeAsyncInputStream(
+ std::map<device_id_t, std::reference_wrapper<VdmaInputStreamBase>> &&streams,
+ EventPtr &&core_op_activated_event,
const LayerInfo &layer_info,
+ size_t max_queue_size,
+ hailo_status &status) :
+ VDeviceNativeInputStreamBase(std::move(streams), std::move(core_op_activated_event), layer_info, status),
+ m_callback_reorder_queue(max_queue_size), // TODO HRT-1058 - use reorder queue only when needed
+ m_max_queue_size(max_queue_size)
+ {}
+
+ virtual hailo_status wait_for_async_ready(size_t transfer_size, std::chrono::milliseconds timeout) override;
+ virtual hailo_status write_async(TransferRequest &&transfer_request) override;
+ virtual Expected<size_t> get_async_max_queue_size() const override;
+
+protected:
+ virtual hailo_status write_impl(const MemoryView &buffer, const std::function<bool()> &should_cancel) override;
+
+private:
+ CallbackReorderQueue m_callback_reorder_queue;
+ const size_t m_max_queue_size;
+};
+
+class VDeviceNativeOutputStreamBase : public VDeviceOutputStreamBase {
+public:
+ VDeviceNativeOutputStreamBase(
+ std::map<device_id_t, std::reference_wrapper<VdmaOutputStreamBase>> &&streams,
EventPtr &&core_op_activated_event,
+ const LayerInfo &layer_info,
hailo_status &status) :
- OutputVDeviceBaseStream(std::move(streams), layer_info, std::move(core_op_activated_event), status)
+ VDeviceOutputStreamBase(std::move(streams), layer_info, std::move(core_op_activated_event), status)
{}
virtual hailo_status abort() override;
virtual hailo_status clear_abort() override;
virtual bool is_scheduled() override { return false; };
+};
-protected:
- virtual hailo_status read(MemoryView buffer) override;;
+class VDeviceNativeOutputStream : public VDeviceNativeOutputStreamBase {
+public:
+ static Expected<std::unique_ptr<VDeviceNativeOutputStream>> create(
+ std::map<device_id_t, std::reference_wrapper<VdmaOutputStreamBase>> &&streams,
+ EventPtr &&core_op_activated_event, const LayerInfo &layer_info);
+
+ using VDeviceNativeOutputStreamBase::VDeviceNativeOutputStreamBase;
+ virtual hailo_status read(MemoryView buffer) override;
};
+class VDeviceNativeAsyncOutputStream : public VDeviceNativeOutputStreamBase {
+public:
+ static Expected<std::unique_ptr<VDeviceNativeAsyncOutputStream>> create(
+ std::map<device_id_t, std::reference_wrapper<VdmaOutputStreamBase>> &&streams,
+ EventPtr &&core_op_activated_event, const LayerInfo &layer_info);
+
+ VDeviceNativeAsyncOutputStream(
+ std::map<device_id_t, std::reference_wrapper<VdmaOutputStreamBase>> &&streams,
+ EventPtr &&core_op_activated_event,
+ const LayerInfo &layer_info,
+ size_t max_queue_size,
+ hailo_status &status) :
+ VDeviceNativeOutputStreamBase(std::move(streams), std::move(core_op_activated_event), layer_info, status),
+ m_callback_reorder_queue(max_queue_size), // TODO HRT-1058 - use reorder queue only when needed
+ m_max_queue_size(max_queue_size)
+ {}
+
+ virtual hailo_status wait_for_async_ready(size_t transfer_size, std::chrono::milliseconds timeout) override;
+ virtual hailo_status read_async(TransferRequest &&transfer_request) override;
+ virtual Expected<size_t> get_async_max_queue_size() const override;
+ virtual hailo_status read(MemoryView buffer) override;
+
+private:
+ CallbackReorderQueue m_callback_reorder_queue;
+ const size_t m_max_queue_size;
+ };
+
} /* namespace hailort */
#endif /* HAILO_VDEVICE_NATIVE_STREAM_HPP_ */
#include "common/utils.hpp"
-#include "utils/profiler/tracer_macros.hpp"
#include "vdevice/vdevice_stream.hpp"
#include "vdevice/vdevice_native_stream.hpp"
#include "vdevice/scheduler/multi_device_scheduled_stream.hpp"
namespace hailort
{
-hailo_status InputVDeviceBaseStream::deactivate_stream()
-{
- auto status = HAILO_SUCCESS; // Best effort
- for (auto &stream : m_streams) {
- auto deactivate_status = stream.get().deactivate_stream();
- if (HAILO_SUCCESS != deactivate_status) {
- LOGGER__ERROR("Failed to deactivate input stream. (status: {} device: {})", deactivate_status, stream.get().get_dev_id());
- status = deactivate_status;
- }
- }
- m_is_stream_activated = false;
- return status;
-}
-
/** Input stream **/
-InputVDeviceBaseStream::~InputVDeviceBaseStream()
+VDeviceInputStreamBase::~VDeviceInputStreamBase()
{
// We want to stop the vdma channel before closing the stream in the firmware
// because sending data to a closed stream may terminate the dma engine
}
}
-hailo_status InputVDeviceBaseStream::activate_stream(uint16_t dynamic_batch_size, bool resume_pending_stream_transfers)
+hailo_status VDeviceInputStreamBase::activate_stream(uint16_t dynamic_batch_size, bool resume_pending_stream_transfers)
{
- for (auto &stream : m_streams) {
+ for (const auto &pair : m_streams) {
+ auto &stream = pair.second;
auto status = stream.get().activate_stream(dynamic_batch_size, resume_pending_stream_transfers);
if (HAILO_SUCCESS != status) {
LOGGER__ERROR("Failed to activate input stream. (device: {})", stream.get().get_dev_id());
return HAILO_SUCCESS;
}
-hailo_status InputVDeviceBaseStream::sync_write_all_raw_buffer_no_transform_impl(void *buffer, size_t offset, size_t size)
+hailo_status VDeviceInputStreamBase::deactivate_stream()
{
- ASSERT(NULL != buffer);
-
- return sync_write_raw_buffer(MemoryView(static_cast<uint8_t*>(buffer) + offset, size)).status();
+ auto status = HAILO_SUCCESS; // Best effort
+ for (const auto &pair : m_streams) {
+ auto &stream = pair.second;
+ auto deactivate_status = stream.get().deactivate_stream();
+ if (HAILO_SUCCESS != deactivate_status) {
+ LOGGER__ERROR("Failed to deactivate input stream. (status: {} device: {})", deactivate_status, stream.get().get_dev_id());
+ status = deactivate_status;
+ }
+ }
+ m_is_stream_activated = false;
+ return status;
}
-hailo_status InputVDeviceBaseStream::send_pending_buffer(size_t device_index)
+hailo_status VDeviceInputStreamBase::send_pending_buffer(const device_id_t &device_id)
{
assert(1 == m_streams.size());
- CHECK(0 == device_index, HAILO_INVALID_OPERATION);
- VdmaInputStream &vdma_input = static_cast<VdmaInputStream&>(m_streams[m_next_transfer_stream_index].get());
- return vdma_input.send_pending_buffer();
+ auto &vdma_input = dynamic_cast<VdmaInputStreamBase&>(m_streams.at(m_next_transfer_stream).get());
+ return vdma_input.send_pending_buffer(device_id);
}
-Expected<size_t> InputVDeviceBaseStream::get_buffer_frames_size() const
+Expected<size_t> VDeviceInputStreamBase::get_buffer_frames_size() const
{
- size_t total_buffers_size = 0;
- for (auto &stream : m_streams) {
- auto stream_buffer_size = stream.get().get_buffer_frames_size();
- CHECK_EXPECTED(stream_buffer_size);
- total_buffers_size += stream_buffer_size.value();
- }
-
- return total_buffers_size;
+ return m_streams.begin()->second.get().get_buffer_frames_size();
}
-Expected<size_t> InputVDeviceBaseStream::get_pending_frames_count() const
+Expected<size_t> VDeviceInputStreamBase::get_pending_frames_count() const
{
size_t total_pending_frames_count = 0;
- for (auto &stream : m_streams) {
+ for (const auto &pair : m_streams) {
+ auto &stream = pair.second;
auto stream_pending_frames_count = stream.get().get_pending_frames_count();
CHECK_EXPECTED(stream_pending_frames_count);
total_pending_frames_count += stream_pending_frames_count.value();
}
-
return total_pending_frames_count;
}
-Expected<std::unique_ptr<InputVDeviceBaseStream>> InputVDeviceBaseStream::create(std::vector<std::reference_wrapper<VdmaInputStream>> &&low_level_streams,
- const LayerInfo &edge_layer, const scheduler_core_op_handle_t &core_op_handle,
- EventPtr core_op_activated_event, CoreOpsSchedulerWeakPtr core_ops_scheduler)
+Expected<std::unique_ptr<VDeviceInputStreamBase>> VDeviceInputStreamBase::create(
+ std::map<device_id_t, std::reference_wrapper<VdmaInputStreamBase>> &&low_level_streams,
+ const hailo_stream_parameters_t &stream_params, const LayerInfo &edge_layer,
+ const scheduler_core_op_handle_t &core_op_handle, EventPtr core_op_activated_event,
+ CoreOpsSchedulerWeakPtr core_ops_scheduler)
{
assert(0 < low_level_streams.size());
- auto status = HAILO_UNINITIALIZED;
-
- std::unique_ptr<InputVDeviceBaseStream> local_vdevice_stream;
if (core_ops_scheduler.lock()) {
- if (1 < low_level_streams.size()) {
- auto buffer_frame_size = low_level_streams[0].get().get_buffer_frames_size();
- CHECK_EXPECTED(buffer_frame_size);
- auto frame_size = low_level_streams[0].get().get_frame_size();
- auto buffers_queue_ptr = BuffersQueue::create_unique(frame_size, (low_level_streams.size() * buffer_frame_size.value()));
- CHECK_EXPECTED(buffers_queue_ptr);
-
- local_vdevice_stream = make_unique_nothrow<MultiDeviceScheduledInputStream>(std::move(low_level_streams),
+ if ((stream_params.flags & HAILO_STREAM_FLAGS_ASYNC) != 0) {
+ auto stream = ScheduledAsyncInputStream::create(std::move(low_level_streams),
core_op_handle, std::move(core_op_activated_event), edge_layer,
- core_ops_scheduler, buffers_queue_ptr.release(), status);
+ core_ops_scheduler);
+ CHECK_EXPECTED(stream);
+ return std::unique_ptr<VDeviceInputStreamBase>(stream.release());
} else {
- local_vdevice_stream = make_unique_nothrow<ScheduledInputStream>(std::move(low_level_streams),
- core_op_handle, std::move(core_op_activated_event), edge_layer,
- core_ops_scheduler, status);
+ if (1 < low_level_streams.size()) {
+ auto stream = MultiDeviceScheduledInputStream::create(std::move(low_level_streams),
+ core_op_handle, std::move(core_op_activated_event), edge_layer,
+ core_ops_scheduler);
+ CHECK_EXPECTED(stream);
+ return std::unique_ptr<VDeviceInputStreamBase>(stream.release());
+ } else {
+ auto stream = ScheduledInputStream::create(std::move(low_level_streams),
+ core_op_handle, std::move(core_op_activated_event), edge_layer,
+ core_ops_scheduler);
+ CHECK_EXPECTED(stream);
+ return std::unique_ptr<VDeviceInputStreamBase>(stream.release());
+ }
}
} else {
- local_vdevice_stream = make_unique_nothrow<InputVDeviceNativeStream>(std::move(low_level_streams),
- std::move(core_op_activated_event), edge_layer,status);
- }
-
- CHECK_AS_EXPECTED((nullptr != local_vdevice_stream), HAILO_OUT_OF_HOST_MEMORY);
- CHECK_SUCCESS_AS_EXPECTED(status);
+ if ((stream_params.flags & HAILO_STREAM_FLAGS_ASYNC) != 0) {
+ auto stream = VDeviceNativeAsyncInputStream::create(std::move(low_level_streams),
+ std::move(core_op_activated_event), edge_layer);
+ CHECK_EXPECTED(stream);
+ return std::unique_ptr<VDeviceInputStreamBase>(stream.release());
+ } else {
+ auto stream = VDeviceNativeInputStream::create(std::move(low_level_streams),
+ std::move(core_op_activated_event), edge_layer);
+ CHECK_EXPECTED(stream);
+ return std::unique_ptr<VDeviceInputStreamBase>(stream.release());
+ }
- return local_vdevice_stream;
+ }
}
-hailo_status InputVDeviceBaseStream::set_timeout(std::chrono::milliseconds timeout)
+hailo_status VDeviceInputStreamBase::set_timeout(std::chrono::milliseconds timeout)
{
- for (auto &stream : m_streams) {
+ for (const auto &pair : m_streams) {
+ auto &stream = pair.second;
auto status = stream.get().set_timeout(timeout);
CHECK_SUCCESS(status, "Failed to set timeout to input stream. (device: {})", stream.get().get_dev_id());
}
return HAILO_SUCCESS;
}
-std::chrono::milliseconds InputVDeviceBaseStream::get_timeout() const
+std::chrono::milliseconds VDeviceInputStreamBase::get_timeout() const
{
// All timeout values of m_streams should be the same
- return m_streams[0].get().get_timeout();
+ return m_streams.begin()->second.get().get_timeout();
}
-hailo_stream_interface_t InputVDeviceBaseStream::get_interface() const
+hailo_stream_interface_t VDeviceInputStreamBase::get_interface() const
{
// All interface values of m_streams should be the same
- return m_streams[0].get().get_interface();
+ return m_streams.begin()->second.get().get_interface();
}
-hailo_status InputVDeviceBaseStream::flush()
+hailo_status VDeviceInputStreamBase::flush()
{
auto status = HAILO_SUCCESS; // Best effort
- for (auto &stream : m_streams) {
+ for (const auto &pair : m_streams) {
+ auto &stream = pair.second;
auto flush_status = stream.get().flush();
if (HAILO_SUCCESS != flush_status) {
LOGGER__ERROR("Failed to flush input stream. (status: {} device: {})", flush_status, stream.get().get_dev_id());
status = flush_status;
}
-Expected<size_t> ScheduledInputStream::sync_write_raw_buffer(const MemoryView &buffer, const std::function<bool()> &should_cancel)
-{
- return sync_write_raw_buffer_impl(buffer, m_core_op_handle, should_cancel);
-}
-
-Expected<size_t> InputVDeviceNativeStream::sync_write_raw_buffer(const MemoryView &buffer, const std::function<bool()> &should_cancel)
-{
- if (should_cancel()) {
- return make_unexpected(HAILO_STREAM_ABORTED_BY_USER);
- }
-
- auto expected_written_bytes = m_streams[m_next_transfer_stream_index].get().sync_write_raw_buffer(buffer);
- if (HAILO_SUCCESS != expected_written_bytes.status()) {
- LOGGER__INFO("Write to stream has failed! status = {}", expected_written_bytes.status());
- return make_unexpected(expected_written_bytes.status());
- }
- auto written_bytes = expected_written_bytes.value();
-
- // Update m_next_transfer_stream_index only if 'batch' frames has been transferred
- if (0 == (++m_acc_frames % m_streams[0].get().get_dynamic_batch_size())) {
- m_next_transfer_stream_index = static_cast<uint32_t>((m_next_transfer_stream_index + 1) % m_streams.size());
- m_acc_frames = 0;
- }
- return written_bytes;
-}
-
-Expected<size_t> ScheduledInputStream::sync_write_raw_buffer_impl(const MemoryView &buffer, scheduler_core_op_handle_t core_op_handle,
- const std::function<bool()> &should_cancel)
-{
- auto core_ops_scheduler = m_core_ops_scheduler.lock();
- CHECK_AS_EXPECTED(core_ops_scheduler, HAILO_INTERNAL_FAILURE);
-
- auto status = core_ops_scheduler->wait_for_write(core_op_handle, name(), get_timeout(), should_cancel);
- if (HAILO_STREAM_ABORTED_BY_USER == status) {
- LOGGER__INFO("Write to stream was aborted.");
- return make_unexpected(status);
- }
- CHECK_SUCCESS_AS_EXPECTED(status);
-
- TRACE(WriteFrameTrace, "", core_op_handle, m_stream_info.name);
-
- assert(1 == m_streams.size());
- status = m_streams[0].get().write_buffer_only(buffer, should_cancel);
-
- auto write_finish_status = core_ops_scheduler->signal_write_finish(core_op_handle, name(), status != HAILO_SUCCESS);
- if (HAILO_SUCCESS != status) {
- LOGGER__INFO("Write to stream has failed! status = {}", status);
- return make_unexpected(status);
- }
-
- if (HAILO_STREAM_ABORTED_BY_USER == write_finish_status) {
- return make_unexpected(write_finish_status);
- }
- CHECK_SUCCESS_AS_EXPECTED(write_finish_status);
-
- auto written_bytes = buffer.size();
- return written_bytes;
-}
-
-hailo_status ScheduledInputStream::abort()
-{
- return abort_impl(m_core_op_handle);
-}
-
-hailo_status InputVDeviceNativeStream::abort()
-{
- auto status = HAILO_SUCCESS; // Best effort
- for (auto &stream : m_streams) {
- auto abort_status = stream.get().abort();
- if (HAILO_SUCCESS != status) {
- LOGGER__ERROR("Failed to abort input stream. (status: {} device: {})", status, stream.get().get_dev_id());
- status = abort_status;
- }
- }
-
- return status;
-}
-
-hailo_status ScheduledInputStream::abort_impl(scheduler_core_op_handle_t core_op_handle)
-{
- auto status = HAILO_SUCCESS; // Best effort
- assert(1 == m_streams.size());
- auto abort_status = m_streams[0].get().abort();
- if (HAILO_SUCCESS != status) {
- LOGGER__ERROR("Failed to abort input stream. (status: {} device: {})", status, m_streams[0].get().get_dev_id());
- status = abort_status;
- }
-
- auto core_ops_scheduler = m_core_ops_scheduler.lock();
- CHECK(core_ops_scheduler, HAILO_INTERNAL_FAILURE);
-
- auto disable_status = core_ops_scheduler->disable_stream(core_op_handle, name());
- if (HAILO_SUCCESS != disable_status) {
- LOGGER__ERROR("Failed to disable stream in the core-op scheduler. (status: {})", disable_status);
- status = disable_status;
- }
-
- return status;
-}
-
-hailo_status ScheduledInputStream::clear_abort()
+hailo_status VDeviceInputStreamBase::write_impl(const MemoryView &buffer)
{
- return clear_abort_impl(m_core_op_handle);
-}
-
-hailo_status InputVDeviceNativeStream::clear_abort()
-{
- auto status = HAILO_SUCCESS; // Best effort
- for (auto &stream : m_streams) {
- auto clear_abort_status = stream.get().clear_abort();
- if ((HAILO_SUCCESS != clear_abort_status) && (HAILO_STREAM_NOT_ACTIVATED != clear_abort_status)) {
- LOGGER__ERROR("Failed to clear abort input stream. (status: {} device: {})", clear_abort_status, stream.get().get_dev_id());
- status = clear_abort_status;
- }
- }
-
- return status;
-}
-
-hailo_status ScheduledInputStream::clear_abort_impl(scheduler_core_op_handle_t core_op_handle)
-{
- auto status = HAILO_SUCCESS; // Best effort
- assert(1 == m_streams.size());
- auto clear_abort_status = m_streams[0].get().clear_abort();
- if ((HAILO_SUCCESS != clear_abort_status) && (HAILO_STREAM_NOT_ACTIVATED != clear_abort_status)) {
- LOGGER__ERROR("Failed to clear abort input stream. (status: {} device: {})", clear_abort_status, m_streams[0].get().get_dev_id());
- status = clear_abort_status;
- }
-
- auto core_ops_scheduler = m_core_ops_scheduler.lock();
- CHECK(core_ops_scheduler, HAILO_INTERNAL_FAILURE);
-
- auto enable_status = core_ops_scheduler->enable_stream(core_op_handle, name());
- if (HAILO_SUCCESS != enable_status) {
- LOGGER__ERROR("Failed to enable stream in the core-op scheduler. (status: {})", enable_status);
- status = enable_status;
- }
-
- return status;
+ return write_impl(buffer, []() { return false; });
}
/** Output stream **/
-hailo_status OutputVDeviceBaseStream::deactivate_stream()
+hailo_status VDeviceOutputStreamBase::deactivate_stream()
{
auto status = HAILO_SUCCESS; // Best effort
- for (auto &stream : m_streams) {
+ for (const auto &pair : m_streams) {
+ auto &stream = pair.second;
auto deactivate_status = stream.get().deactivate_stream();
if (HAILO_SUCCESS != deactivate_status) {
LOGGER__ERROR("Failed to deactivate output stream. (status: {} device: {})", deactivate_status, stream.get().get_dev_id());
status = deactivate_status;
}
-OutputVDeviceBaseStream::~OutputVDeviceBaseStream()
+VDeviceOutputStreamBase::~VDeviceOutputStreamBase()
{
// We want to stop the vdma channel before closing the stream in the firmware
// because sending data to a closed stream may terminate the dma engine
}
}
-hailo_status OutputVDeviceBaseStream::activate_stream(uint16_t dynamic_batch_size, bool resume_pending_stream_transfers)
+hailo_status VDeviceOutputStreamBase::activate_stream(uint16_t dynamic_batch_size, bool resume_pending_stream_transfers)
{
- for (auto &stream : m_streams) {
+ for (const auto &pair : m_streams) {
+ auto &stream = pair.second;
auto status = stream.get().activate_stream(dynamic_batch_size, resume_pending_stream_transfers);
if (HAILO_SUCCESS != status) {
LOGGER__ERROR("Failed to activate output stream. (device: {})", stream.get().get_dev_id());
return HAILO_SUCCESS;
}
-hailo_status OutputVDeviceBaseStream::read_all(MemoryView &/*buffer*/)
+hailo_status VDeviceOutputStreamBase::read_impl(MemoryView &/*buffer*/)
{
- LOGGER__ERROR("read_all should not be called in vdevice flow");
+ LOGGER__ERROR("read_impl should not be called in vdevice flow");
return HAILO_INTERNAL_FAILURE;
}
-Expected<size_t> OutputVDeviceBaseStream::sync_read_raw_buffer(MemoryView &/*buffer*/)
-{
- LOGGER__ERROR("sync_read_raw_buffer should not be called in vdevice flow");
- return make_unexpected(HAILO_INTERNAL_FAILURE);
-}
-
-hailo_status ScheduledOutputStream::read(MemoryView buffer)
-{
- return read_impl(buffer, m_core_op_handle);
-}
-
-hailo_status OutputVDeviceNativeStream::read(MemoryView buffer)
-{
- auto status = m_streams[m_next_transfer_stream_index].get().read(buffer);
- if (HAILO_SUCCESS != status) {
- LOGGER__INFO("Read from stream has failed! status = {}", status);
- return status;
- }
-
- // Update m_next_transfer_stream_index only if 'batch' frames has been transferred
- if (0 == (++m_acc_frames % m_streams[0].get().get_dynamic_batch_size())) {
- m_next_transfer_stream_index = static_cast<uint32_t>((m_next_transfer_stream_index + 1) % m_streams.size());
- m_acc_frames = 0;
- }
-
- return HAILO_SUCCESS;
-}
-
-hailo_status ScheduledOutputStream::read_impl(MemoryView buffer, scheduler_core_op_handle_t core_op_handle)
-{
- auto core_ops_scheduler = m_core_ops_scheduler.lock();
- CHECK(core_ops_scheduler, HAILO_INTERNAL_FAILURE);
-
- auto device_id = core_ops_scheduler->wait_for_read(core_op_handle, name(), get_timeout());
- if (HAILO_STREAM_ABORTED_BY_USER == device_id.status()) {
- LOGGER__INFO("Read from stream was aborted.");
- return device_id.status();
- }
- CHECK_EXPECTED_AS_STATUS(device_id);
-
- TRACE(ReadFrameTrace, "", core_op_handle, m_stream_info.name);
- auto status = m_streams[device_id.value()].get().read(buffer);
- if (HAILO_SUCCESS != status) {
- LOGGER__INFO("Read from stream has failed! status = {}", status);
- return status;
- }
-
- status = core_ops_scheduler->signal_read_finish(core_op_handle, name(), device_id.value());
- if (HAILO_STREAM_ABORTED_BY_USER == status) {
- return status;
- }
- CHECK_SUCCESS(status);
-
- return HAILO_SUCCESS;
-}
-
-Expected<std::unique_ptr<OutputVDeviceBaseStream>> OutputVDeviceBaseStream::create(std::vector<std::reference_wrapper<VdmaOutputStream>> &&low_level_streams,
- const LayerInfo &edge_layer, const scheduler_core_op_handle_t &core_op_handle, EventPtr core_op_activated_event,
+Expected<std::unique_ptr<VDeviceOutputStreamBase>> VDeviceOutputStreamBase::create(
+ std::map<device_id_t, std::reference_wrapper<VdmaOutputStreamBase>> &&low_level_streams,
+ const hailo_stream_parameters_t &stream_params, const LayerInfo &edge_layer,
+ const scheduler_core_op_handle_t &core_op_handle, EventPtr core_op_activated_event,
CoreOpsSchedulerWeakPtr core_ops_scheduler)
{
assert(0 < low_level_streams.size());
- auto status = HAILO_UNINITIALIZED;
-
- std::unique_ptr<OutputVDeviceBaseStream> local_vdevice_stream;
+
if (core_ops_scheduler.lock()) {
- local_vdevice_stream = make_unique_nothrow<ScheduledOutputStream>(std::move(low_level_streams), core_op_handle,
- edge_layer, std::move(core_op_activated_event), core_ops_scheduler, status);
+ if ((stream_params.flags & HAILO_STREAM_FLAGS_ASYNC) != 0) {
+ LOGGER__ERROR("Async output streams are not supported with scheduler");
+ return make_unexpected(HAILO_NOT_IMPLEMENTED);
+ } else {
+ auto stream = ScheduledOutputStream::create(std::move(low_level_streams), core_op_handle,
+ edge_layer, std::move(core_op_activated_event), core_ops_scheduler);
+ CHECK_EXPECTED(stream);
+ return std::unique_ptr<VDeviceOutputStreamBase>(stream.release());
+ }
} else {
- local_vdevice_stream = make_unique_nothrow<OutputVDeviceNativeStream>(std::move(low_level_streams), edge_layer,
- std::move(core_op_activated_event), status);
+ if ((stream_params.flags & HAILO_STREAM_FLAGS_ASYNC) != 0) {
+ auto stream = VDeviceNativeAsyncOutputStream::create(std::move(low_level_streams),
+ std::move(core_op_activated_event), edge_layer);
+ CHECK_EXPECTED(stream);
+ return std::unique_ptr<VDeviceOutputStreamBase>(stream.release());
+ } else {
+ auto stream = VDeviceNativeOutputStream::create(std::move(low_level_streams),
+ std::move(core_op_activated_event), edge_layer);
+ CHECK_EXPECTED(stream);
+ return std::unique_ptr<VDeviceOutputStreamBase>(stream.release());
+ }
}
-
- CHECK_AS_EXPECTED((nullptr != local_vdevice_stream), HAILO_OUT_OF_HOST_MEMORY);
- CHECK_SUCCESS_AS_EXPECTED(status);
-
- return local_vdevice_stream;
}
-hailo_status OutputVDeviceBaseStream::set_timeout(std::chrono::milliseconds timeout)
+hailo_status VDeviceOutputStreamBase::set_timeout(std::chrono::milliseconds timeout)
{
- for (auto &stream : m_streams) {
+ for (const auto &pair : m_streams) {
+ auto &stream = pair.second;
auto status = stream.get().set_timeout(timeout);
CHECK_SUCCESS(status, "Failed to set timeout to output stream. (device: {})", stream.get().get_dev_id());
}
return HAILO_SUCCESS;
}
-std::chrono::milliseconds OutputVDeviceBaseStream::get_timeout() const
+std::chrono::milliseconds VDeviceOutputStreamBase::get_timeout() const
{
// All timeout values of m_streams should be the same
- return m_streams[0].get().get_timeout();
+ return m_streams.begin()->second.get().get_timeout();
}
-hailo_stream_interface_t OutputVDeviceBaseStream::get_interface() const
+hailo_stream_interface_t VDeviceOutputStreamBase::get_interface() const
{
// All interface values of m_streams should be the same
- return m_streams[0].get().get_interface();
+ return m_streams.begin()->second.get().get_interface();
}
-hailo_status ScheduledOutputStream::abort()
+Expected<size_t> VDeviceOutputStreamBase::get_buffer_frames_size() const
{
- return abort_impl(m_core_op_handle);
+ return m_streams.begin()->second.get().get_buffer_frames_size();
}
-hailo_status OutputVDeviceNativeStream::abort()
-{
- auto status = HAILO_SUCCESS; // Best effort
- for (auto &stream : m_streams) {
- auto abort_status = stream.get().abort();
- if (HAILO_SUCCESS != status) {
- LOGGER__ERROR("Failed to abort output stream. (status: {} device: {})", status, stream.get().get_dev_id());
- status = abort_status;
- }
- }
-
- return status;
-}
-
-hailo_status ScheduledOutputStream::abort_impl(scheduler_core_op_handle_t core_op_handle)
-{
- auto status = HAILO_SUCCESS; // Best effort
- for (auto& stream : m_streams) {
- auto abort_status = stream.get().abort();
- if (HAILO_SUCCESS != status) {
- LOGGER__ERROR("Failed to abort output stream. (status: {} device: {})", status, stream.get().get_dev_id());
- status = abort_status;
- }
- }
-
- auto core_ops_scheduler = m_core_ops_scheduler.lock();
- CHECK(core_ops_scheduler, HAILO_INTERNAL_FAILURE);
-
- auto disable_status = core_ops_scheduler->disable_stream(core_op_handle, name());
- if (HAILO_SUCCESS != disable_status) {
- LOGGER__ERROR("Failed to disable stream in the core-op scheduler. (status: {})", disable_status);
- status = disable_status;
- }
-
- return status;
-}
-
-hailo_status ScheduledOutputStream::clear_abort()
-{
- return clear_abort_impl(m_core_op_handle);
-}
-
-hailo_status OutputVDeviceNativeStream::clear_abort()
-{
- auto status = HAILO_SUCCESS; // Best effort
- for (auto &stream : m_streams) {
- auto clear_abort_status = stream.get().clear_abort();
- if ((HAILO_SUCCESS != clear_abort_status) && (HAILO_STREAM_NOT_ACTIVATED != clear_abort_status)) {
- LOGGER__ERROR("Failed to clear abort output stream. (status: {} device: {})", clear_abort_status, stream.get().get_dev_id());
- status = clear_abort_status;
- }
- }
-
- return status;
-}
-
-hailo_status ScheduledOutputStream::clear_abort_impl(scheduler_core_op_handle_t core_op_handle)
-{
- auto status = HAILO_SUCCESS; // Best effort
- for (auto& stream : m_streams) {
- auto clear_abort_status = stream.get().clear_abort();
- if ((HAILO_SUCCESS != clear_abort_status) && (HAILO_STREAM_NOT_ACTIVATED != clear_abort_status)) {
- LOGGER__ERROR("Failed to clear abort output stream. (status: {} device: {})", clear_abort_status, stream.get().get_dev_id());
- status = clear_abort_status;
- }
- }
-
- auto core_ops_scheduler = m_core_ops_scheduler.lock();
- CHECK(core_ops_scheduler, HAILO_INTERNAL_FAILURE);
-
- auto enable_status = core_ops_scheduler->enable_stream(core_op_handle, name());
- if (HAILO_SUCCESS != enable_status) {
- LOGGER__ERROR("Failed to enable stream in the core-op scheduler. (status: {})", enable_status);
- status = enable_status;
- }
-
- return status;
-}
-
-Expected<size_t> OutputVDeviceBaseStream::get_buffer_frames_size() const
-{
- size_t total_buffers_size = 0;
- for (auto &stream : m_streams) {
- auto stream_buffer_size = stream.get().get_buffer_frames_size();
- if (HAILO_NOT_AVAILABLE == stream_buffer_size.status()) {
- return make_unexpected(HAILO_NOT_AVAILABLE);
- }
- CHECK_EXPECTED(stream_buffer_size);
- total_buffers_size += stream_buffer_size.value();
- }
-
- return total_buffers_size;
-}
-
-Expected<size_t> OutputVDeviceBaseStream::get_pending_frames_count() const
+Expected<size_t> VDeviceOutputStreamBase::get_pending_frames_count() const
{
size_t total_pending_frames_count = 0;
- for (auto &stream : m_streams) {
+ for (const auto &pair : m_streams) {
+ auto &stream = pair.second;
auto stream_pending_frames_count = stream.get().get_pending_frames_count();
if (HAILO_NOT_AVAILABLE == stream_pending_frames_count.status()) {
return make_unexpected(HAILO_NOT_AVAILABLE);
}
- CHECK_EXPECTED(stream_pending_frames_count);
- total_pending_frames_count += stream_pending_frames_count.value();
+ CHECK_EXPECTED(stream_pending_frames_count);
+ total_pending_frames_count += stream_pending_frames_count.value();
}
-
return total_pending_frames_count;
}
* @file vdevice_stream.hpp
* @brief Internal stream implementation for VDevice
*
- * InputStream (External "interface")
- * |-- InputStreamBase (Base class)
- * |-- InputVDeviceBaseStream (Base class for vdevice streams)
- * | |-- InputVDeviceNativeStream
- * | |-- ScheduledInputStream
+ * InputStream (External "interface")
+ * |-- InputStreamBase (Base class)
+ * |-- VDeviceInputStreamBase (Base class for vdevice streams)
+ * | |-- VDeviceNativeInputStreamBase
+ * | | |-- VDeviceNativeInputStream (Sync API)
+ * | | |-- VDeviceNativeAsyncInputStream (Async API)
+ * | |-- ScheduledInputStreamBase
+ * | | |-- ScheduledInputStream (Sync API)
+ * | | |-- ScheduledAsyncInputStream (Async API)
*
- * OutputStream (External "interface")
- * |-- OutputStreamBase (Base class)
- * |-- OutputVDeviceBaseStream (Base class for vdevice streams)
- * | |-- OutputVDeviceNativeStream
- * | |-- ScheduledOutputStream
+ * OutputStream (External "interface")
+ * |-- OutputStreamBase (Base class)
+ * |-- VDeviceOutputStreamBase (Base class for vdevice streams)
+ * | |-- VDeviceNativeOutputStreamBase
+ * | | |-- VDeviceNativeOutputStream (Sync API)
+ * | | |-- VDeviceNativeAsyncOutputStream (Async API)
+ * | |-- ScheduledOutputStreamBase
+ * | | |-- ScheduledOutputStream (Sync API)
+ * | | |-- ScheduledAsyncOutputStream (Async API)
**/
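/**
 * Orientation sketch, condensed from the create() factories in
 * vdevice_stream.cpp (not a verbatim copy): the concrete class is picked by
 * whether a scheduler is attached and whether HAILO_STREAM_FLAGS_ASYNC is set.
 *
 *   scheduler + async -> ScheduledAsyncInputStream (async scheduled outputs are rejected)
 *   scheduler + sync  -> ScheduledInputStream, or MultiDeviceScheduledInputStream for >1 device
 *   no scheduler      -> VDeviceNative{Async,}InputStream / VDeviceNative{Async,}OutputStream
 **/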
#ifndef HAILO_VDEVICE_STREAM_HPP_
namespace hailort
{
-class InputVDeviceBaseStream : public InputStreamBase {
+class VDeviceInputStreamBase : public InputStreamBase {
public:
- static Expected<std::unique_ptr<InputVDeviceBaseStream>> create(std::vector<std::reference_wrapper<VdmaInputStream>> &&low_level_streams,
- const LayerInfo &edge_layer, const scheduler_core_op_handle_t &core_op_handle,
- EventPtr core_op_activated_event, CoreOpsSchedulerWeakPtr core_ops_scheduler);
+ static Expected<std::unique_ptr<VDeviceInputStreamBase>> create(
+ std::map<device_id_t, std::reference_wrapper<VdmaInputStreamBase>> &&low_level_streams,
+ const hailo_stream_parameters_t &stream_params, const LayerInfo &edge_layer,
+ const scheduler_core_op_handle_t &core_op_handle, EventPtr core_op_activated_event,
+ CoreOpsSchedulerWeakPtr core_ops_scheduler);
- virtual ~InputVDeviceBaseStream();
+ virtual ~VDeviceInputStreamBase();
virtual hailo_status activate_stream(uint16_t dynamic_batch_size, bool resume_pending_stream_transfers) override;
virtual hailo_status deactivate_stream() override;
virtual std::chrono::milliseconds get_timeout() const override;
virtual hailo_status set_timeout(std::chrono::milliseconds timeout) override;
- virtual hailo_status send_pending_buffer(size_t device_index = 0) override;
+ virtual hailo_status send_pending_buffer(const device_id_t &device_id) override;
virtual Expected<size_t> get_buffer_frames_size() const override;
virtual Expected<size_t> get_pending_frames_count() const override;
virtual bool is_scheduled() override = 0;
virtual hailo_status abort() override = 0;
virtual hailo_status clear_abort() override = 0;
-
- virtual hailo_status register_interrupt_callback(const vdma::ProcessingCompleteCallback &callback) override
- {
- for (auto &stream : m_streams) {
- auto status = stream.get().register_interrupt_callback(callback);
- CHECK_SUCCESS(status);
- }
- return HAILO_SUCCESS;
- }
+ virtual hailo_status flush() override;
virtual void notify_all()
{
}
protected:
- virtual hailo_status sync_write_all_raw_buffer_no_transform_impl(void *buffer, size_t offset, size_t size) override;
- virtual Expected<size_t> sync_write_raw_buffer(const MemoryView &buffer) override
- {
- return sync_write_raw_buffer(buffer, []() { return false; });
- }
- virtual Expected<size_t> sync_write_raw_buffer(const MemoryView &buffer, const std::function<bool()> &should_cancel) = 0;
+ virtual hailo_status write_impl(const MemoryView &buffer) final override;
+ virtual hailo_status write_impl(const MemoryView &buffer, const std::function<bool()> &should_cancel) = 0;
- explicit InputVDeviceBaseStream(
- std::vector<std::reference_wrapper<VdmaInputStream>> &&streams,
+ VDeviceInputStreamBase(
+ std::map<device_id_t, std::reference_wrapper<VdmaInputStreamBase>> &&streams,
EventPtr &&core_op_activated_event,
const LayerInfo &layer_info,
hailo_status &status) :
- InputStreamBase(layer_info, streams[0].get().get_interface(), std::move(core_op_activated_event), status),
+ InputStreamBase(layer_info, streams.begin()->second.get().get_interface(), std::move(core_op_activated_event), status),
m_streams(std::move(streams)),
m_is_stream_activated(false),
- m_next_transfer_stream_index(0),
+ m_next_transfer_stream(m_streams.begin()->first),
m_acc_frames(0)
{}
- std::vector<std::reference_wrapper<VdmaInputStream>> m_streams;
+ std::map<device_id_t, std::reference_wrapper<VdmaInputStreamBase>> m_streams;
bool m_is_stream_activated;
- uint32_t m_next_transfer_stream_index;
+ device_id_t m_next_transfer_stream;
uint32_t m_acc_frames;
private:
friend class VDeviceInputStreamMultiplexerWrapper;
-
- virtual hailo_status flush() override;
};
-class OutputVDeviceBaseStream : public OutputStreamBase {
+class VDeviceOutputStreamBase : public OutputStreamBase {
public:
- virtual ~OutputVDeviceBaseStream();
+ virtual ~VDeviceOutputStreamBase();
- static Expected<std::unique_ptr<OutputVDeviceBaseStream>> create(std::vector<std::reference_wrapper<VdmaOutputStream>> &&low_level_streams,
- const LayerInfo &edge_layer, const scheduler_core_op_handle_t &core_op_handle,
- EventPtr core_op_activated_event, CoreOpsSchedulerWeakPtr core_ops_scheduler);
+ static Expected<std::unique_ptr<VDeviceOutputStreamBase>> create(
+ std::map<device_id_t, std::reference_wrapper<VdmaOutputStreamBase>> &&low_level_streams,
+ const hailo_stream_parameters_t &stream_params, const LayerInfo &edge_layer,
+ const scheduler_core_op_handle_t &core_op_handle, EventPtr core_op_activated_event,
+ CoreOpsSchedulerWeakPtr core_ops_scheduler);
virtual hailo_status activate_stream(uint16_t dynamic_batch_size, bool resume_pending_stream_transfers) override;
virtual hailo_status deactivate_stream() override;
virtual std::chrono::milliseconds get_timeout() const override;
virtual hailo_status set_timeout(std::chrono::milliseconds timeout) override;
virtual Expected<size_t> get_buffer_frames_size() const override;
- virtual Expected<size_t> get_pending_frames_count() const override;
+ virtual Expected<size_t> get_pending_frames_count() const override; // Returns the accumulated pending frames
virtual hailo_status abort() override = 0;
virtual hailo_status clear_abort() override = 0;
virtual bool is_scheduled() override = 0;
- virtual hailo_status register_interrupt_callback(const vdma::ProcessingCompleteCallback &callback) override
- {
- for (auto &stream : m_streams) {
- auto status = stream.get().register_interrupt_callback(callback);
- CHECK_SUCCESS(status);
- }
- return HAILO_SUCCESS;
- }
-
protected:
- virtual Expected<size_t> sync_read_raw_buffer(MemoryView &buffer) override;
-
- explicit OutputVDeviceBaseStream(
- std::vector<std::reference_wrapper<VdmaOutputStream>> &&streams,
+ VDeviceOutputStreamBase(
+ std::map<device_id_t, std::reference_wrapper<VdmaOutputStreamBase>> &&streams,
const LayerInfo &layer_info,
EventPtr &&core_op_activated_event,
hailo_status &status) :
- OutputStreamBase(layer_info, std::move(core_op_activated_event), status),
+ OutputStreamBase(layer_info, streams.begin()->second.get().get_interface(),
+ std::move(core_op_activated_event), status),
m_streams(std::move(streams)),
m_is_stream_activated(false),
- m_next_transfer_stream_index(0),
+ m_next_transfer_stream(m_streams.begin()->first),
m_acc_frames(0)
{}
- virtual hailo_status read_all(MemoryView &buffer) override;
+ virtual hailo_status read_impl(MemoryView &buffer) override final;
- std::vector<std::reference_wrapper<VdmaOutputStream>> m_streams;
+ std::map<device_id_t, std::reference_wrapper<VdmaOutputStreamBase>> m_streams;
bool m_is_stream_activated;
- uint32_t m_next_transfer_stream_index;
+ device_id_t m_next_transfer_stream;
uint32_t m_acc_frames;
private:
}
*m_is_aborted = true;
- if (is_scheduled()) {
- auto status = m_multiplexer->disable_stream(m_core_op_multiplexer_handle, name());
- CHECK_SUCCESS(status);
-
- m_vdevice_input_stream->notify_all();
-
- status = m_multiplexer->run_once_for_stream(name(), INPUT_RUN_ONCE_HANDLE__ABORT, m_core_op_multiplexer_handle);
- CHECK_SUCCESS(status);
+ auto status = m_multiplexer->disable_stream(m_core_op_multiplexer_handle, name());
+ CHECK_SUCCESS(status);
- return HAILO_SUCCESS;
- }
+ m_vdevice_input_stream->notify_all();
- auto status = m_vdevice_input_stream->abort();
+ status = m_multiplexer->run_once_for_stream(name(), INPUT_RUN_ONCE_HANDLE__ABORT, m_core_op_multiplexer_handle);
CHECK_SUCCESS(status);
return HAILO_SUCCESS;
}
*m_is_aborted = false;
- if (is_scheduled()) {
- auto status = m_multiplexer->enable_stream(m_core_op_multiplexer_handle, name());
- CHECK_SUCCESS(status);
-
- status = m_multiplexer->run_once_for_stream(name(), INPUT_RUN_ONCE_HANDLE__CLEAR_ABORT, m_core_op_multiplexer_handle);
- CHECK_SUCCESS(status);
-
- m_vdevice_input_stream->notify_all();
-
- return HAILO_SUCCESS;
- }
+ auto status = m_multiplexer->enable_stream(m_core_op_multiplexer_handle, name());
+ CHECK_SUCCESS(status);
- auto status = m_vdevice_input_stream->clear_abort();
+ status = m_multiplexer->run_once_for_stream(name(), INPUT_RUN_ONCE_HANDLE__CLEAR_ABORT, m_core_op_multiplexer_handle);
CHECK_SUCCESS(status);
+ m_vdevice_input_stream->notify_all();
+
return HAILO_SUCCESS;
}
bool VDeviceInputStreamMultiplexerWrapper::is_scheduled()
{
- return m_vdevice_input_stream->is_scheduled();
+ // The multiplexer can only work with the scheduler
+ assert(m_vdevice_input_stream->is_scheduled());
+ return true;
}
-hailo_status VDeviceInputStreamMultiplexerWrapper::send_pending_buffer(size_t device_index)
+hailo_status VDeviceInputStreamMultiplexerWrapper::send_pending_buffer(const device_id_t &device_id)
{
- return m_vdevice_input_stream->send_pending_buffer(device_index);
+ return m_vdevice_input_stream->send_pending_buffer(device_id);
}
Expected<size_t> VDeviceInputStreamMultiplexerWrapper::get_buffer_frames_size() const
return m_vdevice_input_stream->get_pending_frames_count();
}
-Expected<size_t> VDeviceInputStreamMultiplexerWrapper::sync_write_raw_buffer(const MemoryView &buffer)
+hailo_status VDeviceInputStreamMultiplexerWrapper::write_impl(const MemoryView &buffer)
{
- if (is_scheduled()) {
- auto status = m_multiplexer->wait_for_write(m_core_op_multiplexer_handle);
- if (HAILO_STREAM_ABORTED_BY_USER == status) {
- return make_unexpected(status);
- }
- CHECK_SUCCESS_AS_EXPECTED(status);
+ auto status = m_multiplexer->wait_for_write(m_core_op_multiplexer_handle);
+ if (HAILO_STREAM_ABORTED_BY_USER == status) {
+ return status;
}
+ CHECK_SUCCESS(status);
- auto exp = m_vdevice_input_stream->sync_write_raw_buffer(buffer, [this]() { return m_is_aborted->load(); });
- if (is_scheduled()) {
- auto status = m_multiplexer->signal_write_finish(m_core_op_multiplexer_handle, exp.status() != HAILO_SUCCESS);
- CHECK_SUCCESS_AS_EXPECTED(status);
- }
- if (HAILO_STREAM_ABORTED_BY_USER == exp.status()) {
- return make_unexpected(exp.status());
+ auto write_status = m_vdevice_input_stream->write_impl(buffer, [this]() { return m_is_aborted->load(); });
+ status = m_multiplexer->signal_write_finish(m_core_op_multiplexer_handle, write_status != HAILO_SUCCESS);
+ CHECK_SUCCESS(status);
+ if (HAILO_STREAM_ABORTED_BY_USER == write_status) {
+ return write_status;
}
- CHECK_EXPECTED(exp);
-
- return exp;
-}
-
-hailo_status VDeviceInputStreamMultiplexerWrapper::sync_write_all_raw_buffer_no_transform_impl(void *buffer, size_t offset, size_t size)
-{
- ASSERT(NULL != buffer);
+ CHECK_SUCCESS(write_status);
- return sync_write_raw_buffer(MemoryView(static_cast<uint8_t*>(buffer) + offset, size)).status();
+ return HAILO_SUCCESS;
}
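// A minimal sketch (hypothetical helper, not part of HailoRT or this patch): the flow
// above must pair every successful wait_for_write() with exactly one
// signal_write_finish(), even on early returns. An RAII guard is one way to keep that
// pairing balanced; the name and signature here are invented for illustration.
#include <functional>
#include <utility>

class WriteFinishGuard {
public:
    explicit WriteFinishGuard(std::function<void(bool write_failed)> signal) :
        m_signal(std::move(signal))
    {}
    ~WriteFinishGuard() { m_signal(m_write_failed); }
    void set_succeeded() { m_write_failed = false; }
private:
    std::function<void(bool)> m_signal;
    bool m_write_failed = true; // pessimistic default: report failure unless told otherwise
};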
hailo_status VDeviceInputStreamMultiplexerWrapper::set_timeout(std::chrono::milliseconds timeout)
hailo_status VDeviceInputStreamMultiplexerWrapper::flush()
{
- if (is_scheduled()) {
- auto status = m_multiplexer->run_once_for_stream(name(), INPUT_RUN_ONCE_HANDLE__FLUSH, m_core_op_multiplexer_handle);
- CHECK_SUCCESS(status);
-
- return HAILO_SUCCESS;
- }
-
- return m_vdevice_input_stream->flush();
+ return m_multiplexer->run_once_for_stream(name(), INPUT_RUN_ONCE_HANDLE__FLUSH, m_core_op_multiplexer_handle);
}
-Expected<std::unique_ptr<VDeviceInputStreamMultiplexerWrapper>> VDeviceInputStreamMultiplexerWrapper::create(std::shared_ptr<InputVDeviceBaseStream> vdevice_input_stream,
+Expected<std::unique_ptr<VDeviceInputStreamMultiplexerWrapper>> VDeviceInputStreamMultiplexerWrapper::create(std::shared_ptr<VDeviceInputStreamBase> vdevice_input_stream,
std::string network_name, std::shared_ptr<PipelineMultiplexer> multiplexer, scheduler_core_op_handle_t core_ops_scheduler_handle,
multiplexer_core_op_handle_t core_op_multiplexer_handle)
{
+ assert(vdevice_input_stream->is_scheduled());
hailo_status status = HAILO_UNINITIALIZED;
std::unique_ptr<VDeviceInputStreamMultiplexerWrapper> wrapper(new (std::nothrow) VDeviceInputStreamMultiplexerWrapper(vdevice_input_stream, network_name, multiplexer,
core_ops_scheduler_handle, core_op_multiplexer_handle, status));
return wrapper;
}
-VDeviceInputStreamMultiplexerWrapper::VDeviceInputStreamMultiplexerWrapper(std::shared_ptr<InputVDeviceBaseStream> &vdevice_input_stream,
+VDeviceInputStreamMultiplexerWrapper::VDeviceInputStreamMultiplexerWrapper(std::shared_ptr<VDeviceInputStreamBase> &vdevice_input_stream,
std::string network_name, std::shared_ptr<PipelineMultiplexer> multiplexer, scheduler_core_op_handle_t core_ops_scheduler_handle,
multiplexer_core_op_handle_t core_op_multiplexer_handle, hailo_status &status) :
InputStreamBase(vdevice_input_stream->get_info(),
return m_vdevice_output_stream->get_timeout();
}
+hailo_status VDeviceOutputStreamMultiplexerWrapper::set_next_device_to_read(const device_id_t &device_id)
+{
+ return m_vdevice_output_stream->set_next_device_to_read(device_id);
+}
+
hailo_status VDeviceOutputStreamMultiplexerWrapper::abort()
{
if (*m_is_aborted) {
}
*m_is_aborted = true;
- if (is_scheduled()) {
- auto status = m_multiplexer->disable_stream(m_core_op_multiplexer_handle, name());
- CHECK_SUCCESS(status);
-
- status = m_multiplexer->run_once_for_stream(name(), OUTPUT_RUN_ONCE_HANDLE__ABORT, m_core_op_multiplexer_handle);
- CHECK_SUCCESS(status);
-
- return HAILO_SUCCESS;
- }
+ auto status = m_multiplexer->disable_stream(m_core_op_multiplexer_handle, name());
+ CHECK_SUCCESS(status);
- auto status = m_vdevice_output_stream->abort();
+ status = m_multiplexer->run_once_for_stream(name(), OUTPUT_RUN_ONCE_HANDLE__ABORT, m_core_op_multiplexer_handle);
CHECK_SUCCESS(status);
return HAILO_SUCCESS;
}
*m_is_aborted = false;
- if (is_scheduled()) {
- auto status = m_multiplexer->enable_stream(m_core_op_multiplexer_handle, name());
- CHECK_SUCCESS(status);
-
- status = m_multiplexer->run_once_for_stream(name(), OUTPUT_RUN_ONCE_HANDLE__CLEAR_ABORT, m_core_op_multiplexer_handle);
- CHECK_SUCCESS(status);
-
- return HAILO_SUCCESS;
- }
+ auto status = m_multiplexer->enable_stream(m_core_op_multiplexer_handle, name());
+ CHECK_SUCCESS(status);
- auto status = m_vdevice_output_stream->clear_abort();
+ status = m_multiplexer->run_once_for_stream(name(), OUTPUT_RUN_ONCE_HANDLE__CLEAR_ABORT, m_core_op_multiplexer_handle);
CHECK_SUCCESS(status);
return HAILO_SUCCESS;
bool VDeviceOutputStreamMultiplexerWrapper::is_scheduled()
{
- return m_vdevice_output_stream->is_scheduled();
+ // The multiplexer can only work with the scheduler
+ assert(m_vdevice_output_stream->is_scheduled());
+ return true;
}
Expected<size_t> VDeviceOutputStreamMultiplexerWrapper::get_buffer_frames_size() const
return m_vdevice_output_stream->get_pending_frames_count();
}
-Expected<size_t> VDeviceOutputStreamMultiplexerWrapper::sync_read_raw_buffer(MemoryView &buffer)
-{
- return m_vdevice_output_stream->sync_read_raw_buffer(buffer);
-}
-
-hailo_status VDeviceOutputStreamMultiplexerWrapper::read_all(MemoryView &buffer)
+hailo_status VDeviceOutputStreamMultiplexerWrapper::read_impl(MemoryView &buffer)
{
- return m_vdevice_output_stream->read_all(buffer);
+ return m_vdevice_output_stream->read_impl(buffer);
}
hailo_status VDeviceOutputStreamMultiplexerWrapper::read(MemoryView buffer)
{
uint32_t frames_to_drain_count = 0;
- if (is_scheduled()) {
- auto expected_drain_count = m_multiplexer->wait_for_read(m_core_op_multiplexer_handle, name(),
- m_vdevice_output_stream->get_timeout());
- if (HAILO_STREAM_ABORTED_BY_USER == expected_drain_count.status()) {
- return expected_drain_count.status();
- }
- CHECK_EXPECTED_AS_STATUS(expected_drain_count);
-
- frames_to_drain_count = expected_drain_count.release();
+ auto expected_drain_count = m_multiplexer->wait_for_read(m_core_op_multiplexer_handle, name(),
+ m_vdevice_output_stream->get_timeout());
+ if (HAILO_STREAM_ABORTED_BY_USER == expected_drain_count.status()) {
+ return expected_drain_count.status();
}
+ CHECK_EXPECTED_AS_STATUS(expected_drain_count);
+
+ frames_to_drain_count = expected_drain_count.release();
for (uint32_t i = 0; i < frames_to_drain_count; i++) {
auto status = m_vdevice_output_stream->read(buffer);
}
CHECK_SUCCESS(status);
- if (is_scheduled()) {
- status = m_multiplexer->signal_read_finish();
- CHECK_SUCCESS(status);
- }
+ status = m_multiplexer->signal_read_finish();
+ CHECK_SUCCESS(status);
return HAILO_SUCCESS;
}
return m_vdevice_output_stream->set_timeout(timeout);
}
-Expected<std::unique_ptr<VDeviceOutputStreamMultiplexerWrapper>> VDeviceOutputStreamMultiplexerWrapper::create(std::shared_ptr<OutputVDeviceBaseStream> vdevice_output_stream,
+Expected<std::unique_ptr<VDeviceOutputStreamMultiplexerWrapper>> VDeviceOutputStreamMultiplexerWrapper::create(std::shared_ptr<VDeviceOutputStreamBase> vdevice_output_stream,
std::string network_name, std::shared_ptr<PipelineMultiplexer> multiplexer, scheduler_core_op_handle_t core_ops_scheduler_handle,
multiplexer_core_op_handle_t core_op_multiplexer_handle)
{
+ assert(vdevice_output_stream->is_scheduled());
hailo_status status = HAILO_UNINITIALIZED;
std::unique_ptr<VDeviceOutputStreamMultiplexerWrapper> wrapper(new (std::nothrow) VDeviceOutputStreamMultiplexerWrapper(vdevice_output_stream, network_name, multiplexer,
core_ops_scheduler_handle, core_op_multiplexer_handle, status));
return wrapper;
}
-VDeviceOutputStreamMultiplexerWrapper::VDeviceOutputStreamMultiplexerWrapper(std::shared_ptr<OutputVDeviceBaseStream> &vdevice_output_stream,
+VDeviceOutputStreamMultiplexerWrapper::VDeviceOutputStreamMultiplexerWrapper(std::shared_ptr<VDeviceOutputStreamBase> &vdevice_output_stream,
std::string network_name, std::shared_ptr<PipelineMultiplexer> multiplexer, scheduler_core_op_handle_t core_ops_scheduler_handle,
multiplexer_core_op_handle_t core_op_multiplexer_handle, hailo_status &status) :
OutputStreamBase(vdevice_output_stream->get_layer_info(), vdevice_output_stream->get_info(),
class VDeviceInputStreamMultiplexerWrapper : public InputStreamBase {
public:
virtual ~VDeviceInputStreamMultiplexerWrapper() = default;
- static Expected<std::unique_ptr<VDeviceInputStreamMultiplexerWrapper>> create(std::shared_ptr<InputVDeviceBaseStream> vdevice_input_stream,
+ static Expected<std::unique_ptr<VDeviceInputStreamMultiplexerWrapper>> create(std::shared_ptr<VDeviceInputStreamBase> vdevice_input_stream,
std::string network_name, std::shared_ptr<PipelineMultiplexer> multiplexer, scheduler_core_op_handle_t core_ops_scheduler_handle,
multiplexer_core_op_handle_t core_op_multiplexer_handle = 0);
Expected<std::unique_ptr<VDeviceInputStreamMultiplexerWrapper>> clone(multiplexer_core_op_handle_t core_op_multiplexer_handle);
virtual hailo_status clear_abort() override;
virtual bool is_scheduled() override;
- virtual hailo_status send_pending_buffer(size_t device_index = 0) override;
+ virtual hailo_status send_pending_buffer(const device_id_t &device_id) override;
virtual Expected<size_t> get_buffer_frames_size() const override;
virtual Expected<size_t> get_pending_frames_count() const override;
- virtual hailo_status register_interrupt_callback(const vdma::ProcessingCompleteCallback &callback) override
- {
- return m_vdevice_input_stream->register_interrupt_callback(callback);
- }
-
protected:
- virtual Expected<size_t> sync_write_raw_buffer(const MemoryView &buffer) override;
- virtual hailo_status sync_write_all_raw_buffer_no_transform_impl(void *buffer, size_t offset, size_t size) override;
+ virtual hailo_status write_impl(const MemoryView &buffer) override;
private:
- VDeviceInputStreamMultiplexerWrapper(std::shared_ptr<InputVDeviceBaseStream> &vdevice_input_stream,
+ VDeviceInputStreamMultiplexerWrapper(std::shared_ptr<VDeviceInputStreamBase> &vdevice_input_stream,
std::string network_name, std::shared_ptr<PipelineMultiplexer> multiplexer, scheduler_core_op_handle_t core_ops_scheduler_handle,
multiplexer_core_op_handle_t core_op_multiplexer_handle, hailo_status &status);
virtual hailo_status set_timeout(std::chrono::milliseconds timeout) override;
virtual hailo_status flush() override;
-
- std::shared_ptr<InputVDeviceBaseStream> m_vdevice_input_stream;
+
+ std::shared_ptr<VDeviceInputStreamBase> m_vdevice_input_stream;
std::shared_ptr<PipelineMultiplexer> m_multiplexer;
scheduler_core_op_handle_t m_core_ops_scheduler_handle;
multiplexer_core_op_handle_t m_core_op_multiplexer_handle;
public:
virtual ~VDeviceOutputStreamMultiplexerWrapper() noexcept = default;
- static Expected<std::unique_ptr<VDeviceOutputStreamMultiplexerWrapper>> create(std::shared_ptr<OutputVDeviceBaseStream> vdevice_output_stream,
+ static Expected<std::unique_ptr<VDeviceOutputStreamMultiplexerWrapper>> create(std::shared_ptr<VDeviceOutputStreamBase> vdevice_output_stream,
std::string network_name, std::shared_ptr<PipelineMultiplexer> multiplexer, scheduler_core_op_handle_t core_ops_scheduler_handle,
multiplexer_core_op_handle_t core_op_multiplexer_handle = 0);
Expected<std::unique_ptr<VDeviceOutputStreamMultiplexerWrapper>> clone(multiplexer_core_op_handle_t core_op_multiplexer_handle);
virtual hailo_status deactivate_stream() override;
virtual hailo_stream_interface_t get_interface() const override;
virtual std::chrono::milliseconds get_timeout() const override;
+ virtual hailo_status set_next_device_to_read(const device_id_t &device_id) override;
virtual hailo_status abort() override;
virtual hailo_status clear_abort() override;
virtual bool is_scheduled() override;
virtual Expected<size_t> get_buffer_frames_size() const override;
virtual Expected<size_t> get_pending_frames_count() const override;
- virtual hailo_status register_interrupt_callback(const vdma::ProcessingCompleteCallback &callback) override
- {
- return m_vdevice_output_stream->register_interrupt_callback(callback);
- }
-
-protected:
- virtual Expected<size_t> sync_read_raw_buffer(MemoryView &buffer) override;
-
private:
- VDeviceOutputStreamMultiplexerWrapper(std::shared_ptr<OutputVDeviceBaseStream> &vdevice_output_stream,
+ VDeviceOutputStreamMultiplexerWrapper(std::shared_ptr<VDeviceOutputStreamBase> &vdevice_output_stream,
std::string network_name, std::shared_ptr<PipelineMultiplexer> multiplexer, scheduler_core_op_handle_t core_ops_scheduler_handle,
multiplexer_core_op_handle_t core_op_multiplexer_handle, hailo_status &status);
virtual hailo_status set_timeout(std::chrono::milliseconds timeout) override;
- virtual hailo_status read_all(MemoryView &buffer) override;
+ virtual hailo_status read_impl(MemoryView &buffer) override;
virtual hailo_status read(MemoryView buffer) override;
- std::shared_ptr<OutputVDeviceBaseStream> m_vdevice_output_stream;
+ std::shared_ptr<VDeviceOutputStreamBase> m_vdevice_output_stream;
std::shared_ptr<PipelineMultiplexer> m_multiplexer;
scheduler_core_op_handle_t m_core_ops_scheduler_handle;
multiplexer_core_op_handle_t m_core_op_multiplexer_handle;
${CMAKE_CURRENT_SOURCE_DIR}/memory/descriptor_list.cpp
${CMAKE_CURRENT_SOURCE_DIR}/memory/vdma_buffer.cpp
- ${CMAKE_CURRENT_SOURCE_DIR}/memory/dma_mapped_buffer.cpp
- ${CMAKE_CURRENT_SOURCE_DIR}/memory/mapped_buffer_impl.cpp
- ${CMAKE_CURRENT_SOURCE_DIR}/memory/mapped_buffer_factory.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/memory/mapped_buffer.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/memory/dma_able_buffer.cpp
${CMAKE_CURRENT_SOURCE_DIR}/memory/sg_buffer.cpp
${CMAKE_CURRENT_SOURCE_DIR}/memory/continuous_buffer.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/memory/buffer_requirements.cpp
)
set(HAILORT_CPP_SOURCES ${HAILORT_CPP_SOURCES} ${SRC_FILES} PARENT_SCOPE)
status = HAILO_SUCCESS;\r
}\r
\r
-hailo_status AsyncChannel::transfer(std::shared_ptr<DmaMappedBuffer> buffer, const TransferDoneCallback &user_callback, void *opaque)\r
-{\r
- CHECK_NOT_NULL(buffer, HAILO_INVALID_ARGUMENT);\r
- CHECK(0 != buffer->size(), HAILO_INVALID_ARGUMENT);\r
+hailo_status AsyncChannel::transfer_async(TransferRequest &&transfer_request)\r
+{\r
+ CHECK_ARG_NOT_NULL(transfer_request.buffer.data());\r
+ CHECK(0 != transfer_request.buffer.size(), HAILO_INVALID_ARGUMENT, "Buffer is empty (size 0)");\r
+\r
+ auto is_new_mapping = true;\r
+ MappedBufferPtr mapped_buffer = nullptr;\r
+ if (transfer_request.mapped_buffer != nullptr) {\r
+ assert(transfer_request.buffer.data() == transfer_request.mapped_buffer->data());\r
+ assert(transfer_request.buffer.size() == transfer_request.mapped_buffer->size());\r
+ CHECK(transfer_request.mapped_buffer->storage().type() == BufferStorage::Type::DMA, HAILO_INVALID_ARGUMENT,\r
+ "Buffer must be dma-able (provided buffer type {})", transfer_request.mapped_buffer->storage().type());\r
+\r
+ // Map if not already mapped\r
+ const auto mapping_direction = (m_direction == Direction::H2D) ? HAILO_DMA_BUFFER_DIRECTION_H2D : HAILO_DMA_BUFFER_DIRECTION_D2H;\r
+ auto is_new_mapping_exp = transfer_request.mapped_buffer->storage().dma_map(m_driver, mapping_direction);\r
+ CHECK_EXPECTED_AS_STATUS(is_new_mapping_exp);\r
+ is_new_mapping = is_new_mapping_exp.release();\r
+\r
+ auto mapped_buffer_exp = transfer_request.mapped_buffer->storage().get_dma_mapped_buffer(m_driver.device_id());\r
+ CHECK_EXPECTED_AS_STATUS(mapped_buffer_exp);\r
+ mapped_buffer = mapped_buffer_exp.release();\r
+ } else {\r
+ auto mapped_buffer_exp = MappedBuffer::create_shared(m_driver, m_direction,\r
+ transfer_request.buffer.size(), transfer_request.buffer.data());\r
+ CHECK_EXPECTED_AS_STATUS(mapped_buffer_exp);\r
+ mapped_buffer = mapped_buffer_exp.release();\r
+ }\r
+\r
+ if (!is_new_mapping) {\r
+ // The buffer has been previously mapped, so it needs to be sync'd from host to device.\r
+ // * If the buffer is mapped H2D/BOTH, then synchronize will make sure the device "sees" the most "up to date"\r
+ // version of the buffer.\r
+ // * If the buffer is mapped D2H, it might have been changed by the host between the time it was mapped and the\r
+ // current async transfer. Synchronizing will transfer ownership to the device, so that when the transfer is\r
+ // complete, the host will "see" an "up to date" version of the buffer.\r
+ auto status = mapped_buffer->synchronize(HailoRTDriver::DmaSyncDirection::TO_DEVICE);\r
+ CHECK_SUCCESS(status);\r
+ }\r
\r
std::lock_guard<RecursiveSharedMutex> state_guard(m_state->mutex());\r
+ if (!m_state->m_is_channel_activated) {\r
+ return HAILO_STREAM_NOT_ACTIVATED;\r
+ }\r
if (m_state->m_is_aborted) {\r
LOGGER__INFO("Tried to write to aborted channel {}", m_channel_id);\r
return HAILO_STREAM_ABORTED_BY_USER;\r
}\r
\r
- hailo_status status = HAILO_UNINITIALIZED;\r
if (Direction::H2D == m_direction) {\r
- status = transfer_h2d(buffer, user_callback, opaque);\r
+ return transfer_h2d(mapped_buffer, transfer_request.callback);\r
} else {\r
- status = transfer_d2h(buffer, user_callback, opaque);\r
+ return transfer_d2h(mapped_buffer, transfer_request.callback);\r
}\r
-\r
- if (HAILO_STREAM_NOT_ACTIVATED == status) {\r
- LOGGER__INFO("Transfer failed because Channel {} is not activated", m_channel_id);\r
- return HAILO_STREAM_NOT_ACTIVATED;\r
- } \r
- if (HAILO_SUCCESS != status) {\r
- LOGGER__ERROR("Transfer failed for channel {} with status {}", m_channel_id, status);\r
- return status;\r
- }\r
-\r
- return HAILO_SUCCESS;\r
}\r
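// A minimal standalone sketch of the sync-on-reuse rule implemented above (function
// name and parameters are invented, not HailoRT API): a TO_DEVICE sync is only needed
// when an existing mapping is reused, since the host may have touched the buffer after
// it was first mapped. A freshly created mapping already hands the buffer to the device.
#include <cassert>

static bool needs_sync_to_device(bool user_supplied_mapping, bool is_new_mapping)
{
    if (!user_supplied_mapping) {
        return false; // map-on-the-fly path: the mapping itself transfers ownership
    }
    return !is_new_mapping; // reused mapping: sync so the device sees the latest data
}

static void needs_sync_to_device_examples()
{
    assert(!needs_sync_to_device(false, true)); // buffer mapped as part of this transfer
    assert(!needs_sync_to_device(true, true));  // user buffer, but mapped here for the first time
    assert(needs_sync_to_device(true, false));  // previously mapped buffer being reused
}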
\r
hailo_status AsyncChannel::cancel_pending_transfers()\r
std::lock_guard<RecursiveSharedMutex> state_guard(m_state->mutex());\r
for (auto &pending_buffer_info : m_state->m_pending_buffers) {\r
if (pending_buffer_info.on_transfer_done) {\r
- pending_buffer_info.on_transfer_done(pending_buffer_info.buffer,\r
- hailo_async_transfer_completion_info_t{HAILO_STREAM_NOT_ACTIVATED},\r
- pending_buffer_info.opaque);\r
- // Release our references to user buffer, callback and opaque\r
+ pending_buffer_info.on_transfer_done(HAILO_STREAM_ABORTED_BY_USER);\r
+ // Release our references to user buffer and callback.\r
pending_buffer_info = PendingBuffer{};\r
} else {\r
LOGGER__WARNING("No transfer done callback found for transfer (channel {}); skipping", m_channel_id);\r
return HAILO_SUCCESS;\r
}\r
\r
-hailo_status AsyncChannel::transfer(void */* buf */, size_t /* count */)\r
+hailo_status AsyncChannel::transfer_sync(void */* buf */, size_t /* count */, std::chrono::milliseconds /* timeout */)\r
{\r
return HAILO_NOT_IMPLEMENTED;\r
}\r
return make_unexpected(HAILO_NOT_IMPLEMENTED);\r
}\r
\r
-hailo_status AsyncChannel::transfer_d2h(std::shared_ptr<DmaMappedBuffer> buffer, const TransferDoneCallback &user_callback, void *opaque)\r
+hailo_status AsyncChannel::transfer_d2h(MappedBufferPtr mapped_buffer, const InternalTransferDoneCallback &callback)\r
{\r
InterruptsDomain first_desc_interrupts_domain = InterruptsDomain::NONE;\r
// Provide FW interrupt only at the end of the last transfer in the batch\r
- InterruptsDomain last_desc_interrupts_domain = (m_state->m_accumulated_transfers + 1 == m_transfers_per_axi_intr) ? \r
+ InterruptsDomain last_desc_interrupts_domain = (m_state->m_accumulated_transfers + 1 == m_transfers_per_axi_intr) ?\r
InterruptsDomain::BOTH : InterruptsDomain::HOST;\r
\r
- const auto status = prepare_descriptors(buffer, user_callback, opaque, first_desc_interrupts_domain, last_desc_interrupts_domain);\r
+ const auto status = prepare_descriptors(mapped_buffer, callback, first_desc_interrupts_domain,\r
+ last_desc_interrupts_domain);\r
+ if (HAILO_QUEUE_IS_FULL == status) {\r
+ return status;\r
+ }\r
CHECK_SUCCESS(status);\r
\r
m_state->m_accumulated_transfers = (m_state->m_accumulated_transfers + 1) % m_transfers_per_axi_intr;\r
return HAILO_SUCCESS;\r
}\r
\r
-hailo_status AsyncChannel::transfer_h2d(std::shared_ptr<DmaMappedBuffer> buffer, const TransferDoneCallback &user_callback, void *opaque)\r
+hailo_status AsyncChannel::transfer_h2d(MappedBufferPtr mapped_buffer, const InternalTransferDoneCallback &callback)\r
{\r
// For h2d, only the host needs to get transfer done interrupts\r
InterruptsDomain last_desc_interrupts_domain = InterruptsDomain::HOST;\r
InterruptsDomain first_desc_interrupts_domain = (m_latency_meter != nullptr) ?\r
InterruptsDomain::HOST : InterruptsDomain::NONE;\r
\r
- return prepare_descriptors(buffer, user_callback, opaque, first_desc_interrupts_domain, last_desc_interrupts_domain);\r
+ return prepare_descriptors(mapped_buffer, callback, first_desc_interrupts_domain,\r
+ last_desc_interrupts_domain);\r
}\r
\r
-hailo_status AsyncChannel::prepare_descriptors(std::shared_ptr<DmaMappedBuffer> buffer, const TransferDoneCallback &user_callback,\r
- void *opaque, InterruptsDomain first_desc_interrupts_domain, InterruptsDomain last_desc_interrupts_domain)\r
+hailo_status AsyncChannel::prepare_descriptors(MappedBufferPtr mapped_buffer,\r
+ const InternalTransferDoneCallback &callback, InterruptsDomain first_desc_interrupts_domain,\r
+ InterruptsDomain last_desc_interrupts_domain)\r
{\r
- const auto desired_desc_num = m_desc_list->descriptors_in_buffer(buffer->size());\r
+ assert(mapped_buffer != nullptr);\r
+\r
+ const auto desired_desc_num = m_desc_list->descriptors_in_buffer(mapped_buffer->size());\r
CHECK(desired_desc_num <= MAX_DESCS_COUNT, HAILO_INTERNAL_FAILURE);\r
const uint16_t desc_num = static_cast<uint16_t>(desired_desc_num);\r
\r
- int num_available = get_num_available();\r
- int num_processed = CB_TAIL(m_state->m_descs);\r
- int num_free = CB_AVAIL(m_state->m_descs, num_available, num_processed);\r
+ const auto num_available = get_num_available();\r
+ const auto num_processed = CB_TAIL(m_state->m_descs);\r
+ const auto num_free = CB_AVAIL(m_state->m_descs, num_available, num_processed);\r
if (num_free < desc_num) {\r
- // TODO: do we want to block here?\r
- return HAILO_OUT_OF_DESCRIPTORS;\r
+ return HAILO_QUEUE_IS_FULL;\r
}\r
\r
- const auto status = m_desc_list->configure_to_use_buffer(*buffer, m_channel_id, num_available);\r
+ const auto status = m_desc_list->configure_to_use_buffer(*mapped_buffer, m_channel_id, num_available);\r
CHECK_SUCCESS(status);\r
+\r
if (nullptr != m_latency_meter) {\r
// Program first descriptor\r
m_desc_list->program_single_descriptor((*m_desc_list)[num_available], m_desc_list->desc_page_size(),\r
first_desc_interrupts_domain);\r
}\r
- auto actual_desc_count = m_desc_list->program_last_descriptor(buffer->size(), last_desc_interrupts_domain,\r
- num_available, true);\r
+ auto actual_desc_count = m_desc_list->program_last_descriptor(mapped_buffer->size(), last_desc_interrupts_domain,\r
+ num_available);\r
CHECK_EXPECTED_AS_STATUS(actual_desc_count, "Failed to program desc_list for channel {}", m_channel_id);\r
assert (actual_desc_count.value() == desc_num);\r
- int last_desc_avail = ((num_available + desc_num - 1) & m_state->m_descs.size_mask);\r
+ assert(desc_num > 0);\r
+ const auto last_desc_avail = static_cast<uint16_t>((num_available + desc_num - 1) & m_state->m_descs.size_mask);\r
+\r
+ const auto wrapped_callback = [this, mapped_buffer, callback](hailo_status callback_status) {\r
+ if (HAILO_SUCCESS != callback_status) {\r
+ // No need to sync, just forward the callback.\r
+ callback(callback_status);\r
+ return;\r
+ }\r
\r
- const auto callback = [this, user_callback](std::shared_ptr<DmaMappedBuffer> buffer, const hailo_async_transfer_completion_info_t &status, void *opaque) {\r
- user_callback(buffer, status, opaque);\r
+ // The device may only change the contents of mapped_buffer if it was mapped in Direction::D2H\r
+ // (not Direction::BOTH because channels are either D2H or H2D). Hence, we don't need to sync H2D\r
+ // buffers to the host (the host's "view" of the buffer is "up to date").\r
+ if (m_direction == Direction::D2H) {\r
+ auto sync_status = mapped_buffer->synchronize(HailoRTDriver::DmaSyncDirection::TO_HOST);\r
+ if (sync_status != HAILO_SUCCESS) {\r
+ LOGGER__ERROR("Failed to sync buffer to host with status {}", sync_status);\r
+ callback_status = sync_status;\r
+ }\r
+ }\r
\r
- // opaque is only for the user callback\r
- static constexpr void *NO_CONTEXT = nullptr;\r
- m_transfer_done_callback(buffer, status, NO_CONTEXT);\r
+ callback(callback_status);\r
};\r
\r
- m_state->add_pending_buffer(num_available, last_desc_avail, m_direction, callback, buffer, opaque);\r
+ m_state->add_pending_buffer(num_available, last_desc_avail, m_direction, wrapped_callback, mapped_buffer);\r
return inc_num_available(desc_num);\r
}\r
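// Standalone sketch of the descriptor-ring availability check above. The exact
// CB_TAIL/CB_AVAIL macro definitions are not shown in this hunk; this assumes the
// usual power-of-two circular-buffer formulation that keeps one slot unused so
// "full" and "empty" stay distinguishable, so treat it as illustrative only.
#include <cassert>
#include <cstdint>

static uint16_t ring_free(uint16_t num_available, uint16_t num_processed, uint16_t size_mask)
{
    // num_available is the producer index, num_processed the consumer index.
    return static_cast<uint16_t>((num_processed - num_available - 1) & size_mask);
}

static void ring_free_examples()
{
    // 16-entry ring => size_mask == 15
    assert(ring_free(0, 0, 15) == 15); // empty ring: 15 usable slots
    assert(ring_free(5, 2, 15) == 12); // 3 descriptors in flight
    assert(ring_free(1, 2, 15) == 0);  // producer has caught up to the consumer
    // prepare_descriptors() returns HAILO_QUEUE_IS_FULL when the free count < desc_num.
}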
\r
static Expected<AsyncChannelPtr> create(vdma::ChannelId channel_id, Direction direction, HailoRTDriver &driver,\r
uint32_t descs_count, uint16_t desc_page_size, const std::string &stream_name = "", LatencyMeterPtr latency_meter = nullptr,\r
uint16_t transfers_per_axi_intr = 1);\r
- \r
+\r
AsyncChannel(vdma::ChannelId channel_id, Direction direction, HailoRTDriver &driver, uint32_t descs_count,\r
uint16_t desc_page_size, const std::string &stream_name, LatencyMeterPtr latency_meter, uint16_t transfers_per_axi_intr,\r
hailo_status &status);\r
virtual hailo_status complete_channel_activation(uint32_t transfer_size, bool resume_pending_transfers) override;\r
virtual hailo_status complete_channel_deactivation() override;\r
\r
- virtual hailo_status transfer(std::shared_ptr<DmaMappedBuffer> buffer, const TransferDoneCallback &user_callback, void *opaque) override;\r
+ virtual hailo_status transfer_async(TransferRequest &&transfer_request) override;\r
virtual hailo_status cancel_pending_transfers() override;\r
\r
- virtual hailo_status transfer(void *buf, size_t count) override;\r
+ virtual hailo_status transfer_sync(void *buf, size_t count, std::chrono::milliseconds timeout) override;\r
// TODO: don't want\r
virtual hailo_status write_buffer(const MemoryView &buffer, std::chrono::milliseconds timeout,\r
const std::function<bool()> &should_cancel) override;\r
virtual Expected<size_t> get_d2h_pending_descs_count() override;\r
\r
private:\r
- hailo_status transfer_d2h(std::shared_ptr<DmaMappedBuffer> buffer, const TransferDoneCallback &user_callback, void *opaque);\r
- hailo_status transfer_h2d(std::shared_ptr<DmaMappedBuffer> buffer, const TransferDoneCallback &user_callback, void *opaque);\r
- hailo_status prepare_descriptors(std::shared_ptr<DmaMappedBuffer> buffer, const TransferDoneCallback &user_callback,\r
- void *opaque, InterruptsDomain first_desc_interrupts_domain, InterruptsDomain last_desc_interrupts_domain);\r
+ hailo_status transfer_d2h(MappedBufferPtr mapped_buffer, const InternalTransferDoneCallback &user_callback);\r
+ hailo_status transfer_h2d(MappedBufferPtr mapped_buffer, const InternalTransferDoneCallback &user_callback);\r
+ hailo_status prepare_descriptors(MappedBufferPtr mapped_buffer, const InternalTransferDoneCallback &user_callback,\r
+ InterruptsDomain first_desc_interrupts_domain, InterruptsDomain last_desc_interrupts_domain);\r
};\r
\r
} /* namespace vdma */\r
status = HAILO_INVALID_ARGUMENT;
return;
}
-
- m_transfer_done_callback = [this](std::shared_ptr<DmaMappedBuffer>, const hailo_async_transfer_completion_info_t &, void *) {
- m_user_interrupt_callback(1);
- };
}
void BoundaryChannel::clear_pending_buffers_descriptors()
hailo_status BoundaryChannel::trigger_channel_completion(uint16_t hw_num_processed)
{
- size_t processed_no = 0;
+ PendingBuffersQueue completed_buffers{PENDING_BUFFERS_SIZE};
{
// NOTE: right now, we can retake the 'completion' descriptor for a new transfer before handling the interrupt.
// We should have our own pointers indicating what's free instead of reading from HW.
- // TODO: consider calculating the last descriptor using the src_desc_avail and src_desc_proc instead of using
- // status?
- // TODO: we might free a pending buffer which we didn't get an interrupt for yet. we should still handle this
- // situation correctly.
-
- std::lock_guard<RecursiveSharedMutex> state_guard(m_state->mutex());
- // Although the hw_num_processed should be a number between 0 and m_descs.size-1, if m_desc.size < 0x10000
- // (the maximum desc size), the actual hw_num_processed is a number between 1 and m_descs.size. Therefore the
- // value can be m_descs.size, in this case we change it to zero.
- hw_num_processed = static_cast<uint16_t>(hw_num_processed & m_state->m_descs.size_mask);
+ std::unique_lock<RecursiveSharedMutex> state_guard(m_state->mutex());
if (m_state->m_is_aborted) {
return HAILO_STREAM_ABORTED_BY_USER;
return HAILO_STREAM_NOT_ACTIVATED;
}
+ // Although hw_num_processed should be a number between 0 and m_descs.size-1, if m_descs.size < 0x10000
+ // (the maximum desc size), the actual hw_num_processed is a number between 1 and m_descs.size. Therefore the
+ // value can be m_descs.size; in that case we change it to zero.
+ hw_num_processed = static_cast<uint16_t>(hw_num_processed & m_state->m_descs.size_mask);
+
if (m_latency_meter != nullptr) {
// The latency meter gets an updated hw_num_processed via a call to vdma_interrupts_read_timestamps
// (the desc index of the last measured timestamp returned from that ioctl). Since update_latency_meter
hw_num_processed = latency_meter_hw_num_processed.value();
}
- const auto last_num_processed = static_cast<uint16_t>(CB_TAIL(m_state->m_descs));
+ const auto previous_num_processed = static_cast<uint16_t>(CB_TAIL(m_state->m_descs));
- // Calculate pending_buffers_count before iteration, because the iteration removes done transfers
+ // Calculate pending_buffers_count before iteration, because the iteration removes done transfers.
const auto pending_buffers_count = m_state->m_pending_buffers.size();
for (size_t i = 0; i < pending_buffers_count; i++) {
- auto &last_pending_buffer_info = m_state->m_pending_buffers.front();
- const auto last_desc_index = static_cast<uint16_t>(last_pending_buffer_info.last_desc);
- // Transfer is complete if its last descriptor is in [last_num_processed, hw_num_processed) or
- // the the buffer is empty (hw_num_processed == get_num_available())
- const bool is_complete = is_desc_between(last_num_processed, hw_num_processed, last_desc_index) ||
- (hw_num_processed == get_num_available());
-
- #ifndef NDEBUG
- static constexpr auto STATUS_MASK = 0xFF;
- static constexpr auto ERROR_BIT = 1;
- const auto status = (*m_desc_list)[last_desc_index].RemainingPageSize_Status & STATUS_MASK;
- CHECK(!is_bit_set(status, ERROR_BIT), HAILO_INTERNAL_FAILURE,
- "Error while processing descriptor {} of DMA {} on board {}.",
- last_desc_index, m_channel_id, m_driver.dev_path());
-
- // status is read after hw_num_processed, so we want is_complete -> (status == 1).
- assert(!is_complete || ((status & 0x1) == 1));
- #endif
-
- if (!is_complete) {
+ if (!is_complete(m_state->m_pending_buffers.front(), previous_num_processed, hw_num_processed)) {
break;
}
- // Clear relevant descriptors from previous transfer
- if (nullptr != m_latency_meter) {
- const auto latency_desc_index = last_pending_buffer_info.latency_measure_desc;
- m_desc_list->clear_descriptor(latency_desc_index);
- }
- m_desc_list->clear_descriptor(last_desc_index);
-
- _CB_SET(m_state->m_descs.tail, (last_pending_buffer_info.last_desc + 1) & m_state->m_descs.size_mask);
- last_pending_buffer_info.on_transfer_done(last_pending_buffer_info.buffer,
- hailo_async_transfer_completion_info_t{HAILO_SUCCESS}, last_pending_buffer_info.opaque);
- processed_no++;
+ // Move item from pending_buffers to completed_buffers
+ completed_buffers.push_back(std::move(m_state->m_pending_buffers.front()));
m_state->m_pending_buffers.pop_front();
}
}
- if (0 < processed_no) {
+ // completed_buffers were moved out of m_pending_buffers inside the lock. Now we are free to process them and
+ // call the right completion callbacks without the state mutex held.
+ for (auto &pending_buffer : completed_buffers) {
+ on_pending_buffer_irq(pending_buffer);
+ }
+
+ if (!completed_buffers.empty()) {
m_state->transfer_buffer_cv().notify_all();
}
return HAILO_SUCCESS;
}
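// Generic sketch of the pattern trigger_channel_completion() uses above: drain the
// completed items while holding the state lock, then invoke their callbacks after
// releasing it, so a callback that re-enters the channel (e.g. to queue a new
// transfer) cannot deadlock. Types here are simplified stand-ins, not HailoRT types.
#include <cstddef>
#include <deque>
#include <functional>
#include <mutex>
#include <utility>
#include <vector>

class CompletionQueue {
public:
    void push(std::function<void()> on_done)
    {
        std::lock_guard<std::mutex> lock(m_mutex);
        m_pending.push_back(std::move(on_done));
    }

    void complete_ready(size_t ready_count)
    {
        std::vector<std::function<void()>> ready;
        {
            std::lock_guard<std::mutex> lock(m_mutex);
            for (size_t i = 0; (i < ready_count) && !m_pending.empty(); i++) {
                ready.push_back(std::move(m_pending.front()));
                m_pending.pop_front();
            }
        } // lock released before running user code
        for (auto &on_done : ready) {
            on_done();
        }
    }

private:
    std::mutex m_mutex;
    std::deque<std::function<void()>> m_pending;
};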
-hailo_status BoundaryChannel::register_interrupt_callback(const ProcessingCompleteCallback &callback)
+void BoundaryChannel::register_interrupt_callback(const ProcessingCompleteCallback &callback)
{
std::lock_guard<RecursiveSharedMutex> state_guard(m_state->mutex());
m_user_interrupt_callback = callback;
- return HAILO_SUCCESS;
}
CONTROL_PROTOCOL__host_buffer_info_t BoundaryChannel::get_boundary_buffer_info(uint32_t transfer_size)
hailo_status BoundaryChannel::deactivate()
{
std::unique_lock<RecursiveSharedMutex> state_guard(m_state->mutex());
+ {
+ CHECK(m_state->m_is_channel_activated, HAILO_INTERNAL_FAILURE,
+ "Vdma channel {} is not activated", m_channel_id);
+ m_state->m_is_channel_activated = false;
- CHECK(m_state->m_is_channel_activated, HAILO_INTERNAL_FAILURE,
- "Vdma channel {} is not activated", m_channel_id);
- m_state->m_is_channel_activated = false;
-
- // Reset the user callback, so as not to keep objects provided by the user alive (they may lead to a chain of refs
- // back to this channel causing it to be leaked).
- // Note: PendingBuffers held by m_pending_buffers may still hold copies of the current m_transfer_done_callback,
- // which in turn holds a reference to *this. Since we stop the m_wait_interrupts_thread there's no risk that
- // these callbacks will be called and we don't need to reset this callback.
- m_user_interrupt_callback = ignore_processing_complete;
+ // Note: PendingBuffers held by m_pending_buffers may still hold copies of the current m_transfer_done_callback,
+ // which in turn holds a reference to *this. Since we stop the m_wait_interrupts_thread there's no risk that
+ // these callbacks will be called and we don't need to reset this callback.
- auto status = complete_channel_deactivation();
- CHECK_SUCCESS(status);
+ auto status = complete_channel_deactivation();
+ CHECK_SUCCESS(status);
+ }
+ m_state->m_can_transfer_buffer_cv.notify_all();
return HAILO_SUCCESS;
}
return m_type;
}
+hailo_status BoundaryChannel::set_transfers_per_axi_intr(uint16_t transfers_per_axi_intr)
+{
+ CHECK(0 != transfers_per_axi_intr, HAILO_INVALID_ARGUMENT, "Invalid transfers per axi interrupt");
+ m_transfers_per_axi_intr = transfers_per_axi_intr;
+ return HAILO_SUCCESS;
+}
+
hailo_status BoundaryChannel::flush(const std::chrono::milliseconds &timeout)
{
if (Direction::D2H == m_direction) {
status = HAILO_STREAM_ABORTED_BY_USER;
return true; // return true so that the wait will finish
}
+ if (!m_state->m_is_channel_activated) {
+ status = HAILO_STREAM_NOT_ACTIVATED;
+ return true; // return true so that the wait will finish
+ }
return m_state->m_pending_buffers.empty();
});
CHECK(was_successful, HAILO_TIMEOUT, "Got HAILO_TIMEOUT while waiting for channel {} interrupts on flush", m_channel_id);
if (desc_num == m_state->m_descs.size) {
// Special case when checking if the buffer is empty
- return num_available == num_processed;
+ return num_available == num_processed;
}
int num_free = CB_AVAIL(m_state->m_descs, num_available, num_processed);
return true;
}
-hailo_status BoundaryChannel::wait(size_t buffer_size, std::chrono::milliseconds timeout)
+hailo_status BoundaryChannel::wait(size_t buffer_size, std::chrono::milliseconds timeout,
+ bool stop_if_deactivated)
{
+ std::unique_lock<RecursiveSharedMutex> state_guard(m_state->mutex());
+ assert(state_guard.owns_lock());
+
const auto max_transfer_size = m_desc_list->desc_page_size() * m_desc_list->count();
CHECK(buffer_size < max_transfer_size, HAILO_INVALID_ARGUMENT,
"Requested transfer size ({}) must be smaller than ({})", buffer_size, max_transfer_size);
std::bind(&BoundaryChannel::is_ready_for_transfer_h2d, this, buffer_size) :
std::bind(&BoundaryChannel::is_ready_for_transfer_d2h, this, buffer_size);
- std::unique_lock<RecursiveSharedMutex> state_guard(m_state->mutex());
- hailo_status status = HAILO_SUCCESS; // Best effort
- bool was_successful = m_state->transfer_buffer_cv().wait_for(state_guard, timeout, [this, is_ready_for_transfer, &status] () {
- if (m_state->m_is_aborted) {
- status = HAILO_STREAM_ABORTED_BY_USER;
- return true; // return true so that the wait will finish
- }
+ auto status = HAILO_SUCCESS; // Best effort
+ bool was_successful = m_state->transfer_buffer_cv().wait_for(state_guard, timeout,
+ [this, is_ready_for_transfer, stop_if_deactivated, &status] () {
+ if (m_state->m_is_aborted) {
+ status = HAILO_STREAM_ABORTED_BY_USER;
+ return true; // return true so that the wait will finish
+ }
+ if (stop_if_deactivated && !m_state->m_is_channel_activated) {
+ status = HAILO_STREAM_NOT_ACTIVATED;
+ return true; // return true so that the wait will finish
+ }
- return is_ready_for_transfer();
- });
+ return is_ready_for_transfer();
+ }
+ );
CHECK(was_successful, HAILO_TIMEOUT, "Got HAILO_TIMEOUT while waiting for channel {} interrupts", m_channel_id);
return status;
}
-hailo_status BoundaryChannel::set_transfers_per_axi_intr(uint16_t transfers_per_axi_intr)
+bool BoundaryChannel::is_complete(const PendingBuffer &pending_buffer, uint16_t previous_num_processed,
+ uint16_t current_num_processed)
{
- CHECK(0 != transfers_per_axi_intr, HAILO_INVALID_ARGUMENT, "Invalid transfers per axi interrupt");
- m_transfers_per_axi_intr = transfers_per_axi_intr;
- return HAILO_SUCCESS;
+ // Transfer is complete if its last descriptor is in [previous_num_processed, current_num_processed) or
+ // the buffer is empty (current_num_processed == get_num_available())
+ return is_desc_between(previous_num_processed, current_num_processed, pending_buffer.last_desc) ||
+ (current_num_processed == get_num_available());
+}
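// is_desc_between() is not shown in this hunk. A self-contained, half-open,
// wrap-around interval test consistent with the comment above would look roughly
// like this (an assumption for illustration, not the exact HailoRT implementation):
#include <cassert>
#include <cstdint>

static bool desc_in_range(uint16_t begin, uint16_t end, uint16_t desc)
{
    if (begin == end) {
        return false; // empty interval
    }
    if (begin < end) {
        return (begin <= desc) && (desc < end); // plain [begin, end)
    }
    return (desc >= begin) || (desc < end); // wrapped interval, e.g. [14, 2)
}

static void desc_in_range_examples()
{
    assert(desc_in_range(2, 6, 4));
    assert(!desc_in_range(2, 6, 6));  // half-open: end excluded
    assert(desc_in_range(14, 2, 15)); // wrap-around, high side
    assert(desc_in_range(14, 2, 1));  // wrap-around, low side
    assert(!desc_in_range(14, 2, 5));
}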
+
+
+void BoundaryChannel::on_pending_buffer_irq(PendingBuffer &pending_buffer)
+{
+#ifndef NDEBUG
+ auto &last_desc = (*m_desc_list)[pending_buffer.last_desc];
+ if (!last_desc.is_done() || last_desc.is_error()) {
+ LOGGER__ERROR("Error while processing descriptor {} of DMA {} on device {} DESC_STATUS=0x{:x}.",
+ pending_buffer.last_desc, m_channel_id, m_driver.device_id(), last_desc.status());
+ pending_buffer.on_transfer_done(HAILO_INTERNAL_FAILURE);
+ return;
+ }
+#endif
+
+ {
+ std::unique_lock<RecursiveSharedMutex> state_guard(m_state->mutex());
+
+ // First, we want to call m_user_interrupt_callback. This callback is meant to be called right after we
+ // got an interrupt and before the user can read the frame or write a new frame.
+ // We call this callback inside the lock to make sure it won't be called when the channel is aborted.
+ if (!m_state->m_is_aborted) {
+ m_user_interrupt_callback();
+ }
+
+ // Then we increase desc num_proc (can happen only in this flow). After it is increased -
+ // 1. On D2H channels - the output can be read by the user.
+ // 2. On H2D channels - new input can be written to the buffer.
+ // Clear relevant descriptors from previous transfer
+ if (nullptr != m_latency_meter) {
+ m_desc_list->clear_descriptor(pending_buffer.latency_measure_desc);
+ }
+ m_desc_list->clear_descriptor(pending_buffer.last_desc);
+
+ _CB_SET(m_state->m_descs.tail, (pending_buffer.last_desc + 1) & m_state->m_descs.size_mask);
+ }
+
+ // Finally, we notify user callbacks registered with the transfer.
+ // We want to make sure that the callbacks are called after the descriptors can be reused (so the user will
+ // be able to start a new transfer).
+ pending_buffer.on_transfer_done(HAILO_SUCCESS);
}
} /* namespace vdma */
class BoundaryChannel;
using BoundaryChannelPtr = std::shared_ptr<BoundaryChannel>;
-using ProcessingCompleteCallback = std::function<void(uint32_t frames_processed)>;
+using ProcessingCompleteCallback = std::function<void()>;
class BoundaryChannel : public ChannelBase
{
hailo_status deactivate();
Type type() const;
+ hailo_status set_transfers_per_axi_intr(uint16_t transfers_per_axi_intr);
void clear_pending_buffers_descriptors();
hailo_status trigger_channel_completion(uint16_t hw_num_processed);
- virtual hailo_status register_interrupt_callback(const ProcessingCompleteCallback &callback);
+
+ // Register a new interrupt callback (and reset the previous one).
+ // Note - when resetting an old callback, it may still be called (until interrupts are stopped).
+ void register_interrupt_callback(const ProcessingCompleteCallback &callback);
+
CONTROL_PROTOCOL__host_buffer_info_t get_boundary_buffer_info(uint32_t transfer_size);
virtual hailo_status abort();
virtual hailo_status clear_abort();
// For D2H channels, we don't buffer data
// Hence there's nothing to be "flushed" and the function will return with HAILO_SUCCESS
virtual hailo_status flush(const std::chrono::milliseconds &timeout);
- virtual hailo_status wait(size_t buffer_size, std::chrono::milliseconds timeout);
- hailo_status set_transfers_per_axi_intr(uint16_t transfers_per_axi_intr);
- virtual hailo_status transfer(void *buf, size_t count) = 0;
+ // Blocks until buffer_size bytes can be transferred to/from the channel or until timeout has elapsed.
+ // If stop_if_deactivated is true, this function will return HAILO_STREAM_NOT_ACTIVATED after deactivate()
+ // is called. Otherwise, this function can be used to access the buffer while the channel is not active.
+ hailo_status wait(size_t buffer_size, std::chrono::milliseconds timeout, bool stop_if_deactivated=false);
+
+ // Transfers count bytes to/from buf via the channel.
+ // Blocks until the transfer can be registered or timeout has elapsed. Hence, calling 'wait(buffer_size, timeout)'
+ // prior to 'transfer_sync(buf, buffer_size, timeout)' is redundant.
+ virtual hailo_status transfer_sync(void *buf, size_t count, std::chrono::milliseconds timeout) = 0;
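// A rough call-site sketch of the declaration above (channel setup elided;
// FRAME_SIZE is a placeholder, not a HailoRT constant):
//   uint8_t frame[FRAME_SIZE];
//   auto status = channel->transfer_sync(frame, sizeof(frame), std::chrono::milliseconds(1000));
//   // HAILO_TIMEOUT means the transfer could not be registered within the timeout.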
+
// TODO: can write_buffer + send_pending_buffer move to BufferedChannel? (HRT-9105)
// Either write_buffer + send_pending_buffer or transfer (h2d) should be used on a given channel, not both
virtual hailo_status write_buffer(const MemoryView &buffer, std::chrono::milliseconds timeout,
const std::function<bool()> &should_cancel) = 0;
virtual hailo_status send_pending_buffer() = 0;
-
- // TODO: move buffer?
- // TODO: If the same callback is used for different buffers we need a way to tell the transfers appart
- // - Passing buffer to callback could do the trick. However, what will happen if the same buffer has been transferred twice?
- // - Maybe add a unique transfer_id? At least unique in the context of the maximum number of ongoing transfers
- // TODO: What if there's no more room in desc list so the transfer can't be programmed? Should the function block
- // - Maybe define that if more than max_concurrent_transfers() (based on a param passed to create) the function will return a failure?
+
// When the transfer is complete (i.e. data is written to/from the buffer with a D2H/H2D channel) the callback is called
- // buffer can't be freed until callback is called
- virtual hailo_status transfer(std::shared_ptr<DmaMappedBuffer> buffer, const TransferDoneCallback &user_callback, void *opaque) = 0;
+ // transfer_request.buffer can't be freed/changed until the callback is called.
+ virtual hailo_status transfer_async(TransferRequest &&transfer_request) = 0;
- // Calls all pending transfer callbacks (if they exist), marking them as canceled by passing hailo_async_transfer_completion_info_t{HAILO_STREAM_NOT_ACTIVATED}.
- // Note: This function is to be called on a deactivated channel object. Calling on an active channel will lead to unexpected results
+ // Calls all pending transfer callbacks (if they exist), marking them as canceled by passing
+ // HAILO_STREAM_ABORTED_BY_USER as a status to the callbacks.
+ // Note: This function is to be called on a deactivated channel object. Calling on an active channel will lead to
+ // unexpected results.
virtual hailo_status cancel_pending_transfers() = 0;
virtual void notify_all() = 0;
virtual Expected<size_t> get_d2h_pending_descs_count() = 0;
protected:
- static void ignore_processing_complete(uint32_t) {}
+ static void ignore_processing_complete() {}
void stop_interrupts_thread(std::unique_lock<RecursiveSharedMutex> &lock);
virtual bool is_ready_for_transfer_h2d(size_t buffer_size);
virtual bool is_ready_for_transfer_d2h(size_t buffer_size);
virtual hailo_status complete_channel_deactivation() = 0;
const Type m_type;
- TransferDoneCallback m_transfer_done_callback;
ProcessingCompleteCallback m_user_interrupt_callback;
uint16_t m_transfers_per_axi_intr;
private:
bool has_room_in_desc_list(size_t buffer_size);
+ bool is_complete(const PendingBuffer &pending_buffer, uint16_t previous_num_processed,
+ uint16_t current_num_processed);
+ void on_pending_buffer_irq(PendingBuffer &buffer);
};
} /* namespace vdma */
#include "common/logger_macros.hpp"
#include "vdma/channel/buffered_channel.hpp"
-#include "vdma/memory/mapped_buffer_factory.hpp"
-#include "vdma/memory/mapped_buffer_impl.hpp"
#include "hw_consts.hpp"
#include <list>
return;
}
- auto mapped_buffer = create_mapped_buffer(descs_count, desc_page_size, direction, driver);
+ auto mapped_buffer = MappedBuffer::create_shared(driver, direction, descs_count * desc_page_size);
if (!mapped_buffer) {
LOGGER__ERROR("Failed building mapped vdma buffer");
status = mapped_buffer.status();
status = HAILO_SUCCESS;
}
-Expected<std::shared_ptr<DmaMappedBuffer>> BufferedChannel::create_mapped_buffer(uint32_t descs_count, uint16_t desc_page_size,
- Direction direction, HailoRTDriver &driver)
-{
- auto desc_page_size_value = driver.calc_desc_page_size(desc_page_size);
- CHECK_AS_EXPECTED(is_powerof2(desc_page_size_value), HAILO_INVALID_ARGUMENT, "Descriptor page_size must be a power of two.");
-
- auto mapped_buffer_exp = MappedBufferFactory::create_mapped_buffer(descs_count * desc_page_size_value, direction, driver);
- CHECK_EXPECTED(mapped_buffer_exp);
-
- auto mapped_buffer = make_shared_nothrow<DmaMappedBuffer>(mapped_buffer_exp.release());
- CHECK_NOT_NULL_AS_EXPECTED(mapped_buffer, HAILO_OUT_OF_HOST_MEMORY);
-
- return mapped_buffer;
-}
-
hailo_status BufferedChannel::complete_channel_deactivation()
{
const auto status = store_channel_buffer_state();
}
if ((Direction::D2H == m_direction) && (transfer_size != 0)) {
- const auto transfers_in_buffer = get_transfers_count_in_buffer(transfer_size);
+ const auto max_transfers_in_buffer = get_transfers_count_in_buffer(transfer_size);
+ const auto transfers_in_buffer = std::min(max_transfers_in_buffer, m_state->m_pending_buffers.capacity());
const auto pending_descs = get_d2h_pending_descs_count();
const auto descs_in_transfer = m_desc_list->descriptors_in_buffer(transfer_size);
const auto pending_transfers = pending_descs.value() / descs_in_transfer;
// We prepare descs in advance for D2H channels:
- // (1) The channel's buffer can store up to 'transfers_in_buffer' frames of size transfer_size
- // (2) There are 'pending_transfers' frames from the previous channel activation (we assume that the same
- // 'transfer_size' was used)
- // (3) Hence, we have room for 'transfers_in_buffer - pending_transfers' frames in the buffer currently.
- // (4) However, we can allow at most 'm_state->m_pending_buffers.capacity()' transfers. We can't store more than
+ // (1) The channel's buffer can store up to 'max_transfers_in_buffer' frames of size transfer_size
+ // (2) However, we can allow at most 'm_state->m_pending_buffers.capacity()' transfers. We can't store more than
// that in the pending buffers circular array.
- // (5) Hence, we'll take the minimum between (3) and (4).
- const auto transfers_count = std::min(transfers_in_buffer - pending_transfers,
- m_state->m_pending_buffers.capacity());
+ // (3) There are 'pending_transfers' frames from the previous channel activation (we assume that the same
+ // 'transfer_size' was used).
+ // (4) Hence, we have room for 'min(max_transfers_in_buffer, pending_buffers.capacity()) - pending_transfers' frames in the buffer currently.
+ const auto transfers_count = transfers_in_buffer - pending_transfers;
status = prepare_d2h_pending_descriptors(transfer_size, static_cast<uint32_t>(transfers_count));
CHECK_SUCCESS(status);
}
return HAILO_SUCCESS;
}
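// A worked instance of the arithmetic in the comment above (values invented for
// illustration): clamp the channel-buffer capacity by the pending-buffers array,
// then subtract the transfers left over from the previous activation.
#include <algorithm>
#include <cassert>
#include <cstddef>

static void d2h_prepare_count_example()
{
    const size_t max_transfers_in_buffer = 12; // frames that fit in the channel buffer
    const size_t pending_buffers_capacity = 8; // circular-array limit
    const size_t pending_transfers = 3;        // left over from the last activation

    const size_t transfers_in_buffer = std::min(max_transfers_in_buffer, pending_buffers_capacity);
    const size_t transfers_count = transfers_in_buffer - pending_transfers;
    assert(5 == transfers_count); // descriptors get pre-programmed for 5 frames
}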
-hailo_status BufferedChannel::transfer(void *buf, size_t count)
+hailo_status BufferedChannel::transfer_sync(void *buf, size_t count, std::chrono::milliseconds timeout)
{
CHECK_NOT_NULL(buf, HAILO_INVALID_ARGUMENT);
CHECK(0 != count, HAILO_INVALID_ARGUMENT);
- std::lock_guard<RecursiveSharedMutex> state_guard(m_state->mutex());
- if (m_state->m_is_aborted) {
- LOGGER__INFO("Tried to write to aborted channel {}", m_channel_id);
- return HAILO_STREAM_ABORTED_BY_USER;
+ auto status = wait(count, timeout);
+ if ((HAILO_STREAM_NOT_ACTIVATED == status) || (HAILO_STREAM_ABORTED_BY_USER == status)) {
+ LOGGER__INFO("wait failed because channel {} is not activated/aborted (status {})", m_channel_id, status);
+ return status;
}
+ CHECK_SUCCESS(status, "wait failed with status {} (channel id: {}, timeout: {}ms)", status, m_channel_id, timeout.count());
- hailo_status status = HAILO_UNINITIALIZED;
+ std::unique_lock<RecursiveSharedMutex> state_guard(m_state->mutex());
if (Direction::H2D == m_direction) {
status = transfer_h2d(buf, count);
} else {
status = transfer_d2h(buf, count);
}
- if (HAILO_STREAM_NOT_ACTIVATED == status) {
- LOGGER__INFO("Transfer failed because Channel {} is not activated", m_channel_id);
- return HAILO_STREAM_NOT_ACTIVATED;
- }
- if (HAILO_SUCCESS != status) {
- LOGGER__ERROR("Transfer failed for channel {} with status {}", m_channel_id, status);
+ if ((HAILO_STREAM_NOT_ACTIVATED == status) || (HAILO_STREAM_ABORTED_BY_USER == status)) {
+ LOGGER__INFO("transfer failed because channel {} is not activated/aborted (status {})", m_channel_id, status);
return status;
}
+ CHECK_SUCCESS(status, "transfer failed with status {} (channel id: {}, timeout: {}ms)", status, m_channel_id, timeout.count());
+
return HAILO_SUCCESS;
}
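// Usage sketch (hypothetical caller; buffer and timeout values are illustrative):
//   std::vector<uint8_t> frame(frame_size);
//   auto status = channel->transfer_sync(frame.data(), frame.size(), std::chrono::milliseconds(1000));
//   if (HAILO_STREAM_ABORTED_BY_USER == status) { /* reader was aborted - stop gracefully */ }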
"Can't write {} bytes to channel buffer (channel buffer size {})",
buffer.size(), m_channel_buffer->size());
+ static const auto SYNC_TO_DEVICE = HailoRTDriver::DmaSyncDirection::TO_DEVICE;
const auto size_to_end = m_channel_buffer->size() - channel_buffer_write_offset;
const auto first_chunk_size = std::min(size_to_end, buffer.size());
const auto first_chunk_addr = static_cast<uint8_t *>(m_channel_buffer->user_address()) + channel_buffer_write_offset;
// Copy from buffer to m_channel_buffer and then synchronize
std::memcpy(first_chunk_addr, buffer.data(), first_chunk_size);
- auto status = m_channel_buffer->pimpl->synchronize(channel_buffer_write_offset, first_chunk_size);
+ auto status = m_channel_buffer->synchronize(channel_buffer_write_offset, first_chunk_size, SYNC_TO_DEVICE);
CHECK_SUCCESS(status);
const auto remaining_size = buffer.size() - first_chunk_size;
if (remaining_size > 0) {
// Copy the remainder from buffer to m_channel_buffer and then synchronize
std::memcpy(m_channel_buffer->user_address(), buffer.data() + first_chunk_size, remaining_size);
- status = m_channel_buffer->pimpl->synchronize(0, remaining_size);
+ status = m_channel_buffer->synchronize(0, remaining_size, SYNC_TO_DEVICE);
CHECK_SUCCESS(status);
}
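// e.g. (illustrative numbers): with a 64 KiB channel buffer and a write offset of 60 KiB, a 10 KiB write
// is split into a 4 KiB chunk at the end of the buffer and a 6 KiB chunk at offset 0, each chunk being
// synchronized to the device right after its memcpy.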
"Can't read {} bytes from channel buffer (channel buffer size {})",
read_size, m_channel_buffer->size());
+ static const auto SYNC_TO_HOST = HailoRTDriver::DmaSyncDirection::TO_HOST;
const auto size_to_end = m_channel_buffer->size() - channel_buffer_read_offset;
const auto first_chunk_size = std::min(size_to_end, read_size);
const auto first_chunk_addr = static_cast<uint8_t *>(m_channel_buffer->user_address()) + channel_buffer_read_offset;
// Synchronize m_channel_buffer and copy to dest_buffer
- auto status = m_channel_buffer->pimpl->synchronize(channel_buffer_read_offset, first_chunk_size);
+ auto status = m_channel_buffer->synchronize(channel_buffer_read_offset, first_chunk_size, SYNC_TO_HOST);
CHECK_SUCCESS(status);
std::memcpy(dest_buffer, first_chunk_addr, first_chunk_size);
const auto remaining_size = read_size - first_chunk_size;
if (remaining_size > 0) {
// Synchronize m_channel_buffer and copy remainder to dest_buffer
- status = m_channel_buffer->pimpl->synchronize(0, remaining_size);
+ status = m_channel_buffer->synchronize(0, remaining_size, SYNC_TO_HOST);
CHECK_SUCCESS(status);
std::memcpy(dest_buffer + first_chunk_size, m_channel_buffer->user_address(), remaining_size);
}
return HAILO_SUCCESS;
}
-hailo_status BufferedChannel::transfer(std::shared_ptr<DmaMappedBuffer>, const TransferDoneCallback &, void *)
+hailo_status BufferedChannel::transfer_async(TransferRequest &&)
{
return HAILO_NOT_IMPLEMENTED;
}
assert(desired_desc_num <= MAX_DESCS_COUNT);
uint16_t desc_num = static_cast<uint16_t>(desired_desc_num);
- int num_available = get_num_available();
- int num_processed = CB_TAIL(m_state->m_descs);
- int num_free = CB_AVAIL(m_state->m_descs, num_available, num_processed);
+ const auto num_available = get_num_available();
+ const auto num_processed = CB_TAIL(m_state->m_descs);
+ const auto num_free = CB_AVAIL(m_state->m_descs, num_available, num_processed);
if (num_free < desc_num) {
return HAILO_OUT_OF_DESCRIPTORS;
}
first_desc_interrupts_domain);
}
auto actual_desc_count = m_desc_list->program_last_descriptor(transfer_size, last_desc_interrupts_domain,
- num_available, true);
+ num_available);
if (!actual_desc_count) {
LOGGER__ERROR("Failed to program desc_list for channel {}", m_channel_id);
return actual_desc_count.status();
}
- assert (actual_desc_count.value() == desc_num);
- int last_desc_avail = ((num_available + desc_num - 1) & m_state->m_descs.size_mask);
+ assert(actual_desc_count.value() == desc_num);
+ assert(desc_num > 0);
+ const auto last_desc_avail = static_cast<uint16_t>((num_available + desc_num - 1) & m_state->m_descs.size_mask);
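+ // e.g. (illustrative): with num_available = 1020, desc_num = 10 and a 1024-desc list (size_mask = 1023),
+ // last_desc_avail = (1020 + 10 - 1) & 1023 = 5, i.e. the transfer's last descriptor wraps around the list.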
- m_state->add_pending_buffer(num_available, last_desc_avail, m_direction, m_transfer_done_callback);
+ m_state->add_pending_buffer(num_available, last_desc_avail, m_direction);
return inc_num_available(desc_num);
}
#ifndef _HAILO_VDMA_BUFFERED_CHANNEL_HPP_
#define _HAILO_VDMA_BUFFERED_CHANNEL_HPP_
-#include "hailo/hailort.h"
-#include "hailo/dma_mapped_buffer.hpp"
+#include "hailo/hailort.h"
+#include "vdma/memory/mapped_buffer.hpp"
#include "vdma/channel/boundary_channel.hpp"
BufferedChannel &operator=(BufferedChannel &&other) = delete;
virtual ~BufferedChannel() = default;
- virtual hailo_status transfer(void *buf, size_t count) override;
+ // Writes to / reads from the channel buffer. This function works even if the channel is not activated
+ // (for example, reading data that is already ready).
+ virtual hailo_status transfer_sync(void *buf, size_t count, std::chrono::milliseconds timeout) override;
// Either write_buffer + send_pending_buffer or transfer (h2d) should be used on a given channel, not both
virtual hailo_status write_buffer(const MemoryView &buffer, std::chrono::milliseconds timeout,
const std::function<bool()> &should_cancel) override;
virtual hailo_status send_pending_buffer() override;
- virtual hailo_status transfer(std::shared_ptr<DmaMappedBuffer>, const TransferDoneCallback &, void *) override;
+ // TODO: merge with "transfer_sync(void *buf, size_t count)"? (HRT-10207)
+ virtual hailo_status transfer_async(TransferRequest &&) override;
virtual hailo_status cancel_pending_transfers() override;
virtual hailo_status complete_channel_activation(uint32_t transfer_size, bool resume_pending_transfers) override;
virtual hailo_status complete_channel_deactivation() override;
virtual void notify_all() override;
private:
- static Expected<std::shared_ptr<DmaMappedBuffer>> create_mapped_buffer(uint32_t descs_count, uint16_t desc_page_size,
- Direction direction, HailoRTDriver &driver);
hailo_status transfer_h2d(void *buf, size_t count);
hailo_status write_buffer_impl(const MemoryView &buffer);
// TODO: m_channel_buffer gets bound to ChannelBase::m_desc_list, meaning the descs in that list point to dma addrs
// that back m_channel_buffer. Because ChannelBase gets dtor'd after BufferedChannel, once m_channel_buffer is freed,
// ChannelBase::m_desc_list will point to a freed buffer. This is ok because the channel objects only get dtor'd after they are deactivated by the fw.
- // Might want to enforce this in hailort as well (e.g. desc lists can hold shared_ptrs to DmaMappedBuffer while they are bound).
+ // Might want to enforce this in hailort as well (e.g. desc lists can hold shared_ptrs to MappedBuffer while they are bound).
// (HRT-9110)
- std::shared_ptr<DmaMappedBuffer> m_channel_buffer;
+ std::shared_ptr<MappedBuffer> m_channel_buffer;
// Using CircularArray because it won't allocate or free memory while pushing and popping. The fact that it is circular is not relevant here
CircularArray<size_t> m_pending_buffers_sizes;
std::atomic_uint16_t m_pending_num_avail_offset;
return;
}
- if (descs_count > MAX_DESCS_COUNT) {
- LOGGER__ERROR("Vdma channel descs_count mustn't be larger than {}", MAX_DESCS_COUNT);
- status = HAILO_INVALID_ARGUMENT;
- return;
- }
-
auto state = VdmaChannelState::create(descs_count, (nullptr != m_latency_meter));
if(!state) {
LOGGER__ERROR("Failed to create channel's state");
}
m_state = state.release();
- // Allocate descriptor list (host side)
status = allocate_descriptor_list(descs_count, desc_page_size);
if (HAILO_SUCCESS != status) {
LOGGER__ERROR("Failed to allocate Vdma buffer for channel transfer! status={}", status);
return num_available;
}
+void ChannelBase::set_num_proc_value(uint16_t new_value)
+{
+ assert(new_value < m_state->m_descs.size);
+ _CB_SET(m_state->m_descs.tail, new_value);
+}
+
Expected<uint16_t> ChannelBase::get_hw_num_processed()
{
auto hw_num_processed = m_host_registers.get_num_processed();
hailo_status ChannelBase::allocate_descriptor_list(uint32_t descs_count, uint16_t desc_page_size)
{
- auto desc_page_size_value = m_driver.calc_desc_page_size(desc_page_size);
- CHECK(is_powerof2(desc_page_size_value), HAILO_INVALID_ARGUMENT, "Descriptor page_size must be a power of two.");
-
- auto desc_list_exp = DescriptorList::create(descs_count, desc_page_size_value, m_driver);
+ static const bool CIRCULAR = true;
+ auto desc_list_exp = DescriptorList::create(descs_count, desc_page_size, CIRCULAR, m_driver);
CHECK_EXPECTED_AS_STATUS(desc_list_exp);
m_desc_list = make_shared_nothrow<DescriptorList>(desc_list_exp.release());
Expected<bool> is_aborted();
hailo_status set_num_avail_value(uint16_t new_value);
uint16_t get_num_available();
+ void set_num_proc_value(uint16_t new_value);
Expected<uint16_t> get_hw_num_processed();
hailo_status inc_num_available(uint16_t value);
static Direction other_direction(const Direction direction);
{
return std::make_pair(a.engine_index, a.channel_index) < std::make_pair(b.engine_index, b.channel_index);
}
+
+ // Allow channel IDs to be compared
+ friend bool operator==(const ChannelId &a, const ChannelId &b)
+ {
+ return ((a.channel_index == b.channel_index) && (a.engine_index == b.engine_index));
+ }
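+
+ // e.g. (illustrative): together with operator< above, this lets ChannelId be used as an associative
+ // key or searched for with std::find over a container of ChannelId values.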
};
} /* namespace vdma */
m_d2h_read_desc_index_abs = 0;
}
-void VdmaChannelState::add_pending_buffer(uint32_t first_desc, uint32_t last_desc, HailoRTDriver::DmaDirection direction,
- const TransferDoneCallback &on_transfer_done, std::shared_ptr<DmaMappedBuffer> buffer, void *opaque)
+void VdmaChannelState::add_pending_buffer(uint16_t first_desc, uint16_t last_desc, HailoRTDriver::DmaDirection direction,
+ const InternalTransferDoneCallback &on_transfer_done, MappedBufferPtr mapped_buffer)
{
if (m_pending_buffers.full()) {
// TODO- HRT-8900 : Fix log and check if should return error
LOGGER__ERROR("no avail space");
}
+
PendingBuffer pending_buffer{};
pending_buffer.last_desc = last_desc;
pending_buffer.latency_measure_desc = (direction == HailoRTDriver::DmaDirection::H2D) ? first_desc : last_desc;
pending_buffer.on_transfer_done = on_transfer_done;
- pending_buffer.buffer = buffer;
- pending_buffer.opaque = opaque;
+ pending_buffer.mapped_buffer = mapped_buffer;
m_pending_buffers.push_back(std::move(pending_buffer));
}
#include "hailo/hailort.h"
#include "os/hailort_driver.hpp"
#include "common/circular_buffer.hpp"
-#include "hailo/dma_mapped_buffer.hpp"
-#include "hailo/stream.hpp"
+#include "vdma/memory/mapped_buffer.hpp"
+#include "stream_common/async_common.hpp"
#include <array>
#include <condition_variable>
namespace vdma {
struct PendingBuffer {
- uint32_t last_desc;
- uint32_t latency_measure_desc;
- TransferDoneCallback on_transfer_done;
- std::shared_ptr<DmaMappedBuffer> buffer;
- void *opaque;
+ uint16_t last_desc;
+ uint16_t latency_measure_desc;
+ InternalTransferDoneCallback on_transfer_done;
+ MappedBufferPtr mapped_buffer;
};
+// We use std::array for PendingBuffersQueue to avoid dynamic allocations. We do this for two reasons:
+// 1. It relies on memory shared between processes (so we can't have dynamic allocation).
+// 2. We put it on the interrupt handler's stack - we want to avoid allocations there.
+using PendingBuffersQueue = CircularArray<PendingBuffer, std::array<PendingBuffer, PENDING_BUFFERS_SIZE>>;
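+// For instance (illustrative): full() is a size-vs-capacity check and push_back() only advances an index
+// over the fixed std::array storage, so the interrupt path never allocates.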
+
class ChannelBase;
class BoundaryChannel;
class AsyncChannel;
using SharedConditionVariable = std::condition_variable_any;
#endif
+
class VdmaChannelState final
{
public:
VdmaChannelState(VdmaChannelState &&other) = delete;
~VdmaChannelState() = default;
+ static void empty_transfer_done_callback(hailo_status){}
+
void reset_counters();
void reset_previous_state_counters();
// Each transfer on the channel is logged by a PendingBuffer:
// - first_desc/last_desc - first and last descriptors of the transfer
// - direction - transfer's direction
// - on_transfer_done - callback to be called once the transfer is complete (i.e. when an interrupt is received on last_desc)
- // - buffer - points to the vdma mapped buffer being transferred (may be null)
- // - opaque - context to be transferred to the callback (may be null)
- void add_pending_buffer(uint32_t first_desc, uint32_t last_desc, HailoRTDriver::DmaDirection direction,
- const TransferDoneCallback &on_transfer_done, std::shared_ptr<DmaMappedBuffer> buffer = nullptr, void *opaque = nullptr);
+ // - mapped_buffer - the buffer's dma mapping (may be null)
+ void add_pending_buffer(uint16_t first_desc, uint16_t last_desc, HailoRTDriver::DmaDirection direction,
+ const InternalTransferDoneCallback &on_transfer_done = empty_transfer_done_callback,
+ MappedBufferPtr mapped_buffer = nullptr);
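+ // e.g. (illustrative): a channel that owns its bounce buffer and needs no completion notification can
+ // call add_pending_buffer(first_desc, last_desc, direction) and rely on the defaulted arguments.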
RecursiveSharedMutex &mutex()
{
bool m_is_channel_activated;
- // On pending buffer with must use std::array because it relays on the shared memory (and std::vector uses new malloc)
- CircularArray<PendingBuffer, std::array<PendingBuffer, PENDING_BUFFERS_SIZE>> m_pending_buffers;
+ PendingBuffersQueue m_pending_buffers;
// TODO: describe why we must have our own num_available and num_proc.
// it's not just for efficiency but its critical to avoid a potential bug - see Avigail email.
// TODO: Consider C11 stdatomic
InterruptsDispatcher::InterruptsDispatcher(std::reference_wrapper<HailoRTDriver> driver) :
m_driver(driver),
- m_is_running(false),
- m_channels_bitmap()
+ m_interrupts_thread([this] { wait_interrupts(); })
{}
InterruptsDispatcher::~InterruptsDispatcher()
{
- if (m_is_running) {
- stop();
+ if (m_wait_context != nullptr) {
+ auto status = stop();
+ if (status != HAILO_SUCCESS) {
+ LOGGER__ERROR("Failed stopping interrupts dispatcher on destructor");
+ }
+ }
+
+ if (m_interrupts_thread.joinable()) {
+ signal_thread_quit();
+ m_interrupts_thread.join();
}
}
hailo_status InterruptsDispatcher::start(const ChannelsBitmap &channels_bitmap, bool enable_timestamp_measure,
const ProcessIrqCallback &process_irq)
{
- CHECK(!m_is_running, HAILO_INVALID_OPERATION, "Interrupt thread already running");
- assert(m_channel_threads.empty());
- assert(m_channels_bitmap == ChannelsBitmap{});
+ {
+ std::unique_lock<std::mutex> lock(m_mutex);
+ CHECK(m_wait_context == nullptr, HAILO_INVALID_OPERATION, "Interrupt thread already running");
- m_channels_bitmap = channels_bitmap;
+ auto wait_context = make_unique_nothrow<WaitContext>(WaitContext{channels_bitmap, process_irq});
+ CHECK_NOT_NULL(wait_context, HAILO_OUT_OF_HOST_MEMORY);
+ m_wait_context = std::move(wait_context);
- auto status = m_driver.get().vdma_interrupts_enable(m_channels_bitmap, enable_timestamp_measure);
- CHECK_SUCCESS(status, "Failed to enable vdma interrupts");
-
- // Setting m_is_running will allow the threads to run
- m_is_running = true;
- m_channel_threads.emplace_back([this, process_irq]() {
- // m_channels_bitmap may be changed by InterruptsDispatcher::stop. To avoid wait for 0 channels,
- // we use copy of m_channels_bitmap.
- ChannelsBitmap channels_bitmap_local = m_channels_bitmap;
- wait_interrupts(channels_bitmap_local, process_irq);
- });
+ auto status = m_driver.get().vdma_interrupts_enable(m_wait_context->bitmap, enable_timestamp_measure);
+ CHECK_SUCCESS(status, "Failed to enable vdma interrupts");
+ }
+ m_cond.notify_one();
return HAILO_SUCCESS;
}
hailo_status InterruptsDispatcher::stop()
{
- CHECK(m_is_running, HAILO_INVALID_OPERATION, "Interrupts thread not started");
- assert(!m_channel_threads.empty());
- assert(m_channels_bitmap != ChannelsBitmap{});
+ std::unique_lock<std::mutex> lock(m_mutex);
+ CHECK(m_wait_context != nullptr, HAILO_INVALID_OPERATION, "Interrupt thread not running");
- // Signal threads to stop execution
- m_is_running = false;
+ // Nullify wait context so the thread will pause
+ const auto bitmap = m_wait_context->bitmap;
+ m_wait_context = nullptr;
// Calling disable interrupts will cause the vdma_interrupts_wait to return.
- auto status = m_driver.get().vdma_interrupts_disable(m_channels_bitmap);
+ auto status = m_driver.get().vdma_interrupts_disable(bitmap);
CHECK_SUCCESS(status, "Failed to disable vdma interrupts");
- m_channels_bitmap = ChannelsBitmap{};
- for (auto &thread : m_channel_threads) {
- if (thread.joinable()) {
- thread.join();
- }
- }
- m_channel_threads.clear();
+ // We need to make sure that the interrupts thread has stopped waiting.
+ // Otherwise, on a fast stop() followed by start(), the next start() may accept
+ // interrupts from the previous run.
+ m_cond.wait(lock, [&]{ return m_thread_state == ThreadState::not_active; });
return HAILO_SUCCESS;
}
-void InterruptsDispatcher::wait_interrupts(const ChannelsBitmap &channels_bitmap, const ProcessIrqCallback &process_irq)
+void InterruptsDispatcher::wait_interrupts()
{
OsUtils::set_current_thread_name("CHANNEL_INTR");
- while (m_is_running) {
+
+ std::unique_lock<std::mutex> lock(m_mutex);
+ while (true) {
+
+ m_thread_state = ThreadState::not_active;
+ m_cond.notify_one(); // Wake up stop()
+
+ m_cond.wait(lock, [&]{ return m_should_quit || (m_wait_context != nullptr); });
+ if (m_should_quit) {
+ break;
+ }
+
+ m_thread_state = ThreadState::active;
+ auto wait_context = *m_wait_context;
+
// vdma_interrupts_wait is a blocking function that returns in one of these scenarios:
// 1. We got new interrupts; irq_data will be passed to the process_irq callback.
// 2. vdma_interrupts_disable was called; vdma_interrupts_wait returns with an empty list.
// 3. Some other error occurred - shouldn't really happen; we exit the interrupt thread.
- auto irq_data = m_driver.get().vdma_interrupts_wait(channels_bitmap);
+ lock.unlock();
+ auto irq_data = m_driver.get().vdma_interrupts_wait(wait_context.bitmap);
+ lock.lock();
+
if (!irq_data.has_value()) {
LOGGER__ERROR("Interrupt thread exit with {}", irq_data.status());
break;
}
if (irq_data->channels_count > 0) {
- process_irq(irq_data.release());
+ wait_context.process_irq(irq_data.release());
}
}
}
+void InterruptsDispatcher::signal_thread_quit()
+{
+ {
+ std::unique_lock<std::mutex> lock(m_mutex);
+ assert(m_thread_state == ThreadState::not_active);
+ m_should_quit = true;
+ }
+ m_cond.notify_one();
+}
+
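+// Lifecycle sketch (illustrative): the single interrupts thread is created once in the constructor and is
+// toggled by start()/stop():
+//   InterruptsDispatcher dispatcher(std::ref(driver));
+//   dispatcher.start(bitmap, false /* enable_timestamp_measure */, process_irq); // thread goes 'active'
+//   dispatcher.stop(); // disables interrupts, then waits until the thread reports 'not_active'
+// The thread itself exits only when the destructor calls signal_thread_quit() and joins it.
+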
} /* namespace vdma */
} /* namespace hailort */
#include "os/hailort_driver.hpp"
#include <thread>
#include <functional>
+#include <condition_variable>
namespace hailort {
namespace vdma {
-
/// Manages a single thread that waits for interrupts on all channels.
class InterruptsDispatcher final {
public:
InterruptsDispatcher(InterruptsDispatcher &&) = delete;
InterruptsDispatcher &operator=(InterruptsDispatcher &&) = delete;
- // TODO: HRT-9590 remove interrupt_thread_per_channel, use it by default
hailo_status start(const ChannelsBitmap &channels_bitmap, bool enable_timestamp_measure,
const ProcessIrqCallback &process_irq);
hailo_status stop();
private:
- void wait_interrupts(const ChannelsBitmap &channels_bitmap, const ProcessIrqCallback &process_irq);
+ void wait_interrupts();
+ void signal_thread_quit();
+
+ struct WaitContext {
+ ChannelsBitmap bitmap;
+ ProcessIrqCallback process_irq;
+ };
+
+ enum class ThreadState {
+ // The interrupts thread is actually waiting for interrupts
+ active,
+
+ // The interrupts thread is done waiting for interrupts and is waiting to become active again.
+ not_active,
+ };
+
+ std::mutex m_mutex;
+ std::condition_variable m_cond;
const std::reference_wrapper<HailoRTDriver> m_driver;
- std::atomic<bool> m_is_running;
- ChannelsBitmap m_channels_bitmap;
- std::vector<std::thread> m_channel_threads;
+
+ ThreadState m_thread_state = ThreadState::not_active;
+ // When m_wait_context is not nullptr, the thread should start waiting for interrupts.
+ std::unique_ptr<WaitContext> m_wait_context;
+
+ // m_should_quit is used to quit the thread (called on destruction)
+ bool m_should_quit = false;
+ std::thread m_interrupts_thread;
};
} /* namespace vdma */
{
hailo_status status = HAILO_UNINITIALIZED;
- auto driver = HailoRTDriver::create(INTEGRATED_NNC_DRIVER_PATH);
+ const HailoRTDriver::DeviceInfo device_info {INTEGRATED_NNC_DRIVER_PATH, DEVICE_ID};
+ auto driver = HailoRTDriver::create(device_info);
CHECK_EXPECTED(driver, "Failed to initialize HailoRTDriver");
- auto device = std::unique_ptr<IntegratedDevice>(new (std::nothrow) IntegratedDevice(driver.release(), status, DEVICE_ID));
+ auto device = std::unique_ptr<IntegratedDevice>(new (std::nothrow) IntegratedDevice(driver.release(), status));
CHECK_AS_EXPECTED((nullptr != device), HAILO_OUT_OF_HOST_MEMORY);
CHECK_SUCCESS_AS_EXPECTED(status, "Failed creating IntegratedDevice");
return device;
}
-
-IntegratedDevice::IntegratedDevice(HailoRTDriver &&driver, hailo_status &status, const std::string &device_id) :
- VdmaDevice::VdmaDevice(std::move(driver), Device::Type::INTEGRATED, device_id)
+IntegratedDevice::IntegratedDevice(HailoRTDriver &&driver, hailo_status &status) :
+ VdmaDevice::VdmaDevice(std::move(driver), Device::Type::INTEGRATED)
{
status = update_fw_state();
if (HAILO_SUCCESS != status) {
status = HAILO_SUCCESS;
}
-Expected<hailo_device_architecture_t> IntegratedDevice::get_architecture() const {
- return Expected<hailo_device_architecture_t>(m_device_architecture);
-}
-
hailo_status IntegratedDevice::reset_impl(CONTROL_PROTOCOL__reset_type_t reset_type)
{
if (CONTROL_PROTOCOL__RESET_TYPE__NN_CORE == reset_type) {
class IntegratedDevice : public VdmaDevice {
public:
- virtual ~IntegratedDevice() = default;
static bool is_loaded();
static Expected<std::unique_ptr<IntegratedDevice>> create();
- virtual Expected<hailo_device_architecture_t> get_architecture() const override;
- virtual const char* get_dev_id() const override {return DEVICE_ID;}
+ virtual ~IntegratedDevice() = default;
+
Expected<size_t> read_log(MemoryView &buffer, hailo_cpu_id_t cpu_id);
virtual bool is_stream_interface_supported(const hailo_stream_interface_t &stream_interface) const override
virtual hailo_status reset_impl(CONTROL_PROTOCOL__reset_type_t reset_type) override;
private:
- IntegratedDevice(HailoRTDriver &&driver, hailo_status &status, const std::string &device_id);
+ IntegratedDevice(HailoRTDriver &&driver, hailo_status &status);
};
--- /dev/null
+/**
+ * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+**/
+/**
+ * @file buffer_requirements.cpp
+ **/
+
+#include "buffer_requirements.hpp"
+#include "vdma/memory/descriptor_list.hpp"
+#include "utils.h"
+
+namespace hailort {
+namespace vdma {
+
+// Minimum size of ccb buffers in descriptors, taken from the CCB spec.
+static constexpr uint32_t MIN_CCB_DESCRIPTORS_COUNT = 16;
+
+Expected<BufferSizesRequirements> BufferSizesRequirements::get_sg_buffer_requirements_single_transfer(
+ uint16_t max_desc_page_size, uint16_t min_batch_size, uint16_t max_batch_size, uint32_t transfer_size,
+ bool is_circular, const bool force_default_page_size)
+{
+ // First, get the result for the min batch size
+ auto results = get_sg_buffer_requirements_multiple_transfers(max_desc_page_size, min_batch_size,
+ {transfer_size}, is_circular, force_default_page_size);
+ CHECK_EXPECTED(results);
+
+ // In order to fetch all descriptors, the amount of active descs is lower by one than the amount
+ // of descs given (otherwise we won't be able to determine if the buffer is empty or full).
+ // Therefore we add 1 in order to compensate.
+ const uint32_t descs_per_transfer = DIV_ROUND_UP(transfer_size, results->desc_page_size());
+ uint32_t descs_count = std::min((descs_per_transfer * max_batch_size) + 1, MAX_DESCS_COUNT);
+ if (is_circular) {
+ descs_count = get_nearest_powerof_2(descs_count, MIN_DESCS_COUNT);
+ }
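+
+ // e.g. (illustrative): a 100 KiB transfer with a 512 B desc page gives 200 descs per transfer; with
+ // max_batch_size = 4 that's 200 * 4 + 1 = 801 descs, rounded up to 1024 when the buffer is circular.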
+
+ return BufferSizesRequirements{ descs_count, results->desc_page_size() };
+}
+
+Expected<BufferSizesRequirements> BufferSizesRequirements::get_sg_buffer_requirements_multiple_transfers(
+ uint16_t max_desc_page_size, uint16_t batch_size, const std::vector<uint32_t> &transfer_sizes,
+ bool is_circular, const bool force_default_page_size)
+{
+ const uint16_t initial_desc_page_size = force_default_page_size ?
+ DEFAULT_DESC_PAGE_SIZE : find_initial_desc_page_size(transfer_sizes);
+
+ CHECK_AS_EXPECTED(max_desc_page_size <= MAX_DESC_PAGE_SIZE, HAILO_INTERNAL_FAILURE,
+ "max_desc_page_size given {} is bigger than hw max desc page size {}",
+ max_desc_page_size, MAX_DESC_PAGE_SIZE);
+ CHECK_AS_EXPECTED(MIN_DESC_PAGE_SIZE <= max_desc_page_size, HAILO_INTERNAL_FAILURE,
+ "max_desc_page_size given {} is lower that hw min desc page size {}",
+ max_desc_page_size, MIN_DESC_PAGE_SIZE);
+
+ const uint16_t min_desc_page_size = MIN_DESC_PAGE_SIZE;
+ CHECK_AS_EXPECTED(initial_desc_page_size <= max_desc_page_size, HAILO_INTERNAL_FAILURE,
+ "Initial descriptor page size ({}) is larger than maximum descriptor page size ({})",
+ initial_desc_page_size, max_desc_page_size);
+ CHECK_AS_EXPECTED(initial_desc_page_size >= min_desc_page_size, HAILO_INTERNAL_FAILURE,
+ "Initial descriptor page size ({}) is smaller than minimum descriptor page size ({})",
+ initial_desc_page_size, min_desc_page_size);
+
+ // Defined as uint32_t to prevent overflow (as we multiply it by two in each iteration of the while loop below)
+ uint32_t local_desc_page_size = initial_desc_page_size;
+
+ uint32_t descs_count = get_required_descriptor_count(transfer_sizes, initial_desc_page_size);
+ // Too many descriptors; try a larger desc_page_size which will lead to less descriptors used
+ while ((descs_count * batch_size) > (MAX_DESCS_COUNT - 1)) {
+ CHECK_AS_EXPECTED(IS_FIT_IN_UINT16(local_desc_page_size << 1), HAILO_INTERNAL_FAILURE,
+ "Descriptor page size needs to fit in 16B");
+ local_desc_page_size = static_cast<uint16_t>(local_desc_page_size << 1);
+
+ CHECK_AS_EXPECTED(local_desc_page_size <= max_desc_page_size, HAILO_OUT_OF_DESCRIPTORS,
+ "Network shapes and batch size exceeds driver descriptors capabilities. "
+ "Required descriptors count: {}, max allowed on the driver: {}. "
+ "(A common cause for this error could be the batch size - which is {}).",
+ (batch_size * descs_count), (MAX_DESCS_COUNT - 1), batch_size);
+
+ descs_count = get_required_descriptor_count(transfer_sizes, static_cast<uint16_t>(local_desc_page_size));
+ }
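+
+ // e.g. (illustrative, assuming MAX_DESCS_COUNT of 4096): a single 1 MiB transfer with batch_size = 4
+ // needs 2049 descs at a 512 B page (4 * 2049 > 4095), and still 1025 at 1024 B; at 2048 B pages it
+ // drops to 513 descs and 4 * 513 = 2052 fits, so 2048 B is chosen.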
+
+ // Found desc_page_size and descs_count
+ const auto desc_page_size = static_cast<uint16_t>(local_desc_page_size);
+ if (initial_desc_page_size != desc_page_size) {
+ LOGGER__WARNING("Desc page size value ({}) is not optimal for performance.", desc_page_size);
+ }
+
+ if (is_circular) {
+ // The length of a descriptor list is always a power of 2. Therefore, on circular buffers the hw will have to
+ // access all descriptors.
+ descs_count = get_nearest_powerof_2(descs_count, MIN_DESCS_COUNT);
+ CHECK_AS_EXPECTED(descs_count <= MAX_DESCS_COUNT, HAILO_OUT_OF_DESCRIPTORS);
+ }
+
+ return BufferSizesRequirements{descs_count, desc_page_size};
+}
+
+Expected<BufferSizesRequirements> BufferSizesRequirements::get_ccb_buffer_requirements_single_transfer(uint16_t batch_size,
+ uint32_t transfer_size, bool is_circular)
+{
+ const uint16_t desc_page_size = DEFAULT_DESC_PAGE_SIZE;
+ const auto desc_per_transfer = DIV_ROUND_UP(transfer_size, desc_page_size);
+ auto descs_count = desc_per_transfer * batch_size;
+ descs_count = std::max(descs_count, MIN_CCB_DESCRIPTORS_COUNT);
+ if (is_circular) {
+ // The first 12 channels in D2H CCB ("regular channels") require that the number of descriptors be a power
+ // of 2.
+ // We could optimize this by checking that the channel index is one of the last 4 channels ("enhanced
+ // channels"), or even by allocating those indexes explicitly.
+ // For now, however, we always use a power of 2.
+ descs_count = get_nearest_powerof_2(descs_count, MIN_CCB_DESCRIPTORS_COUNT);
+ }
+
+ return BufferSizesRequirements{descs_count, desc_page_size};
+}
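+
+// e.g. (illustrative): transfer_size = 10000 with the 512 B default page gives 20 descs per transfer;
+// with batch_size = 1 that's max(20, MIN_CCB_DESCRIPTORS_COUNT) = 20, rounded up to 32 when circular.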
+
+
+uint16_t BufferSizesRequirements::find_initial_desc_page_size(const std::vector<uint32_t> &transfer_sizes)
+{
+ const auto max_transfer_size = *std::max_element(transfer_sizes.begin(), transfer_sizes.end());
+ // Note: If the pages pointed to by the descriptors are copied in their entirety, then DEFAULT_DESC_PAGE_SIZE
+ // is the optimal value. For transfer_sizes smaller than DEFAULT_DESC_PAGE_SIZE using smaller descriptor page
+ // sizes will save memory consumption without harming performance. In the case of nms for example, only one bbox
+ // is copied from each page. Hence, we'll use MIN_DESC_PAGE_SIZE for nms.
+ const uint16_t initial_desc_page_size = (DEFAULT_DESC_PAGE_SIZE > max_transfer_size) ?
+ static_cast<uint16_t>(get_nearest_powerof_2(max_transfer_size, MIN_DESC_PAGE_SIZE)) :
+ DEFAULT_DESC_PAGE_SIZE;
+ if (DEFAULT_DESC_PAGE_SIZE != initial_desc_page_size) {
+ LOGGER__INFO("Using non-default initial_desc_page_size of {}, due to a small transfer size ({})",
+ initial_desc_page_size, max_transfer_size);
+ }
+ return initial_desc_page_size;
+}
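+
+// e.g. (illustrative): for an nms output whose largest transfer is 100 bytes, the nearest power of two
+// clamped to MIN_DESC_PAGE_SIZE is 128, so 128 B pages are used instead of the 512 B default.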
+
+uint32_t BufferSizesRequirements::get_required_descriptor_count(const std::vector<uint32_t> &transfer_sizes,
+ uint16_t desc_page_size)
+{
+ uint32_t desc_count = 0;
+ for (auto &transfer_size : transfer_sizes) {
+ desc_count += DIV_ROUND_UP(transfer_size, desc_page_size);
+ }
+ // One extra descriptor is needed, because the amount of available descriptors is (desc_count - 1)
+ return desc_count + 1;
+}
+
+} /* namespace vdma */
+} /* namespace hailort */
--- /dev/null
+/**
+ * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+**/
+/**
+ * @file buffer_requirements.hpp
+ * @brief Calculate vdma buffer size requirements: the descriptor count, the descriptor page size and the
+ * resulting buffer size.
+ **/
+
+#ifndef _HAILO_BUFFER_REQUIREMENTS_HPP_
+#define _HAILO_BUFFER_REQUIREMENTS_HPP_
+
+#include "hailo/expected.hpp"
+
+#include <cstdint>
+#include <cassert>
+#include <vector>
+
+
+namespace hailort {
+namespace vdma {
+
+class BufferSizesRequirements final {
+public:
+ BufferSizesRequirements(uint32_t descs_count, uint16_t desc_page_size) :
+ m_descs_count(descs_count),
+ m_desc_page_size(desc_page_size)
+ {
+ assert(m_descs_count > 0);
+ assert(m_desc_page_size > 0);
+ }
+
+ uint32_t descs_count() const { return m_descs_count; }
+ uint16_t desc_page_size() const { return m_desc_page_size; }
+ uint32_t buffer_size() const { return m_descs_count * m_desc_page_size; }
+
+ static Expected<BufferSizesRequirements> get_sg_buffer_requirements_single_transfer(uint16_t max_desc_page_size,
+ uint16_t min_batch_size, uint16_t max_batch_size, uint32_t transfer_size, bool is_circular,
+ const bool force_default_page_size);
+ static Expected<BufferSizesRequirements> get_sg_buffer_requirements_multiple_transfers(uint16_t max_desc_page_size,
+ uint16_t batch_size, const std::vector<uint32_t> &transfer_sizes, bool is_circular,
+ const bool force_default_page_size);
+
+ static Expected<BufferSizesRequirements> get_ccb_buffer_requirements_single_transfer(uint16_t batch_size,
+ uint32_t transfer_size, bool is_circular);
+
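+ // Usage sketch (illustrative; names are placeholders): callers typically compute requirements and then
+ // allocate accordingly:
+ //   auto reqs = BufferSizesRequirements::get_sg_buffer_requirements_multiple_transfers(
+ //       max_page_size, batch_size, {frame_size}, true /* is_circular */, false /* force_default_page_size */);
+ //   if (reqs) { /* allocate reqs->buffer_size() bytes and a desc list of reqs->descs_count() */ }
+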
+private:
+ static uint16_t find_initial_desc_page_size(const std::vector<uint32_t> &transfer_sizes);
+ static uint32_t get_required_descriptor_count(const std::vector<uint32_t> &transfer_sizes, uint16_t desc_page_size);
+
+ const uint32_t m_descs_count;
+ const uint16_t m_desc_page_size;
+};
+
+} /* namespace vdma */
+} /* namespace hailort */
+
+#endif /* _HAILO_BUFFER_REQUIREMENTS_HPP_ */
namespace hailort {
namespace vdma {
-// Minimum size of ccb buffers in descriptors, taken from the CCB spec.
-#define MIN_CCB_DESCRIPTORS_COUNT (16)
-
-static uint32_t align(uint32_t size, uint32_t align)
-{
- assert(is_powerof2(align));
- const uint32_t mask = align - 1;
- return (size + mask) & ~mask;
-}
-
Expected<ContinuousBuffer> ContinuousBuffer::create(size_t size, HailoRTDriver &driver)
{
auto result = driver.vdma_continuous_buffer_alloc(size);
- CHECK_EXPECTED(result, "Failed allocating continuous buffer, size {}", size);
+ /* Don't print an error here since this might be an expected error that libhailort can recover from
+ (out of host CMA memory). If that's not the case, there is a print in the hailort_driver.cpp file */
+ if (HAILO_OUT_OF_HOST_CMA_MEMORY == result.status()) {
+ return make_unexpected(result.status());
+ } else {
+ CHECK_EXPECTED(result);
+ }
uintptr_t handle = 0;
uint64_t dma_address = 0;
return ContinuousBuffer(size, driver, handle, dma_address, mmap.release());
}
-uint32_t ContinuousBuffer::get_buffer_size(uint32_t buffer_size)
-{
- const uint16_t page_size = DEFAULT_DESC_PAGE_SIZE;
- const auto aligned_buffer_size = align(buffer_size, page_size);
-
- const uint32_t min_buffer_size = page_size * MIN_CCB_DESCRIPTORS_COUNT;
- return std::max(aligned_buffer_size, min_buffer_size);
-}
-
-uint32_t ContinuousBuffer::get_buffer_size_desc_power2(uint32_t buffer_size)
-{
- const uint16_t page_size = DEFAULT_DESC_PAGE_SIZE;
- const auto descriptors_in_buffer = DIV_ROUND_UP(buffer_size, page_size);
- const auto actual_descriptors_count = get_nearest_powerof_2(descriptors_in_buffer, MIN_CCB_DESCRIPTORS_COUNT);
- return actual_descriptors_count * page_size;
-}
-
ContinuousBuffer::~ContinuousBuffer()
{
if (0 != m_handle) {
return descriptors_in_buffer(m_size);
}
-hailo_status ContinuousBuffer::read(void *buf_dst, size_t count, size_t offset, bool /* should_sync */)
+hailo_status ContinuousBuffer::read(void *buf_dst, size_t count, size_t offset)
{
CHECK((count + offset) <= m_size, HAILO_INSUFFICIENT_BUFFER,
"Requested size {} from offset {} is more than the buffer size {}", count, offset, m_size);
}
Expected<uint32_t> ContinuousBuffer::program_descriptors(size_t transfer_size, InterruptsDomain last_desc_interrupts_domain,
- size_t desc_offset, bool is_circular)
+ size_t desc_offset)
{
(void)last_desc_interrupts_domain;
(void)desc_offset;
- (void)is_circular;
// The descriptors in continuous mode are programmed by the hw, nothing to do here.
return descriptors_in_buffer(transfer_size);
public:
static Expected<ContinuousBuffer> create(size_t size, HailoRTDriver &driver);
- static uint32_t get_buffer_size(uint32_t buffer_size);
- // Get buffer size with the requirment that the amount of descriptors is a power of 2.
- static uint32_t get_buffer_size_desc_power2(uint32_t buffer_size);
-
ContinuousBuffer(const ContinuousBuffer &) = delete;
ContinuousBuffer& operator=(const ContinuousBuffer &) = delete;
ContinuousBuffer& operator=(ContinuousBuffer &&) = delete;
virtual uint16_t desc_page_size() const override;
virtual uint32_t descs_count() const override;
- virtual hailo_status read(void *buf_dst, size_t count, size_t offset, bool should_sync) override;
+ virtual hailo_status read(void *buf_dst, size_t count, size_t offset) override;
virtual hailo_status write(const void *buf_src, size_t count, size_t offset) override;
virtual Expected<uint32_t> program_descriptors(size_t transfer_size, InterruptsDomain last_desc_interrupts_domain,
- size_t desc_offset, bool is_circular) override;
+ size_t desc_offset) override;
virtual hailo_status reprogram_device_interrupts_for_end_of_batch(size_t transfer_size, uint16_t batch_size,
InterruptsDomain new_interrupts_domain) override;
**/
#include "vdma/memory/descriptor_list.hpp"
-#include "vdma/memory/mapped_buffer_impl.hpp"
#include "utils.h"
namespace vdma {
-Expected<DescriptorList> DescriptorList::create(uint32_t desc_count, uint16_t requested_desc_page_size,
+Expected<DescriptorList> DescriptorList::create(uint32_t desc_count, uint16_t desc_page_size, bool is_circular,
HailoRTDriver &driver)
{
hailo_status status = HAILO_UNINITIALIZED;
- auto desc_page_size_value = driver.calc_desc_page_size(requested_desc_page_size);
- DescriptorList object(desc_count, driver, desc_page_size_value, status);
+ assert(desc_page_size <= driver.desc_max_page_size());
+
+ CHECK_AS_EXPECTED(desc_count <= MAX_DESCS_COUNT, HAILO_INVALID_ARGUMENT,
+ "descs_count {} must be smaller/equal to {}", desc_count, MAX_DESCS_COUNT);
+
+ DescriptorList object(desc_count, desc_page_size, is_circular, driver, status);
if (HAILO_SUCCESS != status) {
return make_unexpected(status);
}
- // No need to initialize descripotrs here because they are initialized in driver in hailo_vdma_program_descriptor()
+ // No need to initialize descriptors here because they are initialized by the driver in hailo_vdma_program_descriptor()
return object;
}
-DescriptorList::DescriptorList(uint32_t desc_count, HailoRTDriver &driver, uint16_t desc_page_size,
- hailo_status &status) :
- m_mapped_list(),
- m_count(desc_count),
- m_depth(0),
- m_desc_handle(0),
- m_dma_address(0),
+DescriptorList::DescriptorList(uint32_t desc_count, uint16_t desc_page_size, bool is_circular, HailoRTDriver &driver,
+ hailo_status &status) :
+ m_desc_list_info(),
+ m_is_circular(is_circular),
m_driver(driver),
m_desc_page_size(desc_page_size)
{
- if (!is_powerof2(desc_count)) {
- LOGGER__ERROR("Descriptor count ({}) must be power of 2", desc_count);
+ if (m_is_circular && !is_powerof2(desc_count)) {
+ LOGGER__ERROR("Descriptor count ({}) for circular descriptor list must be power of 2", desc_count);
status = HAILO_INVALID_ARGUMENT;
return;
}
- auto depth = calculate_desc_list_depth(desc_count);
- if (!depth) {
- status = depth.status();
- return;
- }
- m_depth = depth.value();
-
- auto desc_handle_phys_addr_pair = m_driver.descriptors_list_create(desc_count);
- if (!desc_handle_phys_addr_pair) {
- status = desc_handle_phys_addr_pair.status();
+ auto desc_list_info = m_driver.descriptors_list_create(desc_count, m_is_circular);
+ if (!desc_list_info) {
+ status = desc_list_info.status();
return;
}
- m_desc_handle = desc_handle_phys_addr_pair->first;
- m_dma_address = desc_handle_phys_addr_pair->second;
-
- auto mapped_list = MmapBuffer<VdmaDescriptor>::create_file_map(desc_count * sizeof(VdmaDescriptor), m_driver.fd(), m_desc_handle);
- if (!mapped_list) {
- LOGGER__ERROR("Failed to memory map descriptors. desc handle: {:X}", m_desc_handle);
- status = mapped_list.status();
- return;
- }
+ m_desc_list_info = desc_list_info.release();
- m_mapped_list = mapped_list.release();
status = HAILO_SUCCESS;
}
DescriptorList::~DescriptorList()
{
- if (HAILO_SUCCESS != m_mapped_list.unmap()) {
- LOGGER__ERROR("Failed to release descriptors mapping");
- }
-
- // Note: The descriptors_list is freed by the desc_handle (no need to use the phys_address to free)
- if (0 != m_desc_handle) {
- if(HAILO_SUCCESS != m_driver.descriptors_list_release(m_desc_handle)) {
- LOGGER__ERROR("Failed to release descriptor list {}", m_desc_handle);
+ if (0 != m_desc_list_info.handle) {
+ auto status = m_driver.descriptors_list_release(m_desc_list_info);
+ if(HAILO_SUCCESS != status) {
+ LOGGER__ERROR("Failed to release descriptor list {} with status {}", m_desc_list_info.handle, status);
}
}
}
-DescriptorList::DescriptorList(DescriptorList &&other) noexcept :
- m_mapped_list(std::move(other.m_mapped_list)),
- m_count(std::move(other.m_count)),
- m_depth(std::move(other.m_depth)),
- m_desc_handle(std::exchange(other.m_desc_handle, 0)),
- m_dma_address(std::exchange(other.m_dma_address, 0)),
+DescriptorList::DescriptorList(DescriptorList &&other) noexcept :
+ m_desc_list_info(),
+ m_is_circular(std::move(other.m_is_circular)),
m_driver(other.m_driver),
- m_desc_page_size(other.m_desc_page_size) {}
-
-Expected<uint8_t> DescriptorList::calculate_desc_list_depth(size_t count)
+ m_desc_page_size(other.m_desc_page_size)
{
- // Calculate log2 of m_count (by finding the offset of the MSB)
- uint32_t depth = 0;
- while (count >>= 1) {
- ++depth;
- }
- CHECK_AS_EXPECTED(IS_FIT_IN_UINT8(depth), HAILO_INTERNAL_FAILURE, "Calculated desc_list_depth is too big: {}", depth);
- return static_cast<uint8_t>(depth);
+ m_desc_list_info.handle = std::exchange(other.m_desc_list_info.handle, 0);
+ m_desc_list_info.dma_address = std::exchange(other.m_desc_list_info.dma_address, 0);
+ m_desc_list_info.desc_count = std::move(other.m_desc_list_info.desc_count);
+ m_desc_list_info.user_address = std::exchange(other.m_desc_list_info.user_address, nullptr);
}
-hailo_status DescriptorList::configure_to_use_buffer(DmaMappedBuffer& buffer, ChannelId channel_id, uint32_t starting_desc)
+hailo_status DescriptorList::configure_to_use_buffer(MappedBuffer& buffer, ChannelId channel_id, uint32_t starting_desc)
{
- const auto desc_list_capacity = m_desc_page_size * m_count;
+ const auto desc_list_capacity = m_desc_page_size * count();
CHECK(buffer.size() <= desc_list_capacity, HAILO_INVALID_ARGUMENT,
"Can't bind a buffer larger than the descriptor list's capacity. Buffer size {}, descriptor list capacity {}",
buffer.size(), desc_list_capacity);
- return m_driver.descriptors_list_bind_vdma_buffer(m_desc_handle, buffer.pimpl->handle(), m_desc_page_size,
+ return m_driver.descriptors_list_bind_vdma_buffer(m_desc_list_info.handle, buffer.handle(), m_desc_page_size,
channel_id.channel_index, starting_desc);
}
Expected<uint16_t> DescriptorList::program_last_descriptor(size_t transfer_size,
- InterruptsDomain last_desc_interrupts_domain, size_t desc_offset, bool is_circular)
+ InterruptsDomain last_desc_interrupts_domain, size_t desc_offset)
{
assert(transfer_size > 0);
const auto required_descriptors = descriptors_in_buffer(transfer_size);
// Required_descriptors + desc_offset can't reach m_count.
- if ((!is_circular) && ((required_descriptors + desc_offset) > m_count)){
- LOGGER__ERROR("Requested transfer size ({}) result in more descriptors than available ({})", transfer_size, m_count);
+ if ((!m_is_circular) && ((required_descriptors + desc_offset) > count())){
+ LOGGER__ERROR("Requested transfer size ({}) result in more descriptors than available ({})", transfer_size, count());
return make_unexpected(HAILO_OUT_OF_DESCRIPTORS);
}
/* Write the residue page with the remaining buffer size */
auto residue = transfer_size - (required_descriptors - 1) * m_desc_page_size;
assert(IS_FIT_IN_UINT16(residue));
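// e.g. (illustrative): transfer_size = 1000 with 512 B pages needs 2 descriptors, and the residue
// programmed into the last descriptor is 1000 - 512 = 488 bytes.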
- size_t last_desc = (desc_offset + required_descriptors - 1) & (m_count - 1);
+ size_t last_desc = (desc_offset + required_descriptors - 1) % count();
program_single_descriptor((*this)[last_desc], static_cast<uint16_t>(residue), last_desc_interrupts_domain);
return static_cast<uint16_t>(required_descriptors);
hailo_status DescriptorList::reprogram_descriptor_interrupts_domain(size_t desc_index,
InterruptsDomain interrupts_domain)
{
- if (desc_index >= m_count){
- LOGGER__ERROR("Requested desc (index={}) exceeds the number of descriptors in the list ({})", desc_index, m_count);
+ if (desc_index >= count()){
+ LOGGER__ERROR("Requested desc (index={}) exceeds the number of descriptors in the list ({})", desc_index, count());
return HAILO_OUT_OF_DESCRIPTORS;
}
reprogram_single_descriptor_interrupts_domain((*this)[desc_index], interrupts_domain);
return get_nearest_powerof_2(descs_count, MIN_DESCS_COUNT);
}
-Expected<std::pair<uint16_t, uint32_t>> DescriptorList::get_desc_buffer_sizes_for_single_transfer(
- const HailoRTDriver &driver, uint16_t min_batch_size, uint16_t max_batch_size, uint32_t transfer_size)
-{
- // Note: If the pages pointed to by the descriptors are copied in their entirety, then DEFAULT_DESC_PAGE_SIZE
- // is the optimal value. For transfer_sizes smaller than DEFAULT_DESC_PAGE_SIZE using smaller descriptor page
- // sizes will save memory consuption without harming performance. In the case of nms for example, only one bbox
- // is copied from each page. Hence, we'll use MIN_DESC_PAGE_SIZE for nms.
- const uint32_t initial_desc_page_size = (DEFAULT_DESC_PAGE_SIZE > transfer_size) ?
- get_nearest_powerof_2(transfer_size, MIN_DESC_PAGE_SIZE) : DEFAULT_DESC_PAGE_SIZE;
- if (DEFAULT_DESC_PAGE_SIZE != initial_desc_page_size) {
- LOGGER__INFO("Using non-default initial_desc_page_size of {}, due to a small transfer size ({})",
- initial_desc_page_size, transfer_size);
- }
- CHECK_AS_EXPECTED(IS_FIT_IN_UINT16(initial_desc_page_size), HAILO_INTERNAL_FAILURE,
- "Descriptor page size needs to fit in 16B");
-
- return get_desc_buffer_sizes_for_single_transfer_impl(driver, min_batch_size, max_batch_size, transfer_size,
- static_cast<uint16_t>(initial_desc_page_size));
-}
-
-Expected<std::pair<uint16_t, uint32_t>> DescriptorList::get_desc_buffer_sizes_for_multiple_transfers(
- const HailoRTDriver &driver, uint16_t batch_size, const std::vector<uint32_t> &transfer_sizes)
-{
- return get_desc_buffer_sizes_for_multiple_transfers_impl(driver, batch_size, transfer_sizes,
- DEFAULT_DESC_PAGE_SIZE);
-}
-
-Expected<std::pair<uint16_t, uint32_t>> DescriptorList::get_desc_buffer_sizes_for_single_transfer_impl(
- const HailoRTDriver &driver, uint16_t min_batch_size, uint16_t max_batch_size, uint32_t transfer_size,
- uint16_t initial_desc_page_size)
-{
- auto results = DescriptorList::get_desc_buffer_sizes_for_multiple_transfers_impl(driver, min_batch_size,
- {transfer_size}, initial_desc_page_size);
- CHECK_EXPECTED(results);
-
- auto page_size = results->first;
-
- auto desc_count = std::min(MAX_DESCS_COUNT,
- DescriptorList::calculate_descriptors_count(transfer_size, max_batch_size, page_size));
-
- return std::make_pair(page_size, desc_count);
-}
-
-Expected<std::pair<uint16_t, uint32_t>> DescriptorList::get_desc_buffer_sizes_for_multiple_transfers_impl(
- const HailoRTDriver &driver, uint16_t batch_size, const std::vector<uint32_t> &transfer_sizes,
- uint16_t initial_desc_page_size)
-{
- const uint16_t min_desc_page_size = driver.calc_desc_page_size(MIN_DESC_PAGE_SIZE);
- const uint16_t max_desc_page_size = driver.calc_desc_page_size(MAX_DESC_PAGE_SIZE);
- // Defined as uint32_t to prevent overflow (as we multiply it by two in each iteration of the while loop bellow)
- uint32_t local_desc_page_size = driver.calc_desc_page_size(initial_desc_page_size);
- CHECK_AS_EXPECTED(IS_FIT_IN_UINT16(local_desc_page_size), HAILO_INTERNAL_FAILURE,
- "Descriptor page size needs to fit in 16B");
- CHECK_AS_EXPECTED(local_desc_page_size <= max_desc_page_size, HAILO_INTERNAL_FAILURE,
- "Initial descriptor page size ({}) is larger than maximum descriptor page size ({})",
- local_desc_page_size, max_desc_page_size);
- CHECK_AS_EXPECTED(local_desc_page_size >= min_desc_page_size, HAILO_INTERNAL_FAILURE,
- "Initial descriptor page size ({}) is smaller than minimum descriptor page size ({})",
- local_desc_page_size, min_desc_page_size);
-
- uint32_t acc_desc_count = get_descriptors_count_needed(transfer_sizes, static_cast<uint16_t>(local_desc_page_size));
-
- // Too many descriptors; try a larger desc_page_size which will lead to less descriptors used
- while ((acc_desc_count * batch_size) > (MAX_DESCS_COUNT - 1)) {
- local_desc_page_size <<= 1;
-
- CHECK_AS_EXPECTED(local_desc_page_size <= max_desc_page_size, HAILO_OUT_OF_DESCRIPTORS,
- "Network shapes and batch size exceeds driver descriptors capabilities. "
- "Required descriptors count: {}, max allowed on the driver: {}. "
- "(A common cause for this error could be the batch size - which is {}).",
- (batch_size * acc_desc_count), (MAX_DESCS_COUNT - 1), batch_size);
-
- CHECK_AS_EXPECTED(IS_FIT_IN_UINT16(local_desc_page_size), HAILO_INTERNAL_FAILURE,
- "Descriptor page size needs to fit in 16B");
-
- acc_desc_count = get_descriptors_count_needed(transfer_sizes, static_cast<uint16_t>(local_desc_page_size));
- }
-
- // Found desc_page_size and acc_desc_count
- const auto desc_page_size = static_cast<uint16_t>(local_desc_page_size);
-
- // Find descs_count
- const auto descs_count = get_nearest_powerof_2(acc_desc_count, MIN_DESCS_COUNT);
- CHECK_AS_EXPECTED(descs_count <= MAX_DESCS_COUNT, HAILO_OUT_OF_DESCRIPTORS);
-
- if (initial_desc_page_size != desc_page_size) {
- LOGGER__WARNING("Desc page size value ({}) is not optimal for performance.", desc_page_size);
- }
-
- return std::make_pair(desc_page_size, descs_count);
-}
-
-uint32_t DescriptorList::get_descriptors_count_needed(const std::vector<uint32_t> &transfer_sizes,
- uint16_t desc_page_size)
-{
- uint32_t desc_count = 0;
- for (auto &transfer_size : transfer_sizes) {
- desc_count += descriptors_in_buffer(transfer_size, desc_page_size);
- }
-
- // One extra descriptor is needed, because the amount of available descriptors is (desc_count - 1)
- desc_count += 1;
- return desc_count;
-}
-
uint32_t DescriptorList::get_interrupts_bitmask(InterruptsDomain interrupts_domain)
{
uint32_t host_bitmask = 0;
// Set the IRQ control bits to zero
// Make all edits to the local variable local_pagesize_desc_ctrl that is on the stack to save read/writes to DDR
auto local_pagesize_desc_ctrl = (descriptor.PageSize_DescControl & ~DESC_IRQ_MASK);
-
+
if (InterruptsDomain::NONE == interrupts_domain) {
// Nothing else to do
descriptor.PageSize_DescControl = local_pagesize_desc_ctrl;
#define _HAILO_VDMA_DESCRIPTOR_LIST_HPP_
#include "hailo/expected.hpp"
-#include "hailo/dma_mapped_buffer.hpp"
+#include "hailo/hailort_common.hpp"
#include "common/utils.hpp"
#include "vdma/channel/channel_id.hpp"
+#include "vdma/memory/mapped_buffer.hpp"
+
#include "os/hailort_driver.hpp"
#include "os/mmap_buffer.hpp"
"DEFAULT_DESC_COUNT not in range");
// From PLDA's vDMA controller reference:
-// - Addresses of pages pointed to by vDMA descriptors need to be on a 64B boundry.
+// - Addresses of pages pointed to by vDMA descriptors need to be on a 64B boundary.
// Hence, we require a minimum page size of 64B.
// - G_PAGE_SIZE_MAX dictates the maximum desc page size:
// max_page_size = 2 ^ (G_PAGE_SIZE_MAX - 1)
// In our case max_page_size = 2 ^ (13 - 1) = 4096
-#define MIN_DESC_PAGE_SIZE (64u)
-// TODO: Calculate from G_PAGE_SIZE_MAX (I.e. read the reg etc.)
-#define MAX_DESC_PAGE_SIZE (4096u)
+static constexpr uint16_t MIN_DESC_PAGE_SIZE = 64;
+static constexpr uint16_t MAX_DESC_PAGE_SIZE = 4096;
static constexpr uint16_t DEFAULT_DESC_PAGE_SIZE = 512;
static_assert(is_powerof2(MIN_DESC_PAGE_SIZE), "MIN_DESC_PAGE_SIZE must be a power of 2");
static_assert(DEFAULT_DESC_PAGE_SIZE > 0, "DEFAULT_DESC_PAGE_SIZE must be larger than 0");
-struct VdmaDescriptor
+static constexpr auto DESCRIPTOR_STATUS_MASK = 0xFF;
+static constexpr auto DESCRIPTOR_STATUS_DONE_BIT = 0;
+static constexpr auto DESCRIPTOR_STATUS_ERROR_BIT = 1;
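+
+// e.g. (illustrative): a status byte of 0x01 means the descriptor completed (done bit set), while 0x03
+// means it completed with an error (both done and error bits set).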
+
+struct VdmaDescriptor
{
+ // Struct layout is taken from the PLDA vDMA spec and cannot be changed.
uint32_t PageSize_DescControl;
uint32_t AddrL_rsvd_DataID;
uint32_t AddrH;
uint32_t RemainingPageSize_Status;
+
+#ifndef NDEBUG
+ // Easy accessors (debug only, since DESC_STATUS_REQ and DESC_STATUS_REQ_ERR are set only in debug builds).
+ uint8_t status() const
+ {
+ return RemainingPageSize_Status & DESCRIPTOR_STATUS_MASK;
+ }
+
+ bool is_done() const
+ {
+ return is_bit_set(status(), DESCRIPTOR_STATUS_DONE_BIT);
+ }
+
+ bool is_error() const
+ {
+ return is_bit_set(status(), DESCRIPTOR_STATUS_ERROR_BIT);
+ }
+#endif /* NDEBUG */
};
-enum class InterruptsDomain
+static_assert(SIZE_OF_SINGLE_DESCRIPTOR == sizeof(VdmaDescriptor), "Invalid size of descriptor");
+
+enum class InterruptsDomain
{
NONE = 0,
DEVICE = 1 << 0,
class DescriptorList
{
public:
- static Expected<DescriptorList> create(uint32_t desc_count, uint16_t requested_desc_page_size,
+ static Expected<DescriptorList> create(uint32_t desc_count, uint16_t desc_page_size, bool is_circular,
HailoRTDriver &driver);
~DescriptorList();
DescriptorList(DescriptorList &&other) noexcept;
DescriptorList &operator=(DescriptorList &&other) = delete;
- uint8_t depth() const
- {
- return m_depth;
- }
-
uint32_t count() const
{
- return m_count;
+ assert(m_desc_list_info.desc_count <= std::numeric_limits<uint32_t>::max());
+ return static_cast<uint32_t>(m_desc_list_info.desc_count);
}
uint64_t dma_address() const
{
- return m_dma_address;
+ return m_desc_list_info.dma_address;
}
VdmaDescriptor& operator[](size_t i)
{
- assert(i < m_count);
- return m_mapped_list[i];
+ assert(i < count());
+ return desc_list()[i];
}
uint16_t desc_page_size() const
uintptr_t handle() const
{
- return m_desc_handle;
+ return m_desc_list_info.handle;
}
uint16_t max_transfers(uint32_t transfer_size)
{
// We need to keep at least 1 free desc at all time.
- return static_cast<uint16_t>((m_count - 1) / descriptors_in_buffer(transfer_size));
+ return static_cast<uint16_t>((count() - 1) / descriptors_in_buffer(transfer_size));
}
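
    // e.g. (illustrative): a 1024-desc list with transfers that each need 10 descriptors allows
    // (1024 - 1) / 10 = 102 transfers in flight.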
// Map descriptors starting at offset to the start of buffer, wrapping around the descriptor list as needed
// On hailo8, we allow configuring buffer without specific channel index (default is INVALID_VDMA_CHANNEL_INDEX).
- hailo_status configure_to_use_buffer(DmaMappedBuffer& buffer, ChannelId channel_id, uint32_t starting_desc = 0);
+ hailo_status configure_to_use_buffer(MappedBuffer& buffer, ChannelId channel_id, uint32_t starting_desc = 0);
// All descriptors are initialized to have size of m_desc_page_size - so all we do is set the last descriptor for the
// interrupt - and then, after the transfer has finished, clear the previously used first and last descriptors.
// This saves us writes/reads to the descriptor list, which is DMA memory.
Expected<uint16_t> program_last_descriptor(size_t transfer_size, InterruptsDomain last_desc_interrupts_domain,
- size_t desc_offset, bool is_circular);
+ size_t desc_offset);
void program_single_descriptor(VdmaDescriptor &descriptor, uint16_t page_size, InterruptsDomain interrupts_domain);
hailo_status reprogram_descriptor_interrupts_domain(size_t desc_index, InterruptsDomain interrupts_domain);
void clear_descriptor(const size_t desc_index);
uint32_t descriptors_in_buffer(size_t buffer_size) const;
static uint32_t descriptors_in_buffer(size_t buffer_size, uint16_t desc_page_size);
static uint32_t calculate_descriptors_count(uint32_t buffer_size, uint16_t batch_size, uint16_t desc_page_size);
- static Expected<std::pair<uint16_t, uint32_t>> get_desc_buffer_sizes_for_single_transfer(const HailoRTDriver &driver,
- uint16_t min_batch_size, uint16_t max_batch_size, uint32_t transfer_size);
- static Expected<std::pair<uint16_t, uint32_t>> get_desc_buffer_sizes_for_multiple_transfers(const HailoRTDriver &driver,
- uint16_t batch_size, const std::vector<uint32_t> &transfer_sizes);
private:
- DescriptorList(uint32_t desc_count, HailoRTDriver &driver, uint16_t desc_page_size, hailo_status &status);
+ DescriptorList(uint32_t desc_count, uint16_t desc_page_size, bool is_circular, HailoRTDriver &driver,
+ hailo_status &status);
+
+ VdmaDescriptor *desc_list() { return reinterpret_cast<VdmaDescriptor*>(m_desc_list_info.user_address); }
+
uint32_t get_interrupts_bitmask(InterruptsDomain interrupts_domain);
void reprogram_single_descriptor_interrupts_domain(VdmaDescriptor &descriptor, InterruptsDomain interrupts_domain);
- static Expected<uint8_t> calculate_desc_list_depth(size_t count);
- // Note: initial_desc_page_size should be the optimal descriptor page size.
- static Expected<std::pair<uint16_t, uint32_t>> get_desc_buffer_sizes_for_single_transfer_impl(
- const HailoRTDriver &driver, uint16_t min_batch_size, uint16_t max_batch_size, uint32_t transfer_size,
- uint16_t initial_desc_page_size);
- static Expected<std::pair<uint16_t, uint32_t>> get_desc_buffer_sizes_for_multiple_transfers_impl(
- const HailoRTDriver &driver, uint16_t batch_size, const std::vector<uint32_t> &transfer_sizes,
- uint16_t initial_desc_page_size);
- static uint32_t get_descriptors_count_needed(const std::vector<uint32_t> &transfer_sizes,
- uint16_t desc_page_size);
-
- MmapBuffer<VdmaDescriptor> m_mapped_list;
- uint32_t m_count;
- uint8_t m_depth;
- uintptr_t m_desc_handle;
- uint64_t m_dma_address;
+
+
+ DescriptorsListInfo m_desc_list_info;
+ const bool m_is_circular;
HailoRTDriver &m_driver;
const uint16_t m_desc_page_size;
};
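A minimal standalone sketch of the arithmetic behind the free-descriptor rule noted above; all names here are illustrative and not part of the HailoRT sources:

#include <cstddef>
#include <cstdint>

// Descriptors needed for one transfer: one per descriptor page, rounded up.
static uint32_t descs_per_transfer(size_t transfer_size, uint16_t desc_page_size)
{
    return static_cast<uint32_t>((transfer_size + desc_page_size - 1) / desc_page_size);
}

// Max transfers a list of desc_count descriptors can hold, keeping one descriptor free.
static uint16_t max_transfers_sketch(uint32_t desc_count, size_t transfer_size, uint16_t desc_page_size)
{
    return static_cast<uint16_t>((desc_count - 1) / descs_per_transfer(transfer_size, desc_page_size));
}

// Example: 512 descriptors, 4096-byte pages, 10000-byte transfers:
// descs_per_transfer = 3, so max_transfers_sketch = (512 - 1) / 3 = 170.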
--- /dev/null
+/**
+ * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+**/
+/**
+ * @file dma_able_buffer.cpp
+ * @brief A Buffer that can be mapped to some device for dma operations.
+ * See hpp for more information.
+ **/
+
+#include "dma_able_buffer.hpp"
+#include "common/os_utils.hpp"
+
+#if defined(_MSC_VER)
+#include "os/windows/virtual_alloc_guard.hpp"
+#endif /* defined(_MSC_VER) */
+
+
+#if defined(__QNX__)
+#include <fcntl.h>
+#endif
+
+namespace hailort {
+namespace vdma {
+
+#if defined(__linux__) || defined(_MSC_VER)
+
+// User buffer. This class does not own the buffer.
+class UserAllocatedDmaAbleBuffer : public DmaAbleBuffer {
+public:
+ static Expected<DmaAbleBufferPtr> create(void *user_address, size_t size)
+ {
+ CHECK_AS_EXPECTED(0 == (reinterpret_cast<size_t>(user_address) % OsUtils::get_page_size()),
+            HAILO_INVALID_ARGUMENT, "User address mapped as dma must be page aligned (page size {})",
+ OsUtils::get_page_size());
+
+ auto buffer = make_shared_nothrow<UserAllocatedDmaAbleBuffer>(user_address, size);
+ CHECK_NOT_NULL_AS_EXPECTED(buffer, HAILO_OUT_OF_HOST_MEMORY);
+
+ return std::static_pointer_cast<DmaAbleBuffer>(buffer);
+ }
+
+ UserAllocatedDmaAbleBuffer(void *user_address, size_t size) :
+ m_size(size),
+ m_user_address(user_address)
+ {}
+
+ virtual size_t size() const override { return m_size; }
+ virtual void *user_address() override { return m_user_address; }
+ virtual vdma_mapped_buffer_driver_identifier buffer_identifier() override { return HailoRTDriver::INVALID_DRIVER_BUFFER_HANDLE_VALUE; }
+
+private:
+ const size_t m_size;
+ void *m_user_address;
+};
+
+
+#if defined(__linux__)
+class PageAlignedDmaAbleBuffer : public DmaAbleBuffer {
+public:
+ static Expected<DmaAbleBufferPtr> create(size_t size)
+ {
+ // Shared memory to allow python fork.
+ auto mmapped_buffer = MmapBuffer<void>::create_shared_memory(size);
+ CHECK_EXPECTED(mmapped_buffer);
+
+ auto buffer = make_shared_nothrow<PageAlignedDmaAbleBuffer>(mmapped_buffer.release());
+ CHECK_NOT_NULL_AS_EXPECTED(buffer, HAILO_OUT_OF_HOST_MEMORY);
+ return std::static_pointer_cast<DmaAbleBuffer>(buffer);
+ }
+
+ PageAlignedDmaAbleBuffer(MmapBuffer<void> &&mmapped_buffer) :
+ m_mmapped_buffer(std::move(mmapped_buffer))
+ {}
+
+ virtual void* user_address() override { return m_mmapped_buffer.address(); }
+ virtual size_t size() const override { return m_mmapped_buffer.size(); }
+ virtual vdma_mapped_buffer_driver_identifier buffer_identifier() override { return HailoRTDriver::INVALID_DRIVER_BUFFER_HANDLE_VALUE; }
+
+private:
+    // Using mmap instead of aligned_alloc so the mapping can be shared (MAP_SHARED) - used for multi-process fork.
+ MmapBuffer<void> m_mmapped_buffer;
+};
+
+#elif defined(_MSC_VER)
+class PageAlignedDmaAbleBuffer : public DmaAbleBuffer {
+public:
+ static Expected<DmaAbleBufferPtr> create(size_t size)
+ {
+ auto memory_guard = VirtualAllocGuard::create(size);
+ CHECK_EXPECTED(memory_guard);
+
+ auto buffer = make_shared_nothrow<PageAlignedDmaAbleBuffer>(memory_guard.release());
+ CHECK_NOT_NULL_AS_EXPECTED(buffer, HAILO_OUT_OF_HOST_MEMORY);
+ return std::static_pointer_cast<DmaAbleBuffer>(buffer);
+ }
+
+ PageAlignedDmaAbleBuffer(VirtualAllocGuard &&memory_guard) :
+ m_memory_guard(std::move(memory_guard))
+ {}
+
+ virtual size_t size() const override { return m_memory_guard.size(); }
+ virtual void *user_address() override { return m_memory_guard.address(); }
+ virtual vdma_mapped_buffer_driver_identifier buffer_identifier() override { return HailoRTDriver::INVALID_DRIVER_BUFFER_HANDLE_VALUE; }
+
+private:
+ VirtualAllocGuard m_memory_guard;
+};
+#else
+#error "unsupported platform!"
+#endif
+
+// Allocate low memory buffer using HailoRTDriver.
+class DriverAllocatedDmaAbleBuffer : public DmaAbleBuffer {
+public:
+ static Expected<DmaAbleBufferPtr> create(HailoRTDriver &driver, size_t size)
+ {
+ auto driver_buffer_handle = driver.vdma_low_memory_buffer_alloc(size);
+ CHECK_EXPECTED(driver_buffer_handle);
+
+ auto mmapped_buffer = MmapBuffer<void>::create_file_map(size, driver.fd(), driver_buffer_handle.value());
+ if (!mmapped_buffer) {
+ auto free_status = driver.vdma_low_memory_buffer_free(driver_buffer_handle.value());
+ if (HAILO_SUCCESS != free_status) {
+ LOGGER__ERROR("Failed free vdma low memory with status {}", free_status);
+ // Continue
+ }
+
+ return make_unexpected(mmapped_buffer.status());
+ }
+ CHECK_EXPECTED(mmapped_buffer);
+
+ auto buffer = make_shared_nothrow<DriverAllocatedDmaAbleBuffer>(driver, driver_buffer_handle.value(),
+ mmapped_buffer.release());
+ CHECK_NOT_NULL_AS_EXPECTED(buffer, HAILO_OUT_OF_HOST_MEMORY);
+ return std::static_pointer_cast<DmaAbleBuffer>(buffer);
+ }
+
+ DriverAllocatedDmaAbleBuffer(HailoRTDriver &driver, vdma_mapped_buffer_driver_identifier driver_allocated_buffer_id,
+ MmapBuffer<void> &&mmapped_buffer) :
+ m_driver(driver),
+ m_driver_allocated_buffer_id(driver_allocated_buffer_id),
+ m_mmapped_buffer(std::move(mmapped_buffer))
+ {}
+
+ DriverAllocatedDmaAbleBuffer(const DriverAllocatedDmaAbleBuffer &) = delete;
+ DriverAllocatedDmaAbleBuffer &operator=(const DriverAllocatedDmaAbleBuffer &) = delete;
+
+ ~DriverAllocatedDmaAbleBuffer()
+ {
+ auto status = m_mmapped_buffer.unmap();
+ if (HAILO_SUCCESS != status) {
+ LOGGER__ERROR("Failed to unmap buffer");
+ // continue
+ }
+
+ status = m_driver.vdma_low_memory_buffer_free(m_driver_allocated_buffer_id);
+ if (HAILO_SUCCESS != status) {
+ LOGGER__ERROR("Failed to free low memory buffer");
+ // continue
+ }
+ }
+
+ virtual void* user_address() override { return m_mmapped_buffer.address(); }
+ virtual size_t size() const override { return m_mmapped_buffer.size(); }
+ virtual vdma_mapped_buffer_driver_identifier buffer_identifier() override { return m_driver_allocated_buffer_id; }
+
+private:
+ HailoRTDriver &m_driver;
+ const vdma_mapped_buffer_driver_identifier m_driver_allocated_buffer_id;
+
+ MmapBuffer<void> m_mmapped_buffer;
+};
+
+Expected<DmaAbleBufferPtr> DmaAbleBuffer::create(size_t size, void *user_address)
+{
+ if (nullptr != user_address) {
+ return UserAllocatedDmaAbleBuffer::create(user_address, size);
+ } else {
+ return PageAlignedDmaAbleBuffer::create(size);
+ }
+}
+
+Expected<DmaAbleBufferPtr> DmaAbleBuffer::create(HailoRTDriver &driver, size_t size, void *user_address)
+{
+ if ((nullptr == user_address) && driver.allocate_driver_buffer()) {
+ return DriverAllocatedDmaAbleBuffer::create(driver, size);
+ } else {
+ // The driver is not needed.
+ return create(size, user_address);
+ }
+}
+
+#elif defined(__QNX__)
+
+class SharedMemoryDmaAbleBuffer : public DmaAbleBuffer {
+public:
+
+ static Expected<DmaAbleBufferPtr> create(size_t size)
+ {
+ auto shm_fd = open_shared_memory_fd(size);
+ CHECK_EXPECTED(shm_fd);
+
+ auto mmapped_buffer = MmapBuffer<void>::create_file_map_nocache(size, shm_fd.value(), 0);
+ CHECK_EXPECTED(mmapped_buffer);
+
+ auto buffer = make_shared_nothrow<SharedMemoryDmaAbleBuffer>(shm_fd.release(), mmapped_buffer.release());
+ CHECK_NOT_NULL_AS_EXPECTED(buffer, HAILO_OUT_OF_HOST_MEMORY);
+ return std::static_pointer_cast<DmaAbleBuffer>(buffer);
+ }
+
+ SharedMemoryDmaAbleBuffer(FileDescriptor &&shm_fd, MmapBuffer<void> &&mmapped_buffer) :
+ m_shm_fd(std::move(shm_fd)),
+ m_mmapped_buffer(std::move(mmapped_buffer))
+ {}
+
+ virtual void *user_address() override { return m_mmapped_buffer.address(); }
+ virtual size_t size() const override { return m_mmapped_buffer.size(); }
+ virtual vdma_mapped_buffer_driver_identifier buffer_identifier() override { return m_shm_fd; }
+
+private:
+
+ static Expected<FileDescriptor> open_shared_memory_fd(size_t size)
+ {
+ static const int INVALID_FD = -1;
+ static const char* VDMA_BUFFER_TYPE_MEMORY_NAME = "/memory/below4G/ram/below1G";
+
+ FileDescriptor type_mem_fd = posix_typed_mem_open(VDMA_BUFFER_TYPE_MEMORY_NAME, O_RDWR, POSIX_TYPED_MEM_ALLOCATE);
+ CHECK_AS_EXPECTED(INVALID_FD != type_mem_fd, HAILO_FILE_OPERATION_FAILURE,
+ "Error getting fd from typed memory of type {}, errno {}", VDMA_BUFFER_TYPE_MEMORY_NAME, errno);
+
+ FileDescriptor shm_fd = shm_open(SHM_ANON, O_RDWR | O_CREAT, 0777);
+ CHECK_AS_EXPECTED(INVALID_FD != shm_fd, HAILO_FILE_OPERATION_FAILURE,
+ "Error creating shm object, errno is: {}", errno);
+
+        // Backs the shared memory object with physical memory. After calling shm_ctl, the type_mem_fd can be released.
+ int err = shm_ctl(shm_fd, SHMCTL_ANON | SHMCTL_TYMEM, (uint64_t)type_mem_fd, size);
+ CHECK_AS_EXPECTED(-1 != err, HAILO_FILE_OPERATION_FAILURE,
+ "Error backing shm object in physical memory, errno is: {}", errno);
+
+ return shm_fd;
+ }
+
+ // Initialization dependency
+ FileDescriptor m_shm_fd;
+ MmapBuffer<void> m_mmapped_buffer;
+};
+
+Expected<DmaAbleBufferPtr> DmaAbleBuffer::create(size_t size, void *user_address)
+{
+ CHECK_AS_EXPECTED(nullptr == user_address, HAILO_NOT_SUPPORTED, "Mapping user address is not supported on QNX");
+ return SharedMemoryDmaAbleBuffer::create(size);
+}
+
+Expected<DmaAbleBufferPtr> DmaAbleBuffer::create(HailoRTDriver &driver, size_t size, void *user_address)
+{
+    // QNX doesn't need the driver for the allocation.
+ (void)driver;
+ return DmaAbleBuffer::create(size, user_address);
+}
+
+#else
+#error "unsupported platform!"
+#endif
+
+} /* namespace vdma */
+} /* namespace hailort */
--- /dev/null
+/**
+ * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+**/
+/**
+ * @file dma_able_buffer.hpp
+ * @brief A buffer that can be mapped to some device for dma operations.
+ * There are several options for allocating that buffer:
+ * 1. No allocation - the user provides their own buffer address. The buffer must be page aligned.
+ * 2. Normal allocation - a page aligned allocation. This is the default option for linux and windows.
+ * 3. Driver allocation - on some platforms, default user mode memory allocation is not DMA-able. To overcome
+ *    this, we allocate the buffer in low memory using the hailort driver. We check for this by querying
+ *    HailoRTDriver::allocate_driver_buffer().
+ * 4. QNX shared memory allocation - for QNX, in order to pass the buffer to the resources manager, we need to
+ *    create a shared memory object and pass a handle to it in the mapping. TODO: HRT-10298 implement this.
+ * A short usage sketch follows this header.
+ **/
+
+#ifndef _HAILO_DMA_ABLE_BUFFER_HPP_
+#define _HAILO_DMA_ABLE_BUFFER_HPP_
+
+#include "hailo/expected.hpp"
+#include "os/hailort_driver.hpp"
+#include "os/mmap_buffer.hpp"
+
+namespace hailort {
+namespace vdma {
+
+class DmaAbleBuffer;
+using DmaAbleBufferPtr = std::shared_ptr<DmaAbleBuffer>;
+
+class DmaAbleBuffer {
+public:
+ // If user_address is not nullptr, allocation is not needed.
+ static Expected<DmaAbleBufferPtr> create(size_t size, void *user_address = nullptr);
+
+    // The driver is used only if driver.allocate_driver_buffer() is true and the user address is nullptr.
+ static Expected<DmaAbleBufferPtr> create(HailoRTDriver &driver, size_t size, void *user_address = nullptr);
+
+ DmaAbleBuffer() = default;
+ DmaAbleBuffer(DmaAbleBuffer &&other) = delete;
+ DmaAbleBuffer(const DmaAbleBuffer &other) = delete;
+ DmaAbleBuffer &operator=(const DmaAbleBuffer &other) = delete;
+ DmaAbleBuffer &operator=(DmaAbleBuffer &&other) = delete;
+ virtual ~DmaAbleBuffer() = default;
+
+ virtual void* user_address() = 0;
+ virtual size_t size() const = 0;
+ virtual vdma_mapped_buffer_driver_identifier buffer_identifier() = 0;
+};
+
+} /* namespace vdma */
+} /* namespace hailort */
+
+#endif /* _HAILO_DMA_ABLE_BUFFER_HPP_ */
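A rough usage sketch for the factory declared above, assuming a valid HailoRTDriver named `driver`; the helper name is hypothetical and error handling is abbreviated:

#include "vdma/memory/dma_able_buffer.hpp"

hailo_status allocate_dma_able_sketch(hailort::HailoRTDriver &driver, size_t size)
{
    // Passing no user address lets the factory pick the allocation scheme:
    // driver allocation when driver.allocate_driver_buffer() is true,
    // otherwise a page-aligned (or, on QNX, shared-memory) allocation.
    auto buffer = hailort::vdma::DmaAbleBuffer::create(driver, size);
    if (!buffer) {
        return buffer.status();
    }

    // buffer.value() is now suitable for HailoRTDriver::vdma_buffer_map().
    return HAILO_SUCCESS;
}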
+++ /dev/null
-/**
- * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved.
- * Distributed under the MIT license (https://opensource.org/licenses/MIT)
-**/
-/**
- * @file vmda_mapped_buffer.cpp
- * @brief Vdma mapped buffer implementation
- **/
-
-#include "hailo/dma_mapped_buffer.hpp"
-
-#include "vdma/memory/mapped_buffer_impl.hpp"
-#include "vdma/vdma_device.hpp"
-
-
-namespace hailort {
-
-static Expected<HailoRTDriver::DmaDirection> convert_flags_to_driver_enum(hailo_vdma_buffer_direction_flags_t data_direction)
-{
- static const auto BOTH_DIRECTIONS = HAILO_VDMA_BUFFER_DIRECTION_FLAGS_H2D | HAILO_VDMA_BUFFER_DIRECTION_FLAGS_D2H;
- if ((data_direction & BOTH_DIRECTIONS) == BOTH_DIRECTIONS) {
- return HailoRTDriver::DmaDirection::BOTH;
- }
-
- if ((data_direction & HAILO_VDMA_BUFFER_DIRECTION_FLAGS_H2D) == HAILO_VDMA_BUFFER_DIRECTION_FLAGS_H2D) {
- return HailoRTDriver::DmaDirection::H2D;
- }
-
- if ((data_direction & HAILO_VDMA_BUFFER_DIRECTION_FLAGS_D2H) == HAILO_VDMA_BUFFER_DIRECTION_FLAGS_D2H) {
- return HailoRTDriver::DmaDirection::D2H;
- }
-
- return make_unexpected(HAILO_INVALID_ARGUMENT);
-}
-
-// TODO: this should maybe be a vdevice (for mapping buffers to multiple devs)
-// TODO: a helper function for the cast to VdmaDevice
-Expected<DmaMappedBuffer> DmaMappedBuffer::create(size_t size,
- hailo_vdma_buffer_direction_flags_t data_direction_flags, Device &device)
-{
- static const auto ALLOCATE_BUFFER = nullptr;
- return create(ALLOCATE_BUFFER, size, data_direction_flags, device);
-}
-
-Expected<DmaMappedBuffer> DmaMappedBuffer::create_from_user_address(void *user_address, size_t size,
- hailo_vdma_buffer_direction_flags_t data_direction_flags, Device &device)
-{
- CHECK_ARG_NOT_NULL_AS_EXPECTED(user_address);
- return create(user_address, size, data_direction_flags, device);
-}
-
-Expected<DmaMappedBuffer> DmaMappedBuffer::create(void *user_address, size_t size,
- hailo_vdma_buffer_direction_flags_t data_direction_flags, Device &device)
-{
- const auto device_type = device.get_type();
- CHECK_AS_EXPECTED(((Device::Type::INTEGRATED == device_type) || (Device::Type::PCIE == device_type)),
- HAILO_INVALID_ARGUMENT, "Invalid device type (expected integrated/pcie, received {})", device_type);
- VdmaDevice *vdma_device = reinterpret_cast<VdmaDevice*>(&device);
-
- auto data_direction = convert_flags_to_driver_enum(data_direction_flags);
- CHECK_EXPECTED(data_direction, "Invalid direction flags received {}", data_direction_flags);
-
- auto pimpl_exp = Impl::create(vdma_device->get_driver(), data_direction.release(), size, user_address);
- CHECK_EXPECTED(pimpl_exp);
-
- auto pimpl = make_unique_nothrow<Impl>(pimpl_exp.release());
- CHECK_NOT_NULL_AS_EXPECTED(pimpl, HAILO_OUT_OF_HOST_MEMORY);
-
- return DmaMappedBuffer(std::move(pimpl));
-}
-
-DmaMappedBuffer::DmaMappedBuffer(std::unique_ptr<Impl> pimpl) :
- pimpl(std::move(pimpl))
-{}
-
-// Note: These can't be defined in the header due to the use of pimpl (it'll cause a compilation error)
-DmaMappedBuffer::DmaMappedBuffer(DmaMappedBuffer &&other) noexcept = default;
-DmaMappedBuffer::~DmaMappedBuffer() = default;
-
-void *DmaMappedBuffer::user_address()
-{
- return pimpl->user_address();
-}
-
-size_t DmaMappedBuffer::size() const
-{
- return pimpl->size();
-}
-
-hailo_status DmaMappedBuffer::synchronize()
-{
- static constexpr auto BUFFER_START = 0;
- return pimpl->synchronize(BUFFER_START, size());
-}
-
-} /* namespace hailort */
--- /dev/null
+/**
+ * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+**/
+/**
+ * @file mapped_buffer.cpp
+ * @brief Vdma mapped buffer implementation
+ **/
+
+#include "mapped_buffer.hpp"
+
+#include "vdma/vdma_device.hpp"
+
+
+namespace hailort {
+namespace vdma {
+
+Expected<MappedBuffer> MappedBuffer::create(HailoRTDriver &driver,
+ std::shared_ptr<DmaAbleBuffer> buffer, HailoRTDriver::DmaDirection data_direction)
+{
+ auto status = HAILO_UNINITIALIZED;
+ auto result = MappedBuffer(driver, buffer, data_direction, status);
+ CHECK_SUCCESS_AS_EXPECTED(status);
+
+ return result;
+}
+
+Expected<MappedBufferPtr> MappedBuffer::create_shared(HailoRTDriver &driver, std::shared_ptr<DmaAbleBuffer> buffer,
+ HailoRTDriver::DmaDirection data_direction)
+{
+ auto dma_mapped_buffer = create(driver, buffer, data_direction);
+ CHECK_EXPECTED(dma_mapped_buffer);
+
+ auto result = make_shared_nothrow<MappedBuffer>(dma_mapped_buffer.release());
+ CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY);
+
+ return result;
+}
+
+Expected<MappedBuffer> MappedBuffer::create(HailoRTDriver &driver,
+ HailoRTDriver::DmaDirection data_direction, size_t size, void *user_address)
+{
+ auto buffer = DmaAbleBuffer::create(driver, size, user_address);
+ CHECK_EXPECTED(buffer);
+
+ return create(driver, buffer.release(), data_direction);
+}
+
+Expected<MappedBufferPtr> MappedBuffer::create_shared(HailoRTDriver &driver,
+ HailoRTDriver::DmaDirection data_direction, size_t size, void *user_address)
+{
+ auto dma_mapped_buffer = create(driver, data_direction, size, user_address);
+ CHECK_EXPECTED(dma_mapped_buffer);
+
+ auto result = make_shared_nothrow<MappedBuffer>(dma_mapped_buffer.release());
+ CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY);
+
+ return result;
+}
+
+MappedBuffer::MappedBuffer(HailoRTDriver &driver, std::shared_ptr<DmaAbleBuffer> buffer,
+ HailoRTDriver::DmaDirection data_direction, hailo_status &status) :
+ m_driver(driver),
+ m_buffer(buffer),
+ m_mapping_handle(HailoRTDriver::INVALID_DRIVER_VDMA_MAPPING_HANDLE_VALUE),
+ m_data_direction(data_direction)
+{
+ auto expected_handle = driver.vdma_buffer_map(m_buffer->user_address(), m_buffer->size(), m_data_direction,
+ m_buffer->buffer_identifier());
+ if (!expected_handle) {
+ LOGGER__ERROR("Mapping address {} to dma failed", m_buffer->user_address());
+ status = expected_handle.status();
+ return;
+ }
+
+ m_mapping_handle = expected_handle.release();
+ status = HAILO_SUCCESS;
+}
+
+MappedBuffer::~MappedBuffer()
+{
+ if (HailoRTDriver::INVALID_DRIVER_VDMA_MAPPING_HANDLE_VALUE != m_mapping_handle) {
+ m_driver.vdma_buffer_unmap(m_mapping_handle);
+ m_mapping_handle = HailoRTDriver::INVALID_DRIVER_VDMA_MAPPING_HANDLE_VALUE;
+ }
+}
+
+MappedBuffer::MappedBuffer(MappedBuffer &&other) noexcept :
+ m_driver(other.m_driver),
+ m_buffer(std::move(other.m_buffer)),
+ m_mapping_handle(std::exchange(other.m_mapping_handle, HailoRTDriver::INVALID_DRIVER_VDMA_MAPPING_HANDLE_VALUE)),
+ m_data_direction(other.m_data_direction)
+{}
+
+void* MappedBuffer::user_address()
+{
+ return m_buffer->user_address();
+}
+
+size_t MappedBuffer::size() const
+{
+ return m_buffer->size();
+}
+
+HailoRTDriver::VdmaBufferHandle MappedBuffer::handle()
+{
+ return m_mapping_handle;
+}
+
+hailo_status MappedBuffer::synchronize(HailoRTDriver::DmaSyncDirection sync_direction)
+{
+ static constexpr auto BUFFER_START = 0;
+ return synchronize(BUFFER_START, size(), sync_direction);
+}
+
+hailo_status MappedBuffer::synchronize(size_t offset, size_t count, HailoRTDriver::DmaSyncDirection sync_direction)
+{
+ CHECK(offset + count <= size(), HAILO_INVALID_ARGUMENT,
+ "Synchronizing {} bytes starting at offset {} will overflow (buffer size {})",
+ offset, count, size());
+ return m_driver.vdma_buffer_sync(m_mapping_handle, sync_direction, offset, count);
+}
+
+} /* namespace vdma */
+} /* namespace hailort */
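To make the cache-maintenance contract of synchronize() concrete, here is a hedged sketch of the read path (the helper name is an assumption, not HailoRT API): sync TO_HOST before the CPU reads device-written data, and TO_DEVICE after the CPU writes.

#include <cstring>

hailo_status copy_out_results_sketch(hailort::vdma::MappedBuffer &mapped, void *dst,
    size_t offset, size_t count)
{
    // The device wrote into the buffer - invalidate CPU caches before reading.
    auto status = mapped.synchronize(offset, count, hailort::HailoRTDriver::DmaSyncDirection::TO_HOST);
    if (HAILO_SUCCESS != status) {
        return status;
    }
    std::memcpy(dst, static_cast<uint8_t*>(mapped.user_address()) + offset, count);
    return HAILO_SUCCESS;
}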
--- /dev/null
+/**
+ * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+ **/
+/**
+ * @file mapped_buffer.hpp
+ * @brief A mapped buffer that is continuous in virtual memory, but not necessarily in physical memory.
+ * We map the buffer to the IOMMU.
+ *
+ * The buffer can be used only with the help of a descriptors list that contains pointers to the physically
+ * continuous "dma pages".
+ *
+ * There are 2 options to allocate the buffer:
+ * 1. User mode allocation - the user mode calls `malloc` or `mmap` to allocate the buffer, then
+ * using HailoRTDriver we map the buffer to the IOMMU (and pin the pages to avoid paging).
+ * This is the default option.
+ * 2. Kernel mode allocation - on some systems, user mode can't allocate the memory at a "dma-able" address,
+ * so we need to allocate the pages in the driver.
+ * A short usage sketch follows this header.
+ **/
+
+#ifndef _HAILO_DMA_MAPPED_BUFFER_HPP_
+#define _HAILO_DMA_MAPPED_BUFFER_HPP_
+
+#include "hailo/expected.hpp"
+#include "os/hailort_driver.hpp"
+#include "vdma/memory/dma_able_buffer.hpp"
+
+#include <memory>
+
+
+namespace hailort {
+namespace vdma {
+
+
+class MappedBuffer;
+using MappedBufferPtr = std::shared_ptr<MappedBuffer>;
+
+class MappedBuffer final
+{
+public:
+ // Maps the given DmaAbleBuffer in the right direction.
+ static Expected<MappedBuffer> create(HailoRTDriver &driver, std::shared_ptr<DmaAbleBuffer> buffer,
+ HailoRTDriver::DmaDirection data_direction);
+ static Expected<MappedBufferPtr> create_shared(HailoRTDriver &driver, std::shared_ptr<DmaAbleBuffer> buffer,
+ HailoRTDriver::DmaDirection data_direction);
+
+ // If user_address is nullptr, a buffer of size 'size' will be allocated and mapped to dma in 'data_direction'
+ // Otherwise, the buffer pointed to by user_address will be mapped to dma in 'data_direction'
+ static Expected<MappedBuffer> create(HailoRTDriver &driver, HailoRTDriver::DmaDirection data_direction,
+ size_t size, void *user_address = nullptr);
+ static Expected<MappedBufferPtr> create_shared(HailoRTDriver &driver, HailoRTDriver::DmaDirection data_direction,
+ size_t size, void *user_address = nullptr);
+
+
+ MappedBuffer(MappedBuffer &&other) noexcept;
+ MappedBuffer(const MappedBuffer &other) = delete;
+ MappedBuffer &operator=(const MappedBuffer &other) = delete;
+ MappedBuffer &operator=(MappedBuffer &&other) = delete;
+ ~MappedBuffer();
+
+ size_t size() const;
+ void *user_address();
+ HailoRTDriver::VdmaBufferHandle handle();
+ hailo_status synchronize(HailoRTDriver::DmaSyncDirection sync_direction);
+ // TODO: validate that offset is cache aligned (HRT-9811)
+ hailo_status synchronize(size_t offset, size_t count, HailoRTDriver::DmaSyncDirection sync_direction);
+
+private:
+ MappedBuffer(HailoRTDriver &driver, std::shared_ptr<DmaAbleBuffer> buffer, HailoRTDriver::DmaDirection data_direction,
+ hailo_status &status);
+
+ HailoRTDriver &m_driver;
+ std::shared_ptr<DmaAbleBuffer> m_buffer;
+ HailoRTDriver::VdmaBufferHandle m_mapping_handle;
+ const HailoRTDriver::DmaDirection m_data_direction;
+};
+
+} /* namespace vdma */
+} /* namespace hailort */
+
+#endif /* _HAILO_DMA_MAPPED_BUFFER_HPP_ */
\ No newline at end of file
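And the matching write path, as a sketch of the header above; `driver` and the page alignment of `data` are assumptions of this example:

hailo_status map_user_buffer_sketch(hailort::HailoRTDriver &driver, void *data, size_t size)
{
    // Map a caller-owned, page-aligned buffer for host-to-device transfers.
    auto mapped = hailort::vdma::MappedBuffer::create_shared(driver,
        hailort::HailoRTDriver::DmaDirection::H2D, size, data);
    if (!mapped) {
        return mapped.status();
    }

    // After the CPU fills `data`, flush caches toward the device.
    return mapped.value()->synchronize(hailort::HailoRTDriver::DmaSyncDirection::TO_DEVICE);
}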
+++ /dev/null
-/**\r
- * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved.\r
- * Distributed under the MIT license (https://opensource.org/licenses/MIT)\r
-**/\r
-/**\r
- * @file mapped_buffer_factory.cpp\r
- * @brief Static utility class for creating DmaMappedBuffers internally in hailort\r
- **/\r
-\r
-#include "vdma/memory/mapped_buffer_factory.hpp"\r
-#include "vdma/memory/mapped_buffer_impl.hpp"\r
-\r
-namespace hailort\r
-{\r
-namespace vdma\r
-{\r
-\r
-Expected<DmaMappedBuffer> MappedBufferFactory::create_mapped_buffer(size_t size,\r
- HailoRTDriver::DmaDirection data_direction, HailoRTDriver &driver)\r
-{\r
- auto pimpl_exp = DmaMappedBuffer::Impl::create(driver, data_direction, size);\r
- CHECK_EXPECTED(pimpl_exp);\r
-\r
- auto pimpl = make_unique_nothrow<DmaMappedBuffer::Impl>(pimpl_exp.release());\r
- CHECK_NOT_NULL_AS_EXPECTED(pimpl, HAILO_OUT_OF_HOST_MEMORY);\r
- return DmaMappedBuffer(std::move(pimpl));\r
-}\r
-\r
-} /* namespace vdma */\r
-} /* namespace hailort */\r
+++ /dev/null
-/**\r
- * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved.\r
- * Distributed under the MIT license (https://opensource.org/licenses/MIT)\r
-**/\r
-/**\r
- * @file mapped_buffer_factory.hpp\r
- * @brief Static utility class for creating DmaMappedBuffers internally in hailort\r
- **/\r
-\r
-#ifndef _HAILO_MAPPED_BUFFER_FACTORY_HPP_\r
-#define _HAILO_MAPPED_BUFFER_FACTORY_HPP_\r
-\r
-#include "hailo/hailort.h"\r
-#include "hailo/dma_mapped_buffer.hpp"\r
-#include "os/hailort_driver.hpp"\r
-\r
-namespace hailort\r
-{\r
-namespace vdma\r
-{\r
-\r
-class MappedBufferFactory\r
-{\r
-public:\r
- MappedBufferFactory() = delete;\r
- static Expected<DmaMappedBuffer> create_mapped_buffer(size_t size,\r
- HailoRTDriver::DmaDirection data_direction, HailoRTDriver &driver);\r
-};\r
-\r
-} /* namespace vdma */\r
-} /* namespace hailort */\r
-\r
-#endif /* _HAILO_MAPPED_BUFFER_FACTORY_HPP_ */\r
+++ /dev/null
-/**
- * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
- * Distributed under the MIT license (https://opensource.org/licenses/MIT)
- **/
-/**
- * @file mapped_buffer_impl.cpp
- * @brief Dma mapped buffer pimpl class implementation
- **/
-#include "mapped_buffer_impl.hpp"
-
-namespace hailort {
-
-#if defined(__linux__) || defined(_MSC_VER)
-
-Expected<DmaMappedBuffer::Impl> DmaMappedBuffer::Impl::create(HailoRTDriver &driver,
- HailoRTDriver::DmaDirection data_direction, size_t size, void *user_address)
-{
- if (nullptr != user_address) {
- // User allocated buffer - create an empty MmapBuffer<void> (it doesn't hold the buffer)
- auto status = HAILO_UNINITIALIZED;
- auto result = DmaMappedBuffer::Impl(HailoRTDriver::INVALID_DRIVER_BUFFER_HANDLE_VALUE, size,
- data_direction, user_address, MmapBuffer<void>(), driver, status);
- CHECK_SUCCESS_AS_EXPECTED(status);
-
- return result;
- } else if (driver.allocate_driver_buffer()) {
- // Allocate buffer via driver
- auto driver_buffer_handle = driver.vdma_low_memory_buffer_alloc(size);
- CHECK_EXPECTED(driver_buffer_handle);
-
- uintptr_t driver_buff_handle = driver_buffer_handle.release();
-
- auto mapped_buffer = MmapBuffer<void>::create_file_map(size, driver.fd(), driver_buff_handle);
- CHECK_EXPECTED(mapped_buffer);
-
- auto status = HAILO_UNINITIALIZED;
- auto result = DmaMappedBuffer::Impl(driver_buff_handle, size, data_direction, mapped_buffer.release(),
- driver, status);
- CHECK_SUCCESS_AS_EXPECTED(status);
-
- return result;
- } else {
- // Standard userspace allocation
- auto mapped_buffer = MmapBuffer<void>::create_shared_memory(size);
- CHECK_EXPECTED(mapped_buffer);
-
- auto status = HAILO_UNINITIALIZED;
- auto result = DmaMappedBuffer::Impl(HailoRTDriver::INVALID_DRIVER_BUFFER_HANDLE_VALUE, size,
- data_direction, mapped_buffer.release(), driver, status);
- CHECK_SUCCESS_AS_EXPECTED(status);
-
- return result;
- }
-}
-
-DmaMappedBuffer::Impl::Impl(vdma_mapped_buffer_driver_identifier driver_allocated_buffer_id,
- size_t size, HailoRTDriver::DmaDirection data_direction, void *user_address,
- MmapBuffer<void> &&mapped_buffer, HailoRTDriver &driver, hailo_status &status) :
- m_driver(driver),
- m_driver_allocated_buffer_id(driver_allocated_buffer_id),
- m_mapping_handle(HailoRTDriver::INVALID_DRIVER_VDMA_MAPPING_HANDLE_VALUE),
- m_mapped_buffer(std::move(mapped_buffer)),
- m_size(size),
- m_data_direction(data_direction),
- m_user_address(user_address)
-{
- if (m_mapped_buffer.is_mapped() && (m_user_address != m_mapped_buffer.address())) {
- status = HAILO_INVALID_ARGUMENT;
- return;
- }
-
- auto expected_handle = driver.vdma_buffer_map(m_user_address, m_size, m_data_direction,
- m_driver_allocated_buffer_id);
- if (!expected_handle) {
- status = expected_handle.status();
- return;
- }
-
- m_mapping_handle = expected_handle.release();
- status = HAILO_SUCCESS;
-}
-
-DmaMappedBuffer::Impl::Impl(vdma_mapped_buffer_driver_identifier driver_allocated_buffer_id,
- size_t size, HailoRTDriver::DmaDirection data_direction,
- MmapBuffer<void> &&mapped_buffer, HailoRTDriver &driver, hailo_status &status) :
- Impl(driver_allocated_buffer_id, size, data_direction, mapped_buffer.address(), std::move(mapped_buffer), driver, status)
-{}
-
-DmaMappedBuffer::Impl::Impl(Impl &&other) noexcept :
- m_driver(other.m_driver),
- m_driver_allocated_buffer_id(std::exchange(other.m_driver_allocated_buffer_id, HailoRTDriver::INVALID_DRIVER_BUFFER_HANDLE_VALUE)),
- m_mapping_handle(std::exchange(other.m_mapping_handle, HailoRTDriver::INVALID_DRIVER_VDMA_MAPPING_HANDLE_VALUE)),
- m_mapped_buffer(std::move(other.m_mapped_buffer)),
- m_size(std::move(other.m_size)),
- m_data_direction(std::move(other.m_data_direction)),
- m_user_address(std::move(other.m_user_address))
-{}
-
-DmaMappedBuffer::Impl::~Impl()
-{
- if (HailoRTDriver::INVALID_DRIVER_VDMA_MAPPING_HANDLE_VALUE != m_mapping_handle) {
- m_driver.vdma_buffer_unmap(m_mapping_handle);
- m_mapping_handle = HailoRTDriver::INVALID_DRIVER_VDMA_MAPPING_HANDLE_VALUE;
- }
-
- if (HailoRTDriver::INVALID_DRIVER_BUFFER_HANDLE_VALUE != m_driver_allocated_buffer_id) {
- m_driver.vdma_low_memory_buffer_free(m_driver_allocated_buffer_id);
- m_driver_allocated_buffer_id = HailoRTDriver::INVALID_DRIVER_BUFFER_HANDLE_VALUE;
- }
-}
-
-void* DmaMappedBuffer::Impl::user_address()
-{
- return m_user_address;
-}
-
-size_t DmaMappedBuffer::Impl::size() const
-{
- return m_size;
-}
-
-HailoRTDriver::VdmaBufferHandle DmaMappedBuffer::Impl::handle()
-{
- return m_mapping_handle;
-}
-
-hailo_status DmaMappedBuffer::Impl::synchronize(size_t offset, size_t count)
-{
- CHECK(offset + count <= size(), HAILO_INVALID_ARGUMENT,
- "Synchronizing {} bytes starting at offset {} will overflow (buffer size {})",
- offset, count, size());
- return m_driver.vdma_buffer_sync(m_mapping_handle, m_data_direction, offset, count);
-}
-
-#elif defined(__QNX__)
-
-#include <fcntl.h>
-
-const int DmaMappedBuffer::Impl::INVALID_FD = -1;
-const shm_handle_t DmaMappedBuffer::Impl::INVALID_HANDLE = (shm_handle_t)-1;
-const char* DmaMappedBuffer::Impl::VDMA_BUFFER_TYPE_MEMORY_NAME = "/memory/below4G/ram/below1G";
-
-Expected<DmaMappedBuffer::Impl> DmaMappedBuffer::Impl::create(HailoRTDriver &driver,
- HailoRTDriver::DmaDirection data_direction, size_t size, void *user_address)
-{
- // TODO: HRT-9508
- CHECK_AS_EXPECTED(user_address == nullptr, HAILO_NOT_IMPLEMENTED, "User allocated buffers not supported on qnx");
-
- // Destructor of type_mem_fd will close fd
- FileDescriptor type_mem_fd(posix_typed_mem_open(VDMA_BUFFER_TYPE_MEMORY_NAME, O_RDWR, POSIX_TYPED_MEM_ALLOCATE));
- if (INVALID_FD == type_mem_fd) {
- LOGGER__ERROR("Error getting fd from typed memory of type {}, errno {}\n", VDMA_BUFFER_TYPE_MEMORY_NAME,
- errno);
- return make_unexpected(HAILO_INTERNAL_FAILURE);
- }
-
- vdma_mapped_buffer_driver_identifier driver_buff_handle;
- driver_buff_handle.shm_fd = shm_open(SHM_ANON, O_RDWR | O_CREAT, 0777);
- CHECK_AS_EXPECTED(INVALID_FD != driver_buff_handle.shm_fd, HAILO_INTERNAL_FAILURE,
- "Error creating shm object, errno is: {}", errno);
-
- // backs the shared memory object with physical memory
- int err = shm_ctl(driver_buff_handle.shm_fd, SHMCTL_ANON | SHMCTL_TYMEM, (uint64_t)type_mem_fd,
- size);
- if (-1 == err) {
- LOGGER__ERROR("Error backing shm object in physical memory, errno is: {}", errno);
- close(driver_buff_handle.shm_fd);
- return make_unexpected(HAILO_INTERNAL_FAILURE);
- }
-
- // Create shared memory handle to send to driver
- err = shm_create_handle(driver_buff_handle.shm_fd, driver.resource_manager_pid(), O_RDWR,
- &driver_buff_handle.shm_handle, 0);
- if (0 != err) {
- LOGGER__ERROR("Error creating shm object handle, errno is: {}", errno);
- close(driver_buff_handle.shm_fd);
- return make_unexpected(HAILO_INTERNAL_FAILURE);
- }
-
- void *address = mmap(0, size, PROT_WRITE | PROT_READ | PROT_NOCACHE, MAP_SHARED, driver_buff_handle.shm_fd, 0);
- if (MAP_FAILED == address) {
- LOGGER__ERROR("Failed to mmap buffer with errno:{}", errno);
- shm_delete_handle(driver_buff_handle.shm_handle);
- close(driver_buff_handle.shm_fd);
- return make_unexpected(HAILO_OUT_OF_HOST_MEMORY);
- }
-
- hailo_status status = HAILO_UNINITIALIZED;
- auto result = DmaMappedBuffer::Impl(address, size, data_direction, driver_buff_handle.shm_handle,
- driver_buff_handle.shm_fd, driver, status);
- if (HAILO_SUCCESS != status) {
- LOGGER__ERROR("Failed to map buffer to vdma");
- munmap(address, size);
- shm_delete_handle(driver_buff_handle.shm_handle);
- close(driver_buff_handle.shm_fd);
- return make_unexpected(status);
- }
-
- return result;
-}
-
-DmaMappedBuffer::Impl::Impl(void *addr, size_t size, HailoRTDriver::DmaDirection data_direction,
- shm_handle_t shm_handle, int shm_fd, HailoRTDriver &driver, hailo_status &status) :
- m_driver(driver),
- m_address(addr),
- m_size(size),
- m_data_direction(data_direction)
-{
- m_driver_allocated_buffer_id.shm_handle = shm_handle;
- m_driver_allocated_buffer_id.shm_fd = shm_fd;
-
- auto expected_handle = driver.vdma_buffer_map(addr, size, data_direction, m_driver_allocated_buffer_id);
- if (!expected_handle) {
- status = expected_handle.status();
- return;
- }
-
- m_mapping_handle = expected_handle.release();
- status = HAILO_SUCCESS;
-}
-
-DmaMappedBuffer::Impl::Impl(Impl &&other) noexcept :
- m_driver(other.m_driver),
- m_address(std::exchange(other.m_address, nullptr)),
- m_size(std::move(other.m_size)),
- m_data_direction(std::move(other.m_data_direction)),
- m_mapping_handle(std::exchange(other.m_mapping_handle, HailoRTDriver::INVALID_DRIVER_VDMA_MAPPING_HANDLE_VALUE))
-{
- m_driver_allocated_buffer_id.shm_handle = std::exchange(other.m_driver_allocated_buffer_id.shm_handle, INVALID_HANDLE);
- m_driver_allocated_buffer_id.shm_fd = std::exchange(other.m_driver_allocated_buffer_id.shm_fd, INVALID_FD);
-}
-
-DmaMappedBuffer::Impl::~Impl()
-{
- if (HailoRTDriver::INVALID_DRIVER_VDMA_MAPPING_HANDLE_VALUE != m_mapping_handle) {
- m_driver.vdma_buffer_unmap(m_mapping_handle);
- m_mapping_handle = HailoRTDriver::INVALID_DRIVER_VDMA_MAPPING_HANDLE_VALUE;
- }
-
- if (nullptr != m_address) {
- if (0 != munmap(m_address, m_size)) {
- LOGGER__ERROR("Error unmapping memory at address {}, Errno: {}", m_address, errno);
- }
- }
-
- if (INVALID_FD != m_driver_allocated_buffer_id.shm_fd) {
- if (0 != close(m_driver_allocated_buffer_id.shm_fd)) {
- LOGGER__ERROR("Error closing shared memory fd, Errno: {}", errno);
- }
- }
-}
-
-void* DmaMappedBuffer::Impl::user_address()
-{
- return m_address;
-}
-size_t DmaMappedBuffer::Impl::size() const
-{
- return m_size;
-}
-
-HailoRTDriver::VdmaBufferHandle DmaMappedBuffer::Impl::handle()
-{
- return m_mapping_handle;
-}
-
-hailo_status DmaMappedBuffer::Impl::synchronize(size_t offset, size_t count)
-{
- CHECK(offset + count <= size(), HAILO_INVALID_ARGUMENT,
- "Synchronizing {} bytes starting at offset {} will overflow (buffer size {})",
- offset, count, size());
- return m_driver.vdma_buffer_sync(m_mapping_handle, m_data_direction, offset, count);
-}
-
-#else
-#error "unsupported platform!"
-#endif // defined(__linux__) || defined(_MSC_VER)
-
-} /* namespace hailort */
\ No newline at end of file
+++ /dev/null
-/**
- * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
- * Distributed under the MIT license (https://opensource.org/licenses/MIT)
- **/
-/**
- * @file mapped_buffer_impl.hpp
- * @brief Vdma mapped buffer pimpl class defintion
- **/
-#ifndef _HAILO_VDMA_MAPPED_BUFFER_IMPL_HPP_
-#define _HAILO_VDMA_MAPPED_BUFFER_IMPL_HPP_
-
-#include "hailo/dma_mapped_buffer.hpp"
-#include "os/mmap_buffer.hpp"
-#include "os/hailort_driver.hpp"
-#include "hailo/expected.hpp"
-
-namespace hailort {
-
-#if defined(__linux__) || defined(_MSC_VER)
-
-class DmaMappedBuffer::Impl final {
-public:
- // If user_address is nullptr, a buffer of size 'size' will be allocated and mapped to dma in 'data_direction'
- // Otherwise, the buffer pointed to by user_address will be mapped to dma in 'data_direction'
- static Expected<Impl> create(HailoRTDriver &driver, HailoRTDriver::DmaDirection data_direction,
- size_t size, void *user_address = nullptr);
-
- Impl(Impl &&other) noexcept;
- Impl(const Impl &other) = delete;
- Impl &operator=(const Impl &other) = delete;
- Impl &operator=(Impl &&other) = delete;
- ~Impl();
-
- void* user_address();
- size_t size() const;
- HailoRTDriver::VdmaBufferHandle handle();
- // TODO: validate that offset is cache aligned (HRT-9811)
- hailo_status synchronize(size_t offset, size_t count);
-
-private:
- Impl(vdma_mapped_buffer_driver_identifier driver_allocated_buffer_id, size_t size,
- HailoRTDriver::DmaDirection data_direction, void *user_address, MmapBuffer<void> &&mapped_buffer,
- HailoRTDriver &driver, hailo_status &status);
- Impl(vdma_mapped_buffer_driver_identifier driver_allocated_buffer_id, size_t size,
- HailoRTDriver::DmaDirection data_direction, MmapBuffer<void> &&mapped_buffer, HailoRTDriver &driver,
- hailo_status &status);
-
- HailoRTDriver &m_driver;
- vdma_mapped_buffer_driver_identifier m_driver_allocated_buffer_id;
- HailoRTDriver::VdmaBufferHandle m_mapping_handle;
- MmapBuffer<void> m_mapped_buffer;
- const size_t m_size;
- const HailoRTDriver::DmaDirection m_data_direction;
- void *const m_user_address;
-};
-
-#elif defined(__QNX__)
-
-// TODO: merge qnx and non-qnx impls (HRT-9508)
-class DmaMappedBuffer::Impl final {
-public:
- static Expected<Impl> create(HailoRTDriver &driver, HailoRTDriver::DmaDirection data_direction,
- size_t size, void *user_address = nullptr);
-
- Impl(const Impl &other) = delete;
- Impl &operator=(const Impl &other) = delete;
- Impl &operator=(Impl &&other) = delete;
- Impl(Impl &&other) noexcept;
- ~Impl();
-
- void* user_address();
- size_t size() const;
- HailoRTDriver::VdmaBufferHandle handle();
- hailo_status synchronize(size_t offset, size_t count);
-
-private:
- Impl(void *addr, size_t size, HailoRTDriver::DmaDirection data_direction,
- shm_handle_t shm_handle, int shm_fd, HailoRTDriver &driver, hailo_status &status);
-
- static const int INVALID_FD;
- static const shm_handle_t INVALID_HANDLE;
- static const char* VDMA_BUFFER_TYPE_MEMORY_NAME;
-
- HailoRTDriver &m_driver;
- void *m_address;
- const size_t m_size;
- const HailoRTDriver::DmaDirection m_data_direction;
- vdma_mapped_buffer_driver_identifier m_driver_allocated_buffer_id;
- HailoRTDriver::VdmaBufferHandle m_mapping_handle;
-};
-
-#else
-#error "unsupported platform!"
-#endif // defined(__linux__) || defined(_MSC_VER)
-
-} /* namespace hailort */
-
-#endif /* _HAILO_VDMA_MAPPED_BUFFER_IMPL_HPP_ */
\ No newline at end of file
#include "vdma/memory/sg_buffer.hpp"
#include "vdma/channel/channel_id.hpp"
-#include "vdma/memory/mapped_buffer_factory.hpp"
namespace hailort {
namespace vdma {
Expected<SgBuffer> SgBuffer::create(HailoRTDriver &driver, size_t size, uint32_t desc_count, uint16_t desc_page_size,
- HailoRTDriver::DmaDirection data_direction, ChannelId channel_id)
+ bool is_circular, HailoRTDriver::DmaDirection data_direction, ChannelId channel_id)
{
CHECK_AS_EXPECTED(size <= (desc_count * desc_page_size), HAILO_INTERNAL_FAILURE,
"Requested buffer size {} must be smaller than {}", size, (desc_count * desc_page_size));
CHECK_AS_EXPECTED((size % desc_page_size) == 0, HAILO_INTERNAL_FAILURE,
"SgBuffer size must be a multiple of descriptors page size (size {})", size);
- auto mapped_buffer_exp = MappedBufferFactory::create_mapped_buffer(size,
- data_direction, driver);
- CHECK_EXPECTED(mapped_buffer_exp);
+ auto mapped_buffer = MappedBuffer::create_shared(driver, data_direction, size);
+ CHECK_EXPECTED(mapped_buffer);
- auto mapped_buffer = make_shared_nothrow<DmaMappedBuffer>(mapped_buffer_exp.release());
- CHECK_NOT_NULL_AS_EXPECTED(mapped_buffer, HAILO_OUT_OF_HOST_MEMORY);
-
- auto desc_list_exp = DescriptorList::create(desc_count, desc_page_size, driver);
+ auto desc_list_exp = DescriptorList::create(desc_count, desc_page_size, is_circular, driver);
CHECK_EXPECTED(desc_list_exp);
auto desc_list = make_shared_nothrow<DescriptorList>(desc_list_exp.release());
assert((desc_count * desc_page_size) <= std::numeric_limits<uint32_t>::max());
- auto status = desc_list->configure_to_use_buffer(*mapped_buffer, channel_id);
+ auto status = desc_list->configure_to_use_buffer(*mapped_buffer.value(), channel_id);
CHECK_SUCCESS_AS_EXPECTED(status);
- return SgBuffer(mapped_buffer, desc_list);
+ return SgBuffer(mapped_buffer.release(), desc_list);
}
-SgBuffer::SgBuffer(std::shared_ptr<DmaMappedBuffer> mapped_buffer, std::shared_ptr<DescriptorList> desc_list) :
+SgBuffer::SgBuffer(std::shared_ptr<MappedBuffer> mapped_buffer, std::shared_ptr<DescriptorList> desc_list) :
m_mapped_buffer(mapped_buffer),
m_desc_list(desc_list)
{}
return static_cast<uint32_t>(m_desc_list->count());
}
-uint8_t SgBuffer::depth() const
-{
- return m_desc_list->depth();
-}
-
-std::shared_ptr<DescriptorList> SgBuffer::get_desc_list()
-{
- return m_desc_list;
-}
-
-// TODO: Remove after HRT-7838
-void* SgBuffer::get_user_address()
-{
- return m_mapped_buffer->user_address();
-}
-
-hailo_status SgBuffer::read(void *buf_dst, size_t count, size_t offset, bool should_sync)
+hailo_status SgBuffer::read(void *buf_dst, size_t count, size_t offset)
{
CHECK(count + offset <= m_mapped_buffer->size(), HAILO_INSUFFICIENT_BUFFER);
if (count == 0) {
return HAILO_SUCCESS;
}
- if (should_sync) {
- const auto status = m_mapped_buffer->synchronize();
- CHECK_SUCCESS(status, "Failed synching SgBuffer buffer on read");
- }
+ const auto status = m_mapped_buffer->synchronize(offset, count, HailoRTDriver::DmaSyncDirection::TO_HOST);
+ CHECK_SUCCESS(status, "Failed synching SgBuffer buffer on read");
const auto src_addr = static_cast<uint8_t*>(m_mapped_buffer->user_address()) + offset;
memcpy(buf_dst, src_addr, count);
const auto dst_addr = static_cast<uint8_t*>(m_mapped_buffer->user_address()) + offset;
std::memcpy(dst_addr, buf_src, count);
- const auto status = m_mapped_buffer->synchronize();
+ const auto status = m_mapped_buffer->synchronize(offset, count, HailoRTDriver::DmaSyncDirection::TO_DEVICE);
CHECK_SUCCESS(status, "Failed synching SgBuffer buffer on write");
return HAILO_SUCCESS;
}
Expected<uint32_t> SgBuffer::program_descriptors(size_t transfer_size, InterruptsDomain last_desc_interrupts_domain,
- size_t desc_offset, bool is_circular)
+ size_t desc_offset)
{
- return m_desc_list->program_last_descriptor(transfer_size, last_desc_interrupts_domain, desc_offset, is_circular);
+ return m_desc_list->program_last_descriptor(transfer_size, last_desc_interrupts_domain, desc_offset);
}
hailo_status SgBuffer::reprogram_device_interrupts_for_end_of_batch(size_t transfer_size, uint16_t batch_size,
* @brief Scatter-gather vdma buffer, from the user-mode point of view the buffer is continuous,
* but not from the physical-memory point of view.
* The sg buffer contains 2 parts:
- * - DmaMappedBuffer - the actual buffer stores the data.
- * - Descriptors list - each descritpor points to a single "dma page" in the DmaMappedBuffer.
+ * - MappedBuffer - the actual buffer that stores the data.
+ * - Descriptors list - each descriptor points to a single "dma page" in the MappedBuffer.
 * The hw accepts the descriptors list address and parses it to get the actual data.
 * A creation sketch follows the class declaration below.
**/
#ifndef _HAILO_VDMA_SG_BUFFER_HPP_
#define _HAILO_VDMA_SG_BUFFER_HPP_
-#include "hailo/dma_mapped_buffer.hpp"
-
#include "os/hailort_driver.hpp"
#include "vdma/memory/vdma_buffer.hpp"
#include "vdma/memory/descriptor_list.hpp"
+#include "vdma/memory/mapped_buffer.hpp"
namespace hailort {
class SgBuffer final : public VdmaBuffer {
public:
static Expected<SgBuffer> create(HailoRTDriver &driver, size_t size, uint32_t desc_count, uint16_t desc_page_size,
- HailoRTDriver::DmaDirection data_direction, vdma::ChannelId channel_id);
+ bool is_circular, HailoRTDriver::DmaDirection data_direction, vdma::ChannelId channel_id);
virtual ~SgBuffer() = default;
virtual uint64_t dma_address() const override;
virtual uint16_t desc_page_size() const override;
virtual uint32_t descs_count() const override;
- uint8_t depth() const;
-
- std::shared_ptr<DescriptorList> get_desc_list();
- // TODO: Remove after HRT-7838
- void *get_user_address();
- virtual hailo_status read(void *buf_dst, size_t count, size_t offset, bool should_sync) override;
+ virtual hailo_status read(void *buf_dst, size_t count, size_t offset) override;
virtual hailo_status write(const void *buf_src, size_t count, size_t offset) override;
virtual Expected<uint32_t> program_descriptors(size_t transfer_size, InterruptsDomain last_desc_interrupts_domain,
- size_t desc_offset, bool is_circular) override;
+ size_t desc_offset) override;
virtual hailo_status reprogram_device_interrupts_for_end_of_batch(size_t transfer_size, uint16_t batch_size,
InterruptsDomain new_interrupts_domain) override;
private:
- SgBuffer(std::shared_ptr<DmaMappedBuffer> mapped_buffer, std::shared_ptr<DescriptorList> desc_list);
+ SgBuffer(std::shared_ptr<MappedBuffer> mapped_buffer, std::shared_ptr<DescriptorList> desc_list);
// Initialization Dependency: The descriptor list points into the mapped buffer so it must be freed before it
- std::shared_ptr<DmaMappedBuffer> m_mapped_buffer;
+ std::shared_ptr<MappedBuffer> m_mapped_buffer;
std::shared_ptr<DescriptorList> m_desc_list;
};
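A creation sketch for the class above; the constants and helper name are hypothetical, and InterruptsDomain's enumerators are taken as a parameter since their values are not shown here:

hailo_status create_sg_buffer_sketch(hailort::HailoRTDriver &driver,
    hailort::vdma::ChannelId channel_id, hailort::InterruptsDomain last_desc_domain)
{
    const uint32_t desc_count = 512;      // Assumed descriptor list size
    const uint16_t desc_page_size = 4096; // Assumed descriptor page size
    const bool is_circular = true;

    // The descriptor list must cover the whole buffer: size <= desc_count * desc_page_size.
    auto sg_buffer = hailort::vdma::SgBuffer::create(driver, desc_count * desc_page_size,
        desc_count, desc_page_size, is_circular, hailort::HailoRTDriver::DmaDirection::BOTH, channel_id);
    if (!sg_buffer) {
        return sg_buffer.status();
    }
    auto sg = sg_buffer.release();

    // Program one transfer starting at descriptor 0; the last descriptor
    // raises an interrupt in last_desc_domain.
    auto programmed = sg.program_descriptors(desc_page_size, last_desc_domain, 0);
    if (!programmed) {
        return programmed.status();
    }
    return HAILO_SUCCESS;
}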
return static_cast<uint32_t>(DIV_ROUND_UP(buffer_size, page_size));\r
}\r
\r
- virtual hailo_status read(void *buf_dst, size_t count, size_t offset, bool should_sync = true) = 0;\r
+ virtual hailo_status read(void *buf_dst, size_t count, size_t offset) = 0;\r
virtual hailo_status write(const void *buf_src, size_t count, size_t offset) = 0;\r
\r
virtual Expected<uint32_t> program_descriptors(size_t transfer_size, InterruptsDomain last_desc_interrupts_domain,\r
- size_t desc_offset, bool is_circular) = 0;\r
+ size_t desc_offset) = 0;\r
virtual hailo_status reprogram_device_interrupts_for_end_of_batch(size_t transfer_size, uint16_t batch_size,\r
InterruptsDomain new_interrupts_domain) = 0;\r
\r
// Take the first device
auto scan_result = scan();
CHECK_EXPECTED(scan_result, "Failed scanning pcie devices");
- CHECK_AS_EXPECTED(scan_result->size() == 1, HAILO_INVALID_OPERATION,
- "Expected only 1 PCIe device. Pass `hailo_pcie_device_info_t` to create a specific PCIe device");
+ CHECK_AS_EXPECTED(scan_result->size() >= 1, HAILO_INVALID_OPERATION,
+ "There are no PCIe devices on the system");
+
+    // Choose the first device found.
return create(scan_result->at(0));
}
auto device_info = find_device_info(pcie_device_info);
CHECK_EXPECTED(device_info);
- auto pcie_device_info_str = pcie_device_info_to_string(pcie_device_info);
- CHECK_EXPECTED(pcie_device_info_str);
-
- auto driver = HailoRTDriver::create(device_info->dev_path);
+ auto driver = HailoRTDriver::create(*device_info);
CHECK_EXPECTED(driver);
hailo_status status = HAILO_UNINITIALIZED;
- auto device = std::unique_ptr<PcieDevice>(new (std::nothrow) PcieDevice(driver.release(), pcie_device_info, status,
- pcie_device_info_str.release()));
+ auto device = std::unique_ptr<PcieDevice>(new (std::nothrow) PcieDevice(driver.release(), status));
CHECK_AS_EXPECTED((nullptr != device), HAILO_OUT_OF_HOST_MEMORY);
CHECK_SUCCESS_AS_EXPECTED(status, "Failed creating PcieDevice");
return device;
return std::string(device_string);
}
-PcieDevice::PcieDevice(HailoRTDriver &&driver, const hailo_pcie_device_info_t &device_info, hailo_status &status,
- const std::string &device_id) :
- VdmaDevice::VdmaDevice(std::move(driver), Device::Type::PCIE, device_id),
- m_device_info(device_info)
+bool PcieDevice::pcie_device_infos_equal(const hailo_pcie_device_info_t &first, const hailo_pcie_device_info_t &second)
+{
+ const bool bdf_equal = (first.bus == second.bus) && (first.device == second.device) && (first.func == second.func);
+ const bool domain_equal = (HAILO_PCIE_ANY_DOMAIN == first.domain) || (HAILO_PCIE_ANY_DOMAIN == second.domain) ||
+ (first.domain == second.domain);
+ return bdf_equal && domain_equal;
+}
+
+PcieDevice::PcieDevice(HailoRTDriver &&driver, hailo_status &status) :
+ VdmaDevice::VdmaDevice(std::move(driver), Device::Type::PCIE)
{
if (driver.is_fw_loaded()) {
status = update_fw_state();
m_is_control_version_supported = false;
}
- m_device_id = device_id;
-
status = HAILO_SUCCESS;
}
return m_driver.read_memory(HailoRTDriver::MemoryType::DIRECT_MEMORY, address, buffer, size);
}
-const char *PcieDevice::get_dev_id() const
-{
- return m_device_id.c_str();
-}
-
hailo_status PcieDevice::reset_impl(CONTROL_PROTOCOL__reset_type_t reset_type)
{
hailo_status status = HAILO_UNINITIALIZED;
// TODO: fix logic with respect to is_expecting_response, implement wait_for_wakeup();
if (HAILO_SUCCESS == status) {
status = Control::parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header,
- &payload, &request);
+ &payload, &request, *this);
CHECK_SUCCESS(status);
CHECK(is_expecting_response, HAILO_INTERNAL_FAILURE, "Received a valid response from FW for a control that is not expecting one.");
} else if ((HAILO_FW_CONTROL_FAILURE == status) && (!is_expecting_response)){
static Expected<hailo_pcie_device_info_t> parse_pcie_device_info(const std::string &device_info_str,
bool log_on_failure);
static Expected<std::string> pcie_device_info_to_string(const hailo_pcie_device_info_t &device_info);
+ static bool pcie_device_infos_equal(const hailo_pcie_device_info_t &first, const hailo_pcie_device_info_t &second);
virtual ~PcieDevice() = default;
void set_is_control_version_supported(bool value);
virtual Expected<hailo_device_architecture_t> get_architecture() const override;
- const hailo_pcie_device_info_t get_device_info() const
- {
- return m_device_info;
- }
- virtual const char* get_dev_id() const override;
-
private:
- PcieDevice(HailoRTDriver &&driver, const hailo_pcie_device_info_t &device_info, hailo_status &status,
- const std::string &device_id);
+ PcieDevice(HailoRTDriver &&driver, hailo_status &status);
static Expected<HailoRTDriver::DeviceInfo> find_device_info(const hailo_pcie_device_info_t &pcie_device_info);
-
- const hailo_pcie_device_info_t m_device_info;
- std::string m_device_id;
};
} /* namespace hailort */
#include "hailo/hailort_common.hpp"
#include "vdma/vdma_async_stream.hpp"
+#include "common/os_utils.hpp"
namespace hailort
return;
}
+ if (channel->type() != vdma::BoundaryChannel::Type::ASYNC) {
+ LOGGER__ERROR("Can't create a async vdma stream with a non async channel. Received channel type {}", channel->type());
+ status = HAILO_INVALID_ARGUMENT;
+ return;
+ }
+
status = HAILO_SUCCESS;
}
-Expected<size_t> VdmaAsyncInputStream::sync_write_raw_buffer(const MemoryView &)
+hailo_status VdmaAsyncInputStream::wait_for_async_ready(size_t transfer_size, std::chrono::milliseconds timeout)
+{
+ const bool STOP_IF_DEACTIVATED = true;
+ return m_channel->wait(transfer_size, timeout, STOP_IF_DEACTIVATED);
+}
+
+Expected<size_t> VdmaAsyncInputStream::get_async_max_queue_size() const
{
- return make_unexpected(HAILO_NOT_IMPLEMENTED);
+ return get_buffer_frames_size();
}
-hailo_status VdmaAsyncInputStream::sync_write_all_raw_buffer_no_transform_impl(void *, size_t, size_t)
+hailo_status VdmaAsyncInputStream::write_buffer_only(const MemoryView &, const std::function<bool()> &)
{
- return HAILO_NOT_IMPLEMENTED;
+ LOGGER__ERROR("The write_buffer_only function is not supported by async streams");
+ return HAILO_INVALID_OPERATION;
}
-hailo_status VdmaAsyncInputStream::wait_for_ready(size_t transfer_size, std::chrono::milliseconds timeout)
+hailo_status VdmaAsyncInputStream::send_pending_buffer(const device_id_t &)
{
- return m_channel->wait(transfer_size, timeout);
+ LOGGER__ERROR("The send_pending_buffer function is not supported by async streams");
+ return HAILO_INVALID_OPERATION;
}
-hailo_status VdmaAsyncInputStream::write_async(std::shared_ptr<DmaMappedBuffer> buffer, const TransferDoneCallback &user_callback, void *opaque)
+hailo_status VdmaAsyncInputStream::write_async(TransferRequest &&transfer_request)
{
- return m_channel->transfer(buffer, user_callback, opaque);
+ return m_channel->transfer_async(std::move(transfer_request));
+}
+
+hailo_status VdmaAsyncInputStream::write_impl(const MemoryView &)
+{
+ LOGGER__ERROR("Sync write is not supported by async streams");
+ return HAILO_INVALID_OPERATION;
}
/** Output stream **/
return;
}
+ if (channel->type() != vdma::BoundaryChannel::Type::ASYNC) {
+ LOGGER__ERROR("Can't create an async vdma stream with a non async channel. Received channel type {}", channel->type());
+ status = HAILO_INVALID_ARGUMENT;
+ return;
+ }
+
+ status = HAILO_SUCCESS;
+}
+
+hailo_status VdmaAsyncOutputStream::wait_for_async_ready(size_t transfer_size, std::chrono::milliseconds timeout)
+{
+ const bool STOP_IF_DEACTIVATED = true;
+ return m_channel->wait(transfer_size, timeout, STOP_IF_DEACTIVATED);
+}
+
+Expected<size_t> VdmaAsyncOutputStream::get_async_max_queue_size() const
+{
+ return get_buffer_frames_size();
+}
+
+hailo_status VdmaAsyncOutputStream::read_impl(MemoryView &)
+{
+ LOGGER__ERROR("Sync read is not supported by async streams");
+ return HAILO_INVALID_OPERATION;
+}
+
+hailo_status VdmaAsyncOutputStream::read_async(TransferRequest &&transfer_request)
+{
+ return m_channel->transfer_async(std::move(transfer_request));
+}
+
+/** Output nms stream **/
+VdmaAsyncOutputNmsStream::VdmaAsyncOutputNmsStream(VdmaDevice &device, vdma::BoundaryChannelPtr channel,
+ const LayerInfo &edge_layer, EventPtr core_op_activated_event,
+ uint16_t batch_size, std::chrono::milliseconds transfer_timeout,
+ hailo_stream_interface_t interface, hailo_status &status) :
+ VdmaOutputStreamBase(device, channel, edge_layer, core_op_activated_event, batch_size,
+ transfer_timeout, interface, status),
+ m_queue_max_size(channel->get_transfers_count_in_buffer(get_info().hw_frame_size)),
+ m_queue_mutex(),
+ m_abort_mutex(),
+ m_queue_cond(),
+ m_queue(),
+ m_stream_aborted(false),
+ m_should_quit(false),
+ m_worker_thread([this] { process_transfer_requests(); })
+{
+ // Check status for base class c'tor
+ if (HAILO_SUCCESS != status) {
+ return;
+ }
+
+ if (edge_layer.format.order != HAILO_FORMAT_ORDER_HAILO_NMS) {
+ // This shouldn't happen
+ LOGGER__ERROR("Can't create NMS vdma async output stream if edge layer order isn't NMS. Order received {}",
+ edge_layer.format.order);
+ status = HAILO_INTERNAL_FAILURE;
+ return;
+ }
+
+ // TODO: after adding NMS single int, we can create an async channel for async nms output stream (HRT-10553)
+ if (channel->type() != vdma::BoundaryChannel::Type::BUFFERED) {
+ LOGGER__ERROR("Can't create an async nms vdma stream with a non buffered channel. Received channel type {}", channel->type());
+ status = HAILO_INVALID_ARGUMENT;
+ return;
+ }
+
status = HAILO_SUCCESS;
}
-Expected<size_t> VdmaAsyncOutputStream::sync_read_raw_buffer(MemoryView &)
+VdmaAsyncOutputNmsStream::~VdmaAsyncOutputNmsStream()
+{
+ // VdmaAsyncOutputNmsStream::deactivate_stream() calls VdmaOutputStreamBase::deactivate_stream().
+ // Because this dtor (i.e. ~VdmaAsyncOutputNmsStream()) is called before ~VdmaOutputStreamBase(), calling
+ // VdmaOutputStreamBase::deactivate_stream() inside VdmaAsyncOutputNmsStream::deactivate_stream() will work.
+ if (this->is_stream_activated) {
+ const auto status = deactivate_stream();
+ if (HAILO_SUCCESS != status) {
+ LOGGER__ERROR("Failed to deactivate stream with error status {}", status);
+ }
+ }
+
+ if (m_worker_thread.joinable()) {
+ signal_thread_quit();
+ m_worker_thread.join();
+ }
+}
+
+hailo_status VdmaAsyncOutputNmsStream::wait_for_async_ready(size_t transfer_size, std::chrono::milliseconds timeout)
{
- return make_unexpected(HAILO_NOT_IMPLEMENTED);
+ CHECK(transfer_size == get_info().hw_frame_size, HAILO_INSUFFICIENT_BUFFER,
+ "On nms stream transfer_size should be {} (given size {})", get_info().hw_frame_size, transfer_size);
+ std::unique_lock<std::mutex> lock(m_queue_mutex);
+ auto result = m_queue_cond.wait_for(lock, timeout,
+ [&]{ return m_should_quit || m_stream_aborted || (m_queue.size() < m_queue_max_size); });
+ if (result) {
+ if (m_should_quit) {
+ return HAILO_STREAM_NOT_ACTIVATED;
+ }
+ return m_stream_aborted ? HAILO_STREAM_ABORTED_BY_USER : HAILO_SUCCESS;
+ }
+ return HAILO_TIMEOUT;
+}
+
+Expected<size_t> VdmaAsyncOutputNmsStream::get_async_max_queue_size() const
+{
+ return Expected<size_t>(m_queue_max_size);
+}
+
+hailo_status VdmaAsyncOutputNmsStream::read_async(TransferRequest &&transfer_request)
+{
+ {
+ std::lock_guard<std::mutex> lock(m_queue_mutex);
+ CHECK(!m_stream_aborted, HAILO_STREAM_ABORTED_BY_USER);
+ CHECK(m_queue.size() < m_queue_max_size, HAILO_QUEUE_IS_FULL, "No space left in nms queue");
+
+ m_queue.emplace(std::move(transfer_request));
+ }
+ m_queue_cond.notify_one();
+ return HAILO_SUCCESS;
+}
+
+hailo_status VdmaAsyncOutputNmsStream::read(MemoryView /* buffer */)
+{
+    // We need to override read() since VdmaAsyncOutputNmsStream implements read_impl(), which would cause read() to
+    // succeed; however, that isn't desired for async streams.
+ LOGGER__ERROR("The read function is not supported by async streams");
+ return HAILO_INVALID_OPERATION;
+}
+
+hailo_status VdmaAsyncOutputNmsStream::abort()
+{
+ std::unique_lock<std::mutex> lock(m_abort_mutex);
+ const auto status = VdmaOutputStreamBase::abort();
+ CHECK_SUCCESS(status);
+
+ m_stream_aborted = true;
+
+ return HAILO_SUCCESS;
+}
+
+hailo_status VdmaAsyncOutputNmsStream::clear_abort()
+{
+ std::unique_lock<std::mutex> lock(m_abort_mutex);
+ const auto status = VdmaOutputStreamBase::clear_abort();
+ CHECK_SUCCESS(status);
+
+ m_stream_aborted = false;
+
+ return HAILO_SUCCESS;
+}
+
+hailo_status VdmaAsyncOutputNmsStream::read_impl(MemoryView &buffer)
+{
+ CHECK((buffer.size() % HailoRTCommon::HW_DATA_ALIGNMENT) == 0, HAILO_INVALID_ARGUMENT,
+ "Size must be aligned to {} (got {})", HailoRTCommon::HW_DATA_ALIGNMENT, buffer.size());
+
+ return m_channel->transfer_sync(buffer.data(), buffer.size(), m_transfer_timeout);
+}
+
+hailo_status VdmaAsyncOutputNmsStream::deactivate_stream()
+{
+ std::unique_lock<std::mutex> lock(m_queue_mutex);
+
+ // abort is called because read_nms may block on a non-aborted channel
+ auto status = abort();
+ CHECK_SUCCESS(status);
+
+ // Now for every transfer processed in process_transfer_requests(), we'll pass HAILO_STREAM_ABORTED_BY_USER to the
+ // callback.
+ status = VdmaOutputStreamBase::deactivate_stream();
+ CHECK_SUCCESS(status);
+
+ // Block until all transfers have been emptied from the queue
+ auto result = m_queue_cond.wait_for(lock, m_transfer_timeout, [&]{ return m_queue.empty(); });
+ CHECK(result, HAILO_TIMEOUT, "Timeout while deactivating async nms output stream");
+
+ return HAILO_SUCCESS;
}
-hailo_status VdmaAsyncOutputStream::read_all(MemoryView &)
+hailo_status VdmaAsyncOutputNmsStream::activate_stream(uint16_t dynamic_batch_size, bool resume_pending_stream_transfers)
{
- return HAILO_NOT_IMPLEMENTED;
+ std::unique_lock<std::mutex> lock(m_queue_mutex);
+ auto status = VdmaOutputStreamBase::activate_stream(dynamic_batch_size, resume_pending_stream_transfers);
+ CHECK_SUCCESS(status);
+
+ status = clear_abort();
+ CHECK_SUCCESS(status);
+
+ return HAILO_SUCCESS;
}
-hailo_status VdmaAsyncOutputStream::wait_for_ready(size_t transfer_size, std::chrono::milliseconds timeout)
+Expected<size_t> VdmaAsyncOutputNmsStream::get_buffer_frames_size() const
{
- return m_channel->wait(transfer_size, timeout);
+ return Expected<size_t>(m_queue_max_size);
}
-hailo_status VdmaAsyncOutputStream::read_async(std::shared_ptr<DmaMappedBuffer> buffer, const TransferDoneCallback &user_callback, void *opaque)
+void VdmaAsyncOutputNmsStream::signal_thread_quit()
{
- return m_channel->transfer(buffer, user_callback, opaque);
+ {
+ std::unique_lock<std::mutex> lock(m_queue_mutex);
+ m_should_quit = true;
+ }
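+ // notify_all() wakes the worker thread as well as any callers blocked in wait_for_async_ready().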
+ m_queue_cond.notify_all();
+}
+
+void VdmaAsyncOutputNmsStream::process_transfer_requests()
+{
+ static const size_t FROM_START_OF_BUFFER = 0;
+ OsUtils::set_current_thread_name("ASYNC_NMS");
+
+ while (true) {
+ std::unique_lock<std::mutex> lock(m_queue_mutex);
+ m_queue_cond.wait(lock, [&]{ return m_should_quit || !m_queue.empty(); });
+ if (m_should_quit) {
+ break;
+ }
+
+ auto transfer_request = m_queue.front();
+ m_queue.pop();
+
+ lock.unlock();
+ auto status = read_nms(transfer_request.buffer.data(), FROM_START_OF_BUFFER, transfer_request.buffer.size());
+ lock.lock();
+
+ if (!this->is_stream_activated) {
+ LOGGER__TRACE("Stream is not active (previous status {})", status);
+ transfer_request.callback(HAILO_STREAM_ABORTED_BY_USER);
+ } else if (status != HAILO_SUCCESS) {
+ // TODO: timeout? stream aborted? (HRT-10513)
+ transfer_request.callback(status);
+ } else {
+ transfer_request.callback(HAILO_SUCCESS);
+ }
+
+ lock.unlock();
+
+ // We notify after calling the callback, so that deactivate_stream() will block until the queue is empty and all callbacks have been called
+ m_queue_cond.notify_one();
+ }
}
} /* namespace hailort */
#include "vdma/vdma_stream_base.hpp"
#include "vdma/vdma_device.hpp"
#include "vdma/channel/async_channel.hpp"
+#include "vdevice/scheduler/scheduled_core_op_state.hpp"
+
+#include <thread>
+#include <queue>
+#include <mutex>
+#include <condition_variable>
namespace hailort
hailo_status &status);
virtual ~VdmaAsyncInputStream() = default;
- virtual hailo_status wait_for_ready(size_t transfer_size, std::chrono::milliseconds timeout) override;
- virtual hailo_status write_async(std::shared_ptr<DmaMappedBuffer> buffer, const TransferDoneCallback &user_callback, void *opaque);
+ virtual hailo_status wait_for_async_ready(size_t transfer_size, std::chrono::milliseconds timeout) override;
+ virtual Expected<size_t> get_async_max_queue_size() const override;
-private:
- virtual Expected<size_t> sync_write_raw_buffer(const MemoryView &buffer) override;
- virtual hailo_status sync_write_all_raw_buffer_no_transform_impl(void *buffer, size_t offset, size_t size) override;
+ virtual hailo_status write_buffer_only(const MemoryView &buffer, const std::function<bool()> &should_cancel) override;
+ virtual hailo_status send_pending_buffer(const device_id_t &device_id) override;
+
+ virtual hailo_status write_async(TransferRequest &&transfer_request) override;
+
+protected:
+ virtual hailo_status write_impl(const MemoryView &buffer) override;
};
class VdmaAsyncOutputStream : public VdmaOutputStreamBase
hailo_status &status);
virtual ~VdmaAsyncOutputStream() = default;
- virtual hailo_status wait_for_ready(size_t transfer_size, std::chrono::milliseconds timeout) override;
- virtual hailo_status read_async(std::shared_ptr<DmaMappedBuffer> buffer, const TransferDoneCallback &user_callback, void *opaque = nullptr) override;
+ virtual hailo_status wait_for_async_ready(size_t transfer_size, std::chrono::milliseconds timeout) override;
+ virtual Expected<size_t> get_async_max_queue_size() const override;
-private:
- virtual Expected<size_t> sync_read_raw_buffer(MemoryView &buffer);
- virtual hailo_status read_all(MemoryView &buffer) override;
+protected:
+ virtual hailo_status read_impl(MemoryView &buffer) override;
+ virtual hailo_status read_async(TransferRequest &&transfer_request) override;
};
+// NMS requires multiple reads from the device, plus parsing of the output. Hence, a background thread is needed.
+// This class opens a worker thread that processes nms transfers, signalling the user's callback upon completion.
+// read_async() adds transfer requests to a producer-consumer queue.
+class VdmaAsyncOutputNmsStream : public VdmaOutputStreamBase
+{
+public:
+ VdmaAsyncOutputNmsStream(VdmaDevice &device, vdma::BoundaryChannelPtr channel, const LayerInfo &edge_layer,
+ EventPtr core_op_activated_event, uint16_t batch_size,
+ std::chrono::milliseconds transfer_timeout, hailo_stream_interface_t interface,
+ hailo_status &status);
+ virtual ~VdmaAsyncOutputNmsStream();
+
+ virtual hailo_status wait_for_async_ready(size_t transfer_size, std::chrono::milliseconds timeout) override;
+ virtual Expected<size_t> get_async_max_queue_size() const override;
+ virtual hailo_status read(MemoryView buffer) override;
+ virtual hailo_status abort() override;
+ virtual hailo_status clear_abort() override;
+
+private:
+ virtual hailo_status read_impl(MemoryView &buffer) override;
+ virtual hailo_status read_async(TransferRequest &&transfer_request) override;
+ virtual hailo_status deactivate_stream() override;
+ virtual hailo_status activate_stream(uint16_t dynamic_batch_size, bool resume_pending_stream_transfers) override;
+ virtual Expected<size_t> get_buffer_frames_size() const override;
+
+ void signal_thread_quit();
+ void process_transfer_requests();
+
+ // TODO: use SpscQueue (HRT-10554)
+ const size_t m_queue_max_size;
+ std::mutex m_queue_mutex;
+ std::mutex m_abort_mutex;
+ std::condition_variable m_queue_cond;
+ std::queue<TransferRequest> m_queue;
+ std::atomic_bool m_stream_aborted;
+ // m_should_quit signals the worker thread to exit (set upon destruction)
+ bool m_should_quit;
+ std::thread m_worker_thread;
+};
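+// A minimal usage sketch (illustrative only; `stream`, `buffer` and `frame_size` are placeholders, and
+// TransferRequest is assumed to aggregate the buffer and completion callback used by the worker thread above):
+//
+//   if (HAILO_SUCCESS == stream->wait_for_async_ready(frame_size, timeout)) {
+//       stream->read_async(TransferRequest{buffer, [](hailo_status status) { /* handle completion */ }});
+//   }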
} /* namespace hailort */
m_active_core_op_holder.set(*this);
- status = m_resources_manager->set_inter_context_channels_dynamic_batch_size(dynamic_batch_size);
+ status = m_resources_manager->set_dynamic_batch_size(dynamic_batch_size);
CHECK_SUCCESS(status, "Failed to set inter-context channels dynamic batch size.");
status = m_resources_manager->enable_state_machine(dynamic_batch_size);
return m_resources_manager->get_boundary_vdma_channel_by_stream_name(stream_name);
}
+Expected<HwInferResults> VdmaConfigCoreOp::run_hw_infer_estimator()
+{
+ return m_resources_manager->run_hw_only_infer();
+}
+
} /* namespace hailort */
virtual hailo_status set_scheduler_timeout(const std::chrono::milliseconds &timeout, const std::string &network_name) override;
virtual hailo_status set_scheduler_threshold(uint32_t threshold, const std::string &network_name) override;
virtual hailo_status set_scheduler_priority(uint8_t priority, const std::string &network_name) override;
+ virtual Expected<HwInferResults> run_hw_infer_estimator() override;
virtual ~VdmaConfigCoreOp() = default;
VdmaConfigCoreOp(const VdmaConfigCoreOp &other) = delete;
{
hailo_status VdmaConfigManager::switch_core_op(std::shared_ptr<VdmaConfigCoreOp> current_active_core_op,
- std::shared_ptr<VdmaConfigCoreOp> next_core_op, const uint16_t batch_size, bool resume_pending_stream_transfers)
+ std::shared_ptr<VdmaConfigCoreOp> next_core_op, const uint16_t batch_size, const bool resume_pending_stream_transfers)
{
static const auto RESET_NN_CONFIG = false;
CHECK((nullptr != current_active_core_op) || (nullptr != next_core_op), HAILO_INVALID_ARGUMENT);
auto status = current_active_core_op->deactivate_host_resources();
CHECK_SUCCESS(status, "Failed deactivating current core-op");
+ // TODO HRT-10799 Fix when enabling batch switch flow for hailo15
// TODO: In mercury we need to reset after deactivate. This will be fixed in MSW-762 and the "if" will be removed
// when we make the nn_manager responsible to reset the nn-core.
if (Device::Type::INTEGRATED == current_active_core_op->get_resources_manager()->get_device().get_type()) {
return HAILO_SUCCESS;
}
+hailo_status VdmaConfigManager::deactivate_core_op(std::shared_ptr<VdmaConfigCoreOp> current_active_core_op)
+{
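+ // Passing a null next core-op with batch size 0 makes switch_core_op() deactivate the current core-op
+ // without activating a new one.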
+ static const auto RESUME_PENDING_STREAM_TRANSFERS = true;
+ static const uint16_t DEACTIVATE_BATCH_SIZE = 0;
+ const std::shared_ptr<VdmaConfigCoreOp> DEACTIVATE_NEXT_CORE_OP = nullptr;
+ return switch_core_op(current_active_core_op, DEACTIVATE_NEXT_CORE_OP, DEACTIVATE_BATCH_SIZE, RESUME_PENDING_STREAM_TRANSFERS);
+}
+
} /* namespace hailort */
VdmaConfigManager() = delete;
static hailo_status switch_core_op(std::shared_ptr<VdmaConfigCoreOp> current_active_core_op,
- std::shared_ptr<VdmaConfigCoreOp> next_core_op, const uint16_t batch_size, bool resume_pending_stream_transfers);
+ std::shared_ptr<VdmaConfigCoreOp> next_core_op, const uint16_t batch_size, const bool resume_pending_stream_transfers);
+
+ static hailo_status deactivate_core_op(std::shared_ptr<VdmaConfigCoreOp> current_active_core_op);
};
} /* namespace hailort */
static constexpr std::chrono::milliseconds DEFAULT_TIMEOUT(50000);
#endif /* ifndef HAILO_EMULATOR */
-VdmaDevice::VdmaDevice(HailoRTDriver &&driver, Device::Type type, const std::string &device_id) :
+VdmaDevice::VdmaDevice(HailoRTDriver &&driver, Device::Type type) :
DeviceBase::DeviceBase(type),
m_driver(std::move(driver)), m_is_configured(false)
{
- activate_notifications(device_id);
+ activate_notifications(get_dev_id());
}
Expected<std::unique_ptr<VdmaDevice>> VdmaDevice::create(const std::string &device_id)
return HAILO_SUCCESS;
}
+hailo_status VdmaDevice::clear_configured_apps()
+{
+ static const auto DONT_KEEP_NN_CONFIG_DURING_RESET = false;
+ auto status = Control::reset_context_switch_state_machine(*this, DONT_KEEP_NN_CONFIG_DURING_RESET);
+ CHECK_SUCCESS(status);
+
+ // On Mercury, the nn core needs to be reset before activating a network group, to clear prior nn-core state
+ if (Device::Type::INTEGRATED == get_type()) {
+ // On core devices the nn_manager is not responsible for resetting the nn-core, so
+ // we use the SCU control for that.
+ status = m_driver.reset_nn_core();
+ CHECK_SUCCESS(status);
+ }
+
+ status = Control::clear_configured_apps(*this);
+ CHECK_SUCCESS(status, "Failed to clear configured network groups with status {}", status);
+
+ return HAILO_SUCCESS;
+}
+
Expected<ConfiguredNetworkGroupVector> VdmaDevice::add_hef(Hef &hef, const NetworkGroupsParamsMap &configure_params)
{
auto status = mark_as_used();
// TODO: Do we need this control after fixing HRT-7519?
// Reset context_switch state machine - it may have been in an active state if a previous VdmaDevice
// wasn't dtor'd (due to SIGKILL for example)
- static const auto REMOVE_NN_CONFIG_DURING_RESET = false;
- status = Control::reset_context_switch_state_machine(*this, REMOVE_NN_CONFIG_DURING_RESET);
+ status = clear_configured_apps();
CHECK_SUCCESS_AS_EXPECTED(status);
- // In case of mercury need to reset nn core before activating network group to clear prior nn core state
- if (Device::Type::INTEGRATED == get_type()) {
- // On core device, the nn_manager is not responsible to reset the nn-core so
- // we use the SCU control for that.
- status = reset(HAILO_RESET_DEVICE_MODE_NN_CORE);
- CHECK_SUCCESS_AS_EXPECTED(status);
- }
-
- status = Control::clear_configured_apps(*this);
- CHECK_SUCCESS_AS_EXPECTED(status, "Failed to clear configured network groups with status {}", status);
-
assert(nullptr == m_vdma_interrupts_dispatcher);
auto interrupts_dispatcher = vdma::InterruptsDispatcher::create(std::ref(m_driver));
CHECK_EXPECTED(interrupts_dispatcher);
m_core_ops.emplace_back(core_op_ptr);
// TODO: HRT-8875
- auto net_flow_ops = hef.pimpl->post_process_ops(core_op_metadata->core_op_name());
- auto network_group_expected = ConfiguredNetworkGroupBase::create(config_params, std::move(core_ops), std::move(net_flow_ops));
+ auto metadata = hef.pimpl->network_group_metadata(core_op_metadata->core_op_name());
+ auto network_group_expected = ConfiguredNetworkGroupBase::create(config_params, std::move(core_ops), std::move(metadata));
CHECK_EXPECTED(network_group_expected);
auto network_group_ptr = network_group_expected.release();
return HAILO_RESET_DEVICE_MODE_SOFT;
}
-uint16_t VdmaDevice::get_default_desc_page_size() const
-{
- return m_driver.calc_desc_page_size(vdma::DEFAULT_DESC_PAGE_SIZE);
-}
-
hailo_status VdmaDevice::mark_as_used()
{
return m_driver.mark_as_used();
LOGGER__WARNING("Stopping notification thread ungracefully");
}
if (m_is_configured) {
- status = Control::clear_configured_apps(*this);
+ status = clear_configured_apps();
if (HAILO_SUCCESS != status) {
- LOGGER__ERROR("Failed to clear configured core-ops with status {}", status);
+ LOGGER__WARNING("clear configured apps ended with status {}", status);
}
}
}
// TODO: decide about core_op names - align with the Compiler
auto core_op_metadata = hef.pimpl->get_core_op_metadata(network_group_name, partial_clusters_layout_bitmap);
CHECK_EXPECTED(core_op_metadata);
-
- auto core_op_metadata_ptr = make_shared_nothrow<CoreOpMetadata>(core_op_metadata.release());
- CHECK_AS_EXPECTED(nullptr != core_op_metadata_ptr, HAILO_OUT_OF_HOST_MEMORY);
- core_ops_metadata_ptrs.emplace_back(core_op_metadata_ptr);
+ core_ops_metadata_ptrs.emplace_back(core_op_metadata.release());
}
return core_ops_metadata_ptrs;
virtual hailo_status wait_for_wakeup() override;
virtual void increment_control_sequence() override;
virtual hailo_reset_device_mode_t get_default_reset_mode() override;
- uint16_t get_default_desc_page_size() const;
-
hailo_status mark_as_used();
virtual Expected<size_t> read_log(MemoryView &buffer, hailo_cpu_id_t cpu_id) override;
- HailoRTDriver &get_driver() {
+ HailoRTDriver &get_driver()
+ {
return std::ref(m_driver);
};
+ virtual const char* get_dev_id() const override final
+ {
+ // m_driver.device_id() returns a reference, so returning c_str() here is safe.
+ return m_driver.device_id().c_str();
+ };
+
ExpectedRef<vdma::InterruptsDispatcher> get_vdma_interrupts_dispatcher();
protected:
- VdmaDevice(HailoRTDriver &&driver, Type type, const std::string &device_id);
+ VdmaDevice(HailoRTDriver &&driver, Type type);
virtual Expected<D2H_EVENT_MESSAGE_t> read_notification() override;
virtual hailo_status disable_notifications() override;
HailoRTDriver m_driver;
std::vector<std::shared_ptr<CoreOp>> m_core_ops;
std::vector<std::shared_ptr<ConfiguredNetworkGroup>> m_network_groups; // TODO: HRT-9547 - Remove when ConfiguredNetworkGroup will be kept in global context
-
+
// The vdma interrupts dispatcher contains a callback with a reference to the current activated network group
// (reference to the ResourcesManager). Hence, it must be destructed before the networks groups are destructed.
std::unique_ptr<vdma::InterruptsDispatcher> m_vdma_interrupts_dispatcher;
std::vector<std::shared_ptr<CoreOpMetadata>> &core_ops,
Hef &hef, const ConfigureNetworkParams &config_params,
uint8_t network_group_index);
+ hailo_status clear_configured_apps();
Expected<ConfiguredNetworkGroupVector> create_networks_group_vector(Hef &hef, const NetworkGroupsParamsMap &configure_params);
Expected<std::vector<std::shared_ptr<CoreOpMetadata>>> create_core_ops_metadata(Hef &hef, const std::string &network_group_name,
uint32_t partial_clusters_layout_bitmap);
return;
}
+ if (channel->type() != vdma::BoundaryChannel::Type::BUFFERED) {
+ LOGGER__ERROR("Can't create a vdma stream with a non buffered channel. Received channel type {}", channel->type());
+ status = HAILO_INVALID_ARGUMENT;
+ return;
+ }
+
status = HAILO_SUCCESS;
}
-Expected<size_t> VdmaInputStream::sync_write_raw_buffer(const MemoryView &buffer)
+hailo_status VdmaInputStream::write_impl(const MemoryView &buffer)
{
- hailo_status status = HAILO_UNINITIALIZED;
-
- status = m_channel->wait(buffer.size(), m_channel_timeout);
- if ((status == HAILO_STREAM_ABORTED_BY_USER) || (status == HAILO_STREAM_NOT_ACTIVATED)) {
- return make_unexpected(status);
- }
- CHECK_AS_EXPECTED(HAILO_TIMEOUT != status, HAILO_TIMEOUT,
- "{} (H2D) failed with status={} (timeout={}ms)", name(), HAILO_TIMEOUT, m_channel_timeout.count());
- CHECK_SUCCESS_AS_EXPECTED(status);
-
- status = m_channel->transfer((void*)buffer.data(), buffer.size());
- if ((status == HAILO_STREAM_ABORTED_BY_USER) || (status == HAILO_STREAM_NOT_ACTIVATED)) {
- return make_unexpected(status);
- }
- CHECK_AS_EXPECTED(HAILO_TIMEOUT != status, HAILO_TIMEOUT,
- "{} (H2D) failed with status={} (timeout={}ms)", name(), HAILO_TIMEOUT, m_channel_timeout.count());
- CHECK_SUCCESS_AS_EXPECTED(status);
-
- return buffer.size();
+ return m_channel->transfer_sync((void*)buffer.data(), buffer.size(), m_channel_timeout);
}
hailo_status VdmaInputStream::write_buffer_only(const MemoryView &buffer,
return m_channel->write_buffer(buffer, m_channel_timeout, should_cancel);
}
-hailo_status VdmaInputStream::send_pending_buffer(size_t device_index)
+hailo_status VdmaInputStream::send_pending_buffer(const device_id_t &device_id)
{
+ (void)device_id;
std::unique_lock<std::mutex> lock(m_send_pending_mutex);
- CHECK(0 == device_index, HAILO_INVALID_OPERATION);
hailo_status status = m_channel->wait(get_frame_size(), m_channel_timeout);
if ((HAILO_STREAM_ABORTED_BY_USER == status) || (HAILO_STREAM_NOT_ACTIVATED == status)) {
return status;
return m_channel->send_pending_buffer();
}
-hailo_status VdmaInputStream::sync_write_all_raw_buffer_no_transform_impl(void *buffer, size_t offset, size_t size)
-{
- ASSERT(NULL != buffer);
-
- return sync_write_raw_buffer(MemoryView(static_cast<uint8_t*>(buffer) + offset, size)).status();
-}
-
/** Output stream **/
VdmaOutputStream::VdmaOutputStream(VdmaDevice &device, vdma::BoundaryChannelPtr channel, const LayerInfo &edge_layer,
EventPtr core_op_activated_event, uint16_t batch_size,
std::chrono::milliseconds transfer_timeout, hailo_stream_interface_t interface,
hailo_status &status) :
- VdmaOutputStreamBase(device, channel, edge_layer, core_op_activated_event, batch_size, transfer_timeout, interface, status),
- m_read_mutex()
+ VdmaOutputStreamBase(device, channel, edge_layer, core_op_activated_event, batch_size, transfer_timeout, interface, status)
{
// Check status for base class c'tor
if (HAILO_SUCCESS != status) {
return;
}
- status = HAILO_SUCCESS;
-}
-
-Expected<size_t> VdmaOutputStream::sync_read_raw_buffer(MemoryView &buffer)
-{
- hailo_status status = HAILO_UNINITIALIZED;
-
- status = m_channel->wait(buffer.size(), m_transfer_timeout);
- if ((status == HAILO_STREAM_ABORTED_BY_USER) || (status == HAILO_STREAM_NOT_ACTIVATED)) {
- return make_unexpected(status);
- }
- CHECK_AS_EXPECTED(HAILO_TIMEOUT != status, HAILO_TIMEOUT,
- "{} (D2H) failed with status={} (timeout={}ms)", name(), HAILO_TIMEOUT, m_transfer_timeout.count());
- CHECK_SUCCESS_AS_EXPECTED(status);
-
- status = m_channel->transfer(buffer.data(), buffer.size());
- if ((status == HAILO_STREAM_NOT_ACTIVATED) || (status == HAILO_STREAM_ABORTED_BY_USER)) {
- return make_unexpected(status);
+ if (channel->type() != vdma::BoundaryChannel::Type::BUFFERED) {
+ LOGGER__ERROR("Can't create a vdma stream with a non buffered channel. Received channel type {}", channel->type());
+ status = HAILO_INVALID_ARGUMENT;
+ return;
}
- CHECK_AS_EXPECTED(HAILO_TIMEOUT != status, HAILO_TIMEOUT,
- "{} (D2H) failed with status={} (timeout={}ms)", name(), HAILO_TIMEOUT, m_transfer_timeout.count());
- CHECK_SUCCESS_AS_EXPECTED(status);
- return buffer.size();
+ status = HAILO_SUCCESS;
}
-hailo_status VdmaOutputStream::read_all(MemoryView &buffer)
+hailo_status VdmaOutputStream::read_impl(MemoryView &buffer)
{
- std::unique_lock<std::mutex> lock(m_read_mutex);
- CHECK((buffer.size() % HailoRTCommon::HW_DATA_ALIGNMENT) == 0, HAILO_INVALID_ARGUMENT,
+ CHECK((buffer.size() % HailoRTCommon::HW_DATA_ALIGNMENT) == 0, HAILO_INVALID_ARGUMENT,
"Size must be aligned to {} (got {})", HailoRTCommon::HW_DATA_ALIGNMENT, buffer.size());
- return sync_read_raw_buffer(buffer).status();
+ return m_channel->transfer_sync(buffer.data(), buffer.size(), m_transfer_timeout);
}
} /* namespace hailort */
#include "vdma/vdma_stream_base.hpp"
#include "vdma/vdma_device.hpp"
#include "vdma/channel/boundary_channel.hpp"
+#include "vdevice/scheduler/scheduled_core_op_state.hpp"
namespace hailort
hailo_status &status);
virtual ~VdmaInputStream() = default;
- hailo_status write_buffer_only(const MemoryView &buffer, const std::function<bool()> &should_cancel = []() { return false; });
- hailo_status send_pending_buffer(size_t device_index = 0);
-
- void notify_all()
- {
- return m_channel->notify_all();
- }
+ virtual hailo_status write_buffer_only(const MemoryView &buffer, const std::function<bool()> &should_cancel = []() { return false; }) override;
+ virtual hailo_status send_pending_buffer(const device_id_t &device_id) override;
private:
- virtual Expected<size_t> sync_write_raw_buffer(const MemoryView &buffer) override;
- virtual hailo_status sync_write_all_raw_buffer_no_transform_impl(void *buffer, size_t offset, size_t size) override;
+ virtual hailo_status write_impl(const MemoryView &buffer) override;
std::mutex m_write_only_mutex;
std::mutex m_send_pending_mutex;
-
- friend class InputVDeviceBaseStream;
- friend class InputVDeviceNativeStream;
};
class VdmaOutputStream : public VdmaOutputStreamBase
virtual ~VdmaOutputStream() = default;
private:
- virtual Expected<size_t> sync_read_raw_buffer(MemoryView &buffer);
- virtual hailo_status read_all(MemoryView &buffer) override;
-
- std::mutex m_read_mutex;
-
- friend class OutputVDeviceBaseStream;
+ virtual hailo_status read_impl(MemoryView &buffer) override;
};
case Device::Type::PCIE:
interface_valid = (HAILO_STREAM_INTERFACE_PCIE == interface);
break;
-
+
case Device::Type::INTEGRATED:
interface_valid = (HAILO_STREAM_INTERFACE_INTEGRATED == interface);
break;
{
CHECK_AS_EXPECTED(validate_device_interface_compatibility(interface, device.get_type()), HAILO_INTERNAL_FAILURE);
+ hailo_status status = HAILO_UNINITIALIZED;
+ std::shared_ptr<VdmaInputStreamBase> result = nullptr;
if ((stream_params.flags & HAILO_STREAM_FLAGS_ASYNC) != 0) {
- CHECK_AS_EXPECTED(channel->type() == vdma::BoundaryChannel::Type::ASYNC, HAILO_INVALID_ARGUMENT,
- "Can't create a async vdma stream with a non async channel. Received channel type {}", channel->type());
-
- hailo_status status = HAILO_UNINITIALIZED;
- auto result = make_shared_nothrow<VdmaAsyncInputStream>(device, channel, edge_layer, core_op_activated_event,
+ result = make_shared_nothrow<VdmaAsyncInputStream>(device, channel, edge_layer, core_op_activated_event,
batch_size, DEFAULT_TRANSFER_TIMEOUT, interface, status);
- CHECK_SUCCESS_AS_EXPECTED(status);
- CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY);
-
- return std::static_pointer_cast<VdmaInputStreamBase>(result);
} else {
- CHECK_AS_EXPECTED(channel->type() == vdma::BoundaryChannel::Type::BUFFERED, HAILO_INVALID_ARGUMENT,
- "Can't create a vdma stream with a non buffered channel. Received channel type {}", channel->type());
-
- hailo_status status = HAILO_UNINITIALIZED;
- auto result = make_shared_nothrow<VdmaInputStream>(device, channel, edge_layer, core_op_activated_event,
+ result = make_shared_nothrow<VdmaInputStream>(device, channel, edge_layer, core_op_activated_event,
batch_size, DEFAULT_TRANSFER_TIMEOUT, interface, status);
- CHECK_SUCCESS_AS_EXPECTED(status);
- CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY);
-
- return std::static_pointer_cast<VdmaInputStreamBase>(result);
}
+
+ // Check that the creation of the various subclasses succeeded
+ CHECK_SUCCESS_AS_EXPECTED(status);
+ CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY);
+ return result;
}
VdmaInputStreamBase::VdmaInputStreamBase(VdmaDevice &device, vdma::BoundaryChannelPtr channel,
hailo_status VdmaInputStreamBase::flush()
{
- const auto dynamic_batch_size = (CONTROL_PROTOCOL__IGNORE_DYNAMIC_BATCH_SIZE == m_dynamic_batch_size) ?
+ const auto dynamic_batch_size = (CONTROL_PROTOCOL__IGNORE_DYNAMIC_BATCH_SIZE == m_dynamic_batch_size) ?
1 : m_dynamic_batch_size;
return m_channel->flush(m_channel_timeout * dynamic_batch_size);
}
return HAILO_SUCCESS;
}
- // Flush is best effort
- auto status = m_channel->flush(VDMA_FLUSH_TIMEOUT);
- if (HAILO_STREAM_ABORTED_BY_USER == status) {
- LOGGER__INFO("Flush input_channel is not needed because channel was aborted. (channel {})", m_channel->get_channel_id());
- status = HAILO_SUCCESS;
- } else if (HAILO_SUCCESS != status) {
- LOGGER__ERROR("Failed to flush input_channel. (status {} channel {})", status, m_channel->get_channel_id());
- }
- status = m_channel->deactivate();
+ auto status = m_channel->deactivate();
if (HAILO_SUCCESS != status) {
LOGGER__ERROR("Failed to stop channel with status {}", status);
}
return m_channel->get_h2d_pending_frames_count();
}
-hailo_status VdmaInputStreamBase::register_interrupt_callback(const vdma::ProcessingCompleteCallback &callback)
-{
- return m_channel->register_interrupt_callback(callback);
-}
-
hailo_status VdmaInputStreamBase::set_dynamic_batch_size(uint16_t dynamic_batch_size)
{
// TODO: use std::max in the configure stage
CHECK(dynamic_batch_size <= m_max_batch_size, HAILO_INVALID_ARGUMENT,
"Dynamic batch size ({}) must be <= than the configured batch size ({})",
dynamic_batch_size, m_max_batch_size);
-
+
if (CONTROL_PROTOCOL__IGNORE_DYNAMIC_BATCH_SIZE == dynamic_batch_size) {
LOGGER__TRACE("Received CONTROL_PROTOCOL__IGNORE_DYNAMIC_BATCH_SIZE == dynamic_batch_size; "
"Leaving previously set value of {}", m_dynamic_batch_size);
{
CHECK_AS_EXPECTED(validate_device_interface_compatibility(interface, device.get_type()), HAILO_INTERNAL_FAILURE);
+ hailo_status status = HAILO_UNINITIALIZED;
+ std::shared_ptr<VdmaOutputStreamBase> result = nullptr;
+ // TODO: after adding NMS single int, we can create an async channel for async nms output stream (HRT-10553)
if ((stream_params.flags & HAILO_STREAM_FLAGS_ASYNC) != 0) {
- CHECK_AS_EXPECTED(channel->type() == vdma::BoundaryChannel::Type::ASYNC, HAILO_INVALID_ARGUMENT,
- "Can't create a async vdma stream with a non async channel. Received channel type {}", channel->type());
-
- hailo_status status = HAILO_UNINITIALIZED;
- auto result = make_shared_nothrow<VdmaAsyncOutputStream>(device, channel, edge_layer, core_op_activated_event,
- batch_size, DEFAULT_TRANSFER_TIMEOUT, interface, status);
- CHECK_SUCCESS_AS_EXPECTED(status);
- CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY);
-
- return std::static_pointer_cast<VdmaOutputStreamBase>(result);
+ if (edge_layer.format.order == HAILO_FORMAT_ORDER_HAILO_NMS) {
+ result = make_shared_nothrow<VdmaAsyncOutputNmsStream>(device, channel, edge_layer, core_op_activated_event,
+ batch_size, DEFAULT_TRANSFER_TIMEOUT, interface, status);
+ } else {
+ result = make_shared_nothrow<VdmaAsyncOutputStream>(device, channel, edge_layer, core_op_activated_event,
+ batch_size, DEFAULT_TRANSFER_TIMEOUT, interface, status);
+ }
} else {
- CHECK_AS_EXPECTED(channel->type() == vdma::BoundaryChannel::Type::BUFFERED, HAILO_INVALID_ARGUMENT,
- "Can't create a vdma stream with a non buffered channel. Received channel type {}", channel->type());
+ result = make_shared_nothrow<VdmaOutputStream>(device, channel, edge_layer, core_op_activated_event,
+ batch_size, DEFAULT_TRANSFER_TIMEOUT, interface, status);
+ }
- hailo_status status = HAILO_UNINITIALIZED;
- auto result = make_shared_nothrow<VdmaOutputStream>(device, channel, edge_layer, core_op_activated_event,
- batch_size, DEFAULT_TRANSFER_TIMEOUT, interface, status);
- CHECK_SUCCESS_AS_EXPECTED(status);
- CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY);
+ // Check that the creation of the various subclasses succeeded
+ CHECK_SUCCESS_AS_EXPECTED(status);
+ CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY);
- return std::static_pointer_cast<VdmaOutputStreamBase>(result);
- }
+ return result;
}
VdmaOutputStreamBase::VdmaOutputStreamBase(VdmaDevice &device, vdma::BoundaryChannelPtr channel, const LayerInfo &edge_layer,
EventPtr core_op_activated_event, uint16_t batch_size,
std::chrono::milliseconds transfer_timeout, hailo_stream_interface_t interface,
hailo_status &status) :
- OutputStreamBase(edge_layer, std::move(core_op_activated_event), status),
+ OutputStreamBase(edge_layer, interface, std::move(core_op_activated_event), status),
m_device(&device),
m_channel(std::move(channel)),
m_interface(interface),
m_transfer_timeout(transfer_timeout),
m_max_batch_size(batch_size),
m_dynamic_batch_size(batch_size),
- m_transfer_size(get_transfer_size(m_stream_info))
+ m_transfer_size(get_transfer_size(m_stream_info, get_layer_info()))
{
// Check status for base class c'tor
if (HAILO_SUCCESS != status) {
return HAILO_SUCCESS;
}
-hailo_status VdmaOutputStreamBase::register_interrupt_callback(const vdma::ProcessingCompleteCallback &callback)
+void VdmaOutputStreamBase::register_interrupt_callback(const vdma::ProcessingCompleteCallback &callback)
{
- return m_channel->register_interrupt_callback(callback);
+ m_channel->register_interrupt_callback(callback);
}
hailo_status VdmaOutputStreamBase::deactivate_stream()
return HAILO_SUCCESS;
}
-uint32_t VdmaOutputStreamBase::get_transfer_size(const hailo_stream_info_t &stream_info)
+uint32_t VdmaOutputStreamBase::get_transfer_size(const hailo_stream_info_t &stream_info, const LayerInfo &layer_info)
{
- // The ppu outputs one bbox per vdma buffer in the case of nms
- return (HAILO_FORMAT_ORDER_HAILO_NMS == stream_info.format.order) ?
- stream_info.nms_info.bbox_size : stream_info.hw_frame_size;
+ return LayerInfoUtils::get_stream_transfer_size(stream_info, layer_info);
}
hailo_status VdmaOutputStreamBase::set_dynamic_batch_size(uint16_t dynamic_batch_size)
CHECK(dynamic_batch_size <= m_max_batch_size, HAILO_INVALID_ARGUMENT,
"Dynamic batch size ({}) must be <= than the configured batch size ({})",
dynamic_batch_size, m_max_batch_size);
-
+
if (CONTROL_PROTOCOL__IGNORE_DYNAMIC_BATCH_SIZE == dynamic_batch_size) {
LOGGER__TRACE("Received CONTROL_PROTOCOL__IGNORE_DYNAMIC_BATCH_SIZE == dynamic_batch_size; "
"Leaving previously set value of {}", m_dynamic_batch_size);
namespace hailort
{
-constexpr std::chrono::seconds VDMA_FLUSH_TIMEOUT(10);
class VdmaInputStreamBase : public InputStreamBase {
public:
Expected<vdma::BoundaryChannel::BufferState> get_buffer_state();
virtual Expected<size_t> get_buffer_frames_size() const override;
virtual Expected<size_t> get_pending_frames_count() const override;
- virtual hailo_status register_interrupt_callback(const vdma::ProcessingCompleteCallback &callback) override;
+
+ virtual hailo_status write_buffer_only(const MemoryView &buffer, const std::function<bool()> &should_cancel = []() { return false; }) = 0;
+ virtual hailo_status send_pending_buffer(const device_id_t &device_id) = 0;
+
+ void notify_all()
+ {
+ m_channel->notify_all();
+ }
protected:
VdmaInputStreamBase(VdmaDevice &device, vdma::BoundaryChannelPtr channel, const LayerInfo &edge_layer,
virtual hailo_status deactivate_stream() override;
hailo_status set_dynamic_batch_size(uint16_t dynamic_batch_size);
+ friend class VDeviceInputStreamBase;
+ friend class VDeviceNativeInputStream;
+
VdmaDevice *m_device;
vdma::BoundaryChannelPtr m_channel;
const hailo_stream_interface_t m_interface;
virtual Expected<size_t> get_buffer_frames_size() const override;
virtual Expected<size_t> get_pending_frames_count() const override;
- virtual hailo_status register_interrupt_callback(const vdma::ProcessingCompleteCallback &callback);
+ void register_interrupt_callback(const vdma::ProcessingCompleteCallback &callback);
protected:
VdmaOutputStreamBase(VdmaDevice &device, vdma::BoundaryChannelPtr channel, const LayerInfo &edge_layer,
virtual hailo_status activate_stream(uint16_t dynamic_batch_size, bool resume_pending_stream_transfers) override;
virtual hailo_status deactivate_stream() override;
- static uint32_t get_transfer_size(const hailo_stream_info_t &stream_info);
+ static uint32_t get_transfer_size(const hailo_stream_info_t &stream_info, const LayerInfo &layer_info);
hailo_status set_dynamic_batch_size(uint16_t dynamic_batch_size);
+ friend class VDeviceOutputStreamBase;
+
VdmaDevice *m_device;
vdma::BoundaryChannelPtr m_channel;
const hailo_stream_interface_t m_interface;
const uint16_t m_max_batch_size;
uint16_t m_dynamic_batch_size;
const uint32_t m_transfer_size;
- std::mutex m_read_mutex;
};
)
endfunction()
-git_clone(pybind11 https://github.com/pybind/pybind11.git 80dc998efced8ceb2be59756668a7e90e8bef917)
+include(${CMAKE_CURRENT_LIST_DIR}/../../libhailort/bindings/python/externals/pybind11.cmake)
git_clone(Catch2 https://github.com/catchorg/Catch2.git c4e3767e265808590986d5db6ca1b5532a7f3d13)
git_clone(CLI11 https://github.com/hailo-ai/CLI11.git f1644f15f219303b7ad670732c21018a1e6f0e11)
git_clone(spdlog https://github.com/gabime/spdlog.git e2789531912a5c6ab28a90387f97c52963eec08a)
rpc ConfiguredNetworkGroup_get_latency_measurement (ConfiguredNetworkGroup_get_latency_measurement_Request) returns (ConfiguredNetworkGroup_get_latency_measurement_Reply) {}
rpc ConfiguredNetworkGroup_is_multi_context (ConfiguredNetworkGroup_is_multi_context_Request) returns (ConfiguredNetworkGroup_is_multi_context_Reply) {}
rpc ConfiguredNetworkGroup_get_config_params(ConfiguredNetworkGroup_get_config_params_Request) returns (ConfiguredNetworkGroup_get_config_params_Reply) {}
+ rpc ConfiguredNetworkGroup_get_sorted_output_names(ConfiguredNetworkGroup_get_sorted_output_names_Request) returns (ConfiguredNetworkGroup_get_sorted_output_names_Reply) {}
+ rpc ConfiguredNetworkGroup_get_stream_names_from_vstream_name(ConfiguredNetworkGroup_get_stream_names_from_vstream_name_Request) returns (ConfiguredNetworkGroup_get_stream_names_from_vstream_name_Reply) {}
+ rpc ConfiguredNetworkGroup_get_vstream_names_from_stream_name(ConfiguredNetworkGroup_get_vstream_names_from_stream_name_Request) returns (ConfiguredNetworkGroup_get_vstream_names_from_stream_name_Reply) {}
rpc InputVStreams_create (VStream_create_Request) returns (VStreams_create_Reply) {}
rpc InputVStream_dup_handle (dup_handle_Request) returns (dup_handle_Reply) {}
rpc OutputVStream_network_name (VStream_network_name_Request) returns (VStream_network_name_Reply) {}
rpc InputVStream_abort (VStream_abort_Request) returns (VStream_abort_Reply) {}
rpc OutputVStream_abort (VStream_abort_Request) returns (VStream_abort_Reply) {}
+ rpc InputVStream_stop_and_clear (VStream_stop_and_clear_Request) returns (VStream_stop_and_clear_Reply) {}
+ rpc InputVStream_start_vstream (VStream_start_vstream_Request) returns (VStream_start_vstream_Reply) {}
+ rpc OutputVStream_stop_and_clear (VStream_stop_and_clear_Request) returns (VStream_stop_and_clear_Reply) {}
+ rpc OutputVStream_start_vstream (VStream_start_vstream_Request) returns (VStream_start_vstream_Reply) {}
rpc InputVStream_resume (VStream_resume_Request) returns (VStream_resume_Reply) {}
rpc OutputVStream_resume (VStream_resume_Request) returns (VStream_resume_Reply) {}
rpc InputVStream_get_user_buffer_format (VStream_get_user_buffer_format_Request) returns (VStream_get_user_buffer_format_Reply) {}
rpc OutputVStream_get_user_buffer_format (VStream_get_user_buffer_format_Request) returns (VStream_get_user_buffer_format_Reply) {}
rpc InputVStream_get_info (VStream_get_info_Request) returns (VStream_get_info_Reply) {}
rpc OutputVStream_get_info (VStream_get_info_Request) returns (VStream_get_info_Reply) {}
+ rpc InputVStream_is_aborted (VStream_is_aborted_Request) returns (VStream_is_aborted_Reply) {}
+ rpc OutputVStream_is_aborted (VStream_is_aborted_Request) returns (VStream_is_aborted_Reply) {}
}
message empty {}
message Release_Request {
uint32 handle = 1;
+ uint32 pid = 2;
}
message Release_Reply {
string original_name = 2;
}
+enum ProtoNmsBurstType {
+ // No burst
+ PROTO_NMS_BURST_TYPE_NO_BURST = 0;
+ // No image delimiter, burst per class
+ PROTO_NMS_BURST_TYPE_H8_PER_CLASS = 1;
+ // Image delimiter and burst per class
+ PROTO_NMS_BURST_TYPE_H15_PER_CLASS = 2;
+ // Image delimiter and burst per image
+ PROTO_NMS_BURST_TYPE_H15_PER_FRAME = 3;
+}
+
message ProtoNmsInfo {
uint32 number_of_classes = 1;
uint32 max_bboxes_per_class = 2;
uint32 chunks_per_frame = 4;
bool is_defused = 5;
ProtoNmsDefuseInfo defuse_info = 6;
+ uint32 burst_size = 7;
+ ProtoNmsBurstType burst_type = 8;
}
message ProtoQuantInfo {
ProtoConfigureNetworkParams params = 2;
}
+message ConfiguredNetworkGroup_get_sorted_output_names_Request {
+ uint32 handle = 1;
+}
+
+message ConfiguredNetworkGroup_get_sorted_output_names_Reply {
+ uint32 status = 1;
+ repeated string sorted_output_names = 2;
+}
+
+message ConfiguredNetworkGroup_get_stream_names_from_vstream_name_Request {
+ uint32 handle = 1;
+ string vstream_name = 2;
+}
+
+message ConfiguredNetworkGroup_get_stream_names_from_vstream_name_Reply {
+ uint32 status = 1;
+ repeated string streams_names = 2;
+}
+
+message ConfiguredNetworkGroup_get_vstream_names_from_stream_name_Request {
+ uint32 handle = 1;
+ string stream_name = 2;
+}
+
+message ConfiguredNetworkGroup_get_vstream_names_from_stream_name_Reply {
+ uint32 status = 1;
+ repeated string vstreams_names = 2;
+}
+
message InputVStream_write_Request {
uint32 handle = 1;
bytes data = 2;
uint32 status = 1;
}
+message VStream_stop_and_clear_Request {
+ uint32 handle = 1;
+}
+
+message VStream_stop_and_clear_Reply {
+ uint32 status = 1;
+}
+
+message VStream_start_vstream_Request {
+ uint32 handle = 1;
+}
+
+message VStream_start_vstream_Reply {
+ uint32 status = 1;
+}
+
message VStream_resume_Request {
uint32 handle = 1;
}
message VStream_get_info_Reply {
uint32 status = 1;
ProtoVStreamInfo vstream_info = 2;
+}
+
+message VStream_is_aborted_Request {
+ uint32 handle = 1;
+}
+
+message VStream_is_aborted_Reply {
+ uint32 status = 1;
+ bool is_aborted = 2;
}
\ No newline at end of file
@ECHO OFF
set BASE_URI=https://hailo-hailort.s3.eu-west-2.amazonaws.com
-set HRT_VERSION=4.13.0
+set HRT_VERSION=4.14.0
set FW_DIR=Hailo8/%HRT_VERSION%/FW
set FW=hailo8_fw.%HRT_VERSION%_eth.bin
set -e
readonly BASE_URI="https://hailo-hailort.s3.eu-west-2.amazonaws.com"
-readonly HRT_VERSION=4.13.0
+readonly HRT_VERSION=4.14.0
readonly FW_AWS_DIR="Hailo8/${HRT_VERSION}/FW"
readonly FW="hailo8_fw.${HRT_VERSION}_eth.bin"
:: cmd
@ECHO OFF
set BASE_URI=https://hailo-hailort.s3.eu-west-2.amazonaws.com
-set HRT_VERSION=4.13.0
+set HRT_VERSION=4.14.0
set REMOTE_HEF_DIR=Hailo8/%HRT_VERSION%/HEFS
set LOCAL_EXAMPLES_HEF_DIR=..\libhailort\examples\hefs
-set LOCAL_TUTORIALS_HEF_DIR=..\libhailort\bindings\python\platform\tutorials\hefs
+set LOCAL_TUTORIALS_HEF_DIR=..\libhailort\bindings\python\platform\hailo_tutorials\hefs
set EXAMPLES_HEFS=(multi_network_shortcut_net.hef shortcut_net.hef)
-set TUTORIALS_HEFS=(resnet_v1_18.hef)
+set TUTORIALS_HEFS=(resnet_v1_18.hef shortcut_net.hef)
if not exist %LOCAL_EXAMPLES_HEF_DIR% mkdir %LOCAL_EXAMPLES_HEF_DIR%
if not exist %LOCAL_TUTORIALS_HEF_DIR% mkdir %LOCAL_TUTORIALS_HEF_DIR%
set -e
readonly BASE_URI="https://hailo-hailort.s3.eu-west-2.amazonaws.com"
-readonly HRT_VERSION=4.13.0
+readonly HRT_VERSION=4.14.0
readonly REMOTE_HEF_DIR="Hailo8/${HRT_VERSION}/HEFS"
readonly LOCAL_EXAMPLES_HEF_DIR="../libhailort/examples/hefs"
-readonly LOCAL_TUTORIALS_HEF_DIR="../libhailort/bindings/python/platform/tutorials/hefs/"
+readonly LOCAL_TUTORIALS_HEF_DIR="../libhailort/bindings/python/platform/hailo_tutorials/hefs"
readonly EXAMPLES_HEFS=(
"shortcut_net.hef"
"multi_network_shortcut_net.hef"
)
readonly TUTORIALS_HEFS=(
"resnet_v1_18.hef"
+ "shortcut_net.hef"
)
function create_hef_dir(){
--- /dev/null
+#! /bin/bash
+set -e
+
+# Environment declarations
+script_directory=$(cd $(dirname "${BASH_SOURCE[0]}") && pwd)
+local_platform_sw_path="$script_directory"/../../../
+h15="10.0.0.1"
+ssh-copy-id root@$h15
\ No newline at end of file
--- /dev/null
+#! /bin/bash
+set -e
+
+# Include Environment declarations
+script_directory=$(cd $(dirname "${BASH_SOURCE[0]}") && pwd)
+source "$script_directory"/hailo15_env_vars.sh
+
+cd $local_platform_sw_path
+./install.sh comp build_integrated_nnc_driver --image-path /local/bkc/v0.29-build-2023-05-07
+path="$local_platform_sw_path"/hailort/drivers/linux/integrated_nnc/hailo_integrated_nnc.ko
+scp $path root@$h15:/lib/modules/5.15.32-yocto-standard/kernel/drivers/misc/hailo_integrated_nnc.ko
+
+ssh root@$h15 "modprobe -r hailo_integrated_nnc && modprobe hailo_integrated_nnc"
--- /dev/null
+#! /bin/bash
+set -e
+
+# Include Environment declarations
+script_directory=$(cd $(dirname "${BASH_SOURCE[0]}") && pwd)
+source "$script_directory"/hailo15_env_vars.sh
+
+cd $local_platform_sw_path
+./install.sh comp build_fw --fw vpu --hw-arch hailo15
+scp firmware/vpu_firmware/build/hailo15_nnc_fw.bin root@$h15:/lib/firmware/hailo/hailo15_nnc_fw.bin
+ssh root@$h15 "modprobe -r hailo_integrated_nnc && modprobe hailo_integrated_nnc"
--- /dev/null
+#! /bin/bash
+set -e
+
+# Include Environment declarations
+script_directory=$(cd $(dirname "${BASH_SOURCE[0]}") && pwd)
+source "$script_directory"/hailo15_env_vars.sh
+
+cd $local_platform_sw_path
+./build.sh -aaarch64 -brelease install
+
+scp lib/linux.aarch64.release/libhailort.* root@$h15:/usr/lib/
+scp bin/linux.aarch64.release/hailortcli root@$h15:/usr/bin/
+scp bin/linux.aarch64.release/debalex root@$h15:/usr/bin/
+scp bin/linux.aarch64.release/board_tests root@$h15:/usr/bin/
--- /dev/null
+#! /bin/bash
+set -e
+
+# Include Environment declarations
+script_directory=$(cd $(dirname "${BASH_SOURCE[0]}") && pwd)
+source "$script_directory"/hailo15_env_vars.sh
+
+cd $local_platform_sw_path
+# Compile PCR
+./install.sh comp build_infra_tools --arch aarch64 --build-hailort --build-type release
+
+scp platform_internals/hailo_platform_internals/low_level_tools/build/linux.aarch64.release/pcr/pcr root@$h15:/usr/bin/
--- /dev/null
+#! /bin/bash
+set -e
+
+# Include Environment declarations
+script_directory=$(cd $(dirname "${BASH_SOURCE[0]}") && pwd)
+source "$script_directory"/hailo15_env_vars.sh
+
+cd $local_platform_sw_path
+source hailo_platform_venv/bin/activate
+ssh root@$h15 "hailortcli fw-logger /tmp/fw_log.dat"
+scp root@$h15:/tmp/fw_log.dat /tmp
+ssh root@$h15 "rm /tmp/fw_log.dat"
+
+python ./platform_internals/hailo_platform_internals/tools/firmware/parse_tracelog.py --fw vpu --core-log-entries firmware/vpu_firmware/build/hailo15_nnc_fw_*_log_entries.csv --core-only --raw-input-file /tmp/fw_log.dat
+
--- /dev/null
+#! /bin/bash
+set -e
+
+# Include Environment declarations
+script_directory=$(cd $(dirname "${BASH_SOURCE[0]}") && pwd)
+source "$script_directory"/hailo15_env_vars.sh
+
+ssh root@$h15 "hailortcli run /etc/hailo/hefs/hailo15/shortcut_net/28_28_3/shortcut_net.hef -c 1"
--- /dev/null
+#! /bin/bash
+set -e
+
+# Include Environment declarations
+script_directory=$(cd $(dirname "${BASH_SOURCE[0]}") && pwd)
+source "$script_directory"/hailo15_env_vars.sh
+
+# Build hailo15 artifacts
+/bin/bash "$script_directory"/load_hrt.sh
+
+# Build hailo15 PCR
+/bin/bash "$script_directory"/load_pcr.sh
+
+# Build hailo15 fw
+cd $local_platform_sw_path
+./install.sh comp build_fw --fw vpu --hw-arch hailo15
+scp firmware/vpu_firmware/build/hailo15_nnc_fw.bin root@$h15:/lib/firmware/hailo/hailo15_nnc_fw.bin
+
+# Build integrated_nnc (hailo15) driver
+/bin/bash "$script_directory"/load_driver.sh
+
+# Run sanity infer
+/bin/bash "$script_directory"/sanity_infer.sh
${HAILO_OS_DIR}/hailort_driver.cpp
${HAILO_OS_DIR}/file_descriptor.cpp
${HAILO_FULL_OS_DIR}/driver_scan.cpp
- # TODO: HRT-3816 remove mmap header
- ${HAILO_OS_DIR}/mmap_buffer.cpp
)
if(WIN32)
else()
message(WARNING "Could not find readline library. To better UI, please install it by calling `sudo apt install libreadline6-dev`")
endif()
+
+install(TARGETS debalex
+ RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
+)
+cli11_install_completion_file(debalex)
\ No newline at end of file
return device_ids;
}
-std::string get_device_filepath(const std::string &device_id)
+HailoRTDriver::DeviceInfo get_device_info(const std::string &device_id)
{
auto scan_results = HailoRTDriver::scan_devices();
if (!scan_results) {
throw std::runtime_error("Requested device not found");
}
- return device_found->dev_path;
+ return *device_found;
}
std::shared_ptr<HailoRTDriver> create_driver_object(const std::string &device_id)
{
- auto device_path = get_device_filepath(device_id);
- auto hailort_driver = HailoRTDriver::create(device_path);
+ auto device_info = get_device_info(device_id);
+ auto hailort_driver = HailoRTDriver::create(device_info);
if (!hailort_driver) {
throw std::runtime_error("Failed create hailort driver object");
}