From: HailoRT-Automation <98901220+HailoRT-Automation@users.noreply.github.com> Date: Thu, 29 Jun 2023 12:02:42 +0000 (+0300) Subject: v4.14.0 (#9) X-Git-Tag: accepted/tizen/unified/20250310.024602~9 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=9bce73eb42bad4da7876f6bafa2521f3c411937e;p=platform%2Fupstream%2Fhailort.git v4.14.0 (#9) --- diff --git a/.hailort.jpg b/.hailort.jpg new file mode 100644 index 0000000..84d2988 Binary files /dev/null and b/.hailort.jpg differ diff --git a/.hailort.png b/.hailort.png deleted file mode 100644 index c9adb26..0000000 Binary files a/.hailort.png and /dev/null differ diff --git a/CMakeLists.txt b/CMakeLists.txt index f44f4fc..55d54df 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -14,20 +14,12 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_BINARY_DIR}") In order to build, please create a new `build` directory and run `cmake ..` from there.") endif() -# Check build type -if (NOT CMAKE_BUILD_TYPE) - message(STATUS "No build type selected, default to Debug") - set(CMAKE_BUILD_TYPE "Debug") -endif() -message(STATUS "Building ${PROJECT_NAME} in ${CMAKE_BUILD_TYPE}") - # Set compiler flags in HAILORT_COMPILE_OPTIONS # TODO: Change HAILORT_COMPILE_OPTIONS to add_compile_options if(WIN32) # TODO: set this eventually? 
set(HAILORT_COMPILE_OPTIONS /Wall) set(HAILORT_COMPILE_OPTIONS ${HAILORT_COMPILE_OPTIONS} /W4 - /WX /DWIN32_LEAN_AND_MEAN /DNOMINMAX # NOMINMAX is required in order to play nice with std::min/std::max (otherwise Windows.h defines it's own) /D_HAILO_EXPORTING @@ -37,9 +29,9 @@ if(WIN32) add_definitions(-D_CRT_SECURE_NO_WARNINGS) # Disable "unsafe function" warnings elseif(UNIX) if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID STREQUAL "QCC") - set(HAILORT_COMPILE_OPTIONS ${HAILORT_COMPILE_OPTIONS} -Werror -Wall -Wextra -Wconversion) + set(HAILORT_COMPILE_OPTIONS ${HAILORT_COMPILE_OPTIONS} -Wall -Wextra -Wconversion) elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Clang") - set(HAILORT_COMPILE_OPTIONS ${HAILORT_COMPILE_OPTIONS} -Werror -Wall -Wextra + set(HAILORT_COMPILE_OPTIONS ${HAILORT_COMPILE_OPTIONS} -Wall -Wextra # TODO: remove me warnings -Wno-conversion -Wno-deprecated-declarations # On c structures with deprecated attribute, clang generates implicit move ctor diff --git a/README.md b/README.md index 7e3ad5e..ecfe8e0 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,12 @@

- +

# HailoRT # HailoRT is a lightweight, production-grade runtime library that runs on the host processor and provides a robust -user-space runtime library (the HailoRT Library) with intuitive APIs in C/C++ for optimized performance +user-space library (the HailoRT Library) with intuitive APIs in C/C++ for optimized performance HailoRT consists of the following main components: - HailoRT Library. @@ -42,8 +42,8 @@ Contact information and support is available at [**hailo.ai**](https://hailo.ai/ ## About Hailo-8™ -Hailo-8 is a deep learning processor for edge devices. The Hailo-8 provides groundbraking efficiency for neural network deployment. -The Hailo-8 edge AI processor, featuring up to 26 tera-operations per second (TOPS), significantly outperforms all other edge processors. +Hailo-8 is a deep learning processor for edge devices. The Hailo-8 provides groundbreaking efficiency for neural network deployment. +The Hailo-8 edge AI processor, featuring up to 26 Tera-Operations-Per-Second (TOPS), significantly outperforms all other edge processors. Hailo-8 is available in various form-factors, including the Hailo-8 M.2 Module. 
The Hailo-8 AI processor is designed to fit into a multitude of smart machines and devices, for a wide variety of sectors including Automotive, Smart Cities, Industry 4.0, diff --git a/common/include/context_switch_defs.h b/common/include/context_switch_defs.h index b7a0fe8..cf4858f 100644 --- a/common/include/context_switch_defs.h +++ b/common/include/context_switch_defs.h @@ -54,6 +54,9 @@ extern "C" { (vdma_channel_index) = ((src) & CONTEXT_SWITCH_DEFS__PACKED_VDMA_CHANNEL_ID__VDMA_CHANNEL_INDEX_MASK); \ } while (0) +#define CONTEXT_SWITCH_DEFS__WRITE_ACTION_BY_TYPE_MAX_SIZE (4) + + #pragma pack(push, 1) typedef struct { uint16_t core_bytes_per_buffer; @@ -104,6 +107,8 @@ typedef enum __attribute__((packed)) { CONTEXT_SWITCH_DEFS__ACTION_TYPE_OPEN_BOUNDARY_INPUT_CHANNEL, CONTEXT_SWITCH_DEFS__ACTION_TYPE_OPEN_BOUNDARY_OUTPUT_CHANNEL, CONTEXT_SWITCH_DEFS__ACTION_TYPE_ENABLE_NMS, + CONTEXT_SWITCH_DEFS__ACTION_TYPE_WRITE_DATA_BY_TYPE, + CONTEXT_SWITCH_DEFS__ACTION_TYPE_SWITCH_LCU_BATCH, /* Must be last */ CONTEXT_SWITCH_DEFS__ACTION_TYPE_COUNT @@ -358,8 +363,33 @@ typedef struct { typedef struct { uint8_t nms_unit_index; uint8_t network_index; + uint16_t number_of_classes; + uint16_t burst_size; } CONTEXT_SWITCH_DEFS__enable_nms_action_t; +typedef enum { + WRITE_ACTION_TYPE_GENERAL = 0, + WRITE_ACTION_TYPE_WRITE_BATCH = 1, + + /* Must be last */ + WRITE_ACTION_BY_TYPE_COUNT +} CONTEXT_SWITCH_DEFS__WRITE_ACTION_TYPE_t; + +typedef struct { + uint32_t address; + uint8_t data_type; //CONTEXT_SWITCH_DEFS__WRITE_ACTION_TYPE_t + uint32_t data; + uint8_t shift; + uint32_t mask; + uint8_t network_index; +} CONTEXT_SWITCH_DEFS__write_data_by_type_action_t; + +typedef struct { + uint8_t packed_lcu_id; + uint8_t network_index; + uint32_t kernel_done_count; +} CONTEXT_SWITCH_DEFS__switch_lcu_batch_action_data_t; + #pragma pack(pop) #ifdef __cplusplus diff --git a/common/include/control_protocol.h b/common/include/control_protocol.h index c5889ec..73d31d5 100644 --- 
a/common/include/control_protocol.h +++ b/common/include/control_protocol.h @@ -1017,6 +1017,7 @@ typedef enum { CONTROL_PROTOCOL__CONTEXT_SWITCH_STATUS_COUNT, } CONTROL_PROTOCOL__CONTEXT_SWITCH_STATUS_t; +#define CONTROL_PROTOCOL__INIFINITE_BATCH_COUNT (0) typedef struct { uint32_t state_machine_status_length; uint8_t state_machine_status; @@ -1024,6 +1025,8 @@ typedef struct { uint8_t application_index; uint32_t dynamic_batch_size_length; uint16_t dynamic_batch_size; + uint32_t batch_count_length; + uint16_t batch_count; uint32_t keep_nn_config_during_reset_length; uint8_t keep_nn_config_during_reset; } CONTROL_PROTOCOL__change_context_switch_status_request_t; @@ -1315,6 +1318,8 @@ typedef struct { uint8_t application_index; uint32_t dynamic_batch_size_length; uint16_t dynamic_batch_size; + uint32_t batch_count_length; + uint16_t batch_count; uint32_t channels_info_length; CONTROL_PROTOCOL__hw_infer_channels_info_t channels_info; } CONTROL_PROTOCOL__change_hw_infer_status_request_t; diff --git a/common/include/d2h_events.h b/common/include/d2h_events.h index 6eff396..b9009ef 100644 --- a/common/include/d2h_events.h +++ b/common/include/d2h_events.h @@ -57,6 +57,8 @@ typedef enum { HEALTH_MONITOR_CPU_ECC_FATAL_EVENT_ID, CONTEXT_SWITCH_BREAKPOINT_REACHED, HEALTH_MONITOR_CLOCK_CHANGED_EVENT_ID, + HW_INFER_MANAGER_INFER_DONE, + D2H_EVENT_ID_COUNT /* Must be last*/ } D2H_EVENT_ID_t; @@ -138,6 +140,12 @@ typedef struct { #define D2H_EVENT_HEALTH_MONITOR_CLOCK_CHANGED_EVENT_PARAMETER_COUNT (2) +typedef struct { + uint32_t infer_cycles; +} D2H_EVENT_hw_infer_mamager_infer_done_message_t; + +#define D2H_EVENT_HW_INFER_MANAGER_INFER_DONE_PARAMETER_COUNT (1) + /* D2H_EVENT__message_parameters_t should be in the same order as hailo_notification_message_parameters_t */ typedef union { D2H_EVENT_rx_error_event_message_t rx_error_event; @@ -149,6 +157,7 @@ typedef union { D2H_EVENT_health_monitor_cpu_ecc_event_message_t health_monitor_cpu_ecc_event; 
D2H_EVENT_context_switch_breakpoint_reached_event_massage_t context_switch_breakpoint_reached_event; D2H_EVENT_health_monitor_clock_changed_event_message_t health_monitor_clock_changed_event; + D2H_EVENT_hw_infer_mamager_infer_done_message_t hw_infer_manager_infer_done_event; } D2H_EVENT__message_parameters_t; typedef struct { diff --git a/common/include/firmware_status.h b/common/include/firmware_status.h index 193bfef..f45d9c1 100644 --- a/common/include/firmware_status.h +++ b/common/include/firmware_status.h @@ -411,6 +411,7 @@ Updating rules: FIRMWARE_STATUS__X(CONTROL_PROTOCOL_STATUS_INVALID_SLEEP_STATE)\ FIRMWARE_STATUS__X(CONTROL_PROTOCOL_STATUS_INVALID_HW_INFER_STATE_LENGTH)\ FIRMWARE_STATUS__X(CONTROL_PROTOCOL_STATUS_INVALID_CHANNELS_INFO_LENGTH)\ + FIRMWARE_STATUS__X(CONTROL_PROTOCOL_STATUS_INVALID_BATCH_COUNT_LENGTH)\ \ FIRMWARE_MODULE__X(FIRMWARE_MODULE__POWER_MEASUREMENT)\ FIRMWARE_STATUS__X(HAILO_POWER_MEASUREMENT_STATUS_POWER_INIT_ERROR)\ @@ -554,6 +555,7 @@ Updating rules: FIRMWARE_STATUS__X(PCIE_SERVICE_STATUS_INVALID_H2D_CHANNEL_INDEX)\ FIRMWARE_STATUS__X(PCIE_SERVICE_STATUS_INVALID_D2H_CHANNEL_INDEX)\ FIRMWARE_STATUS__X(PCIE_SERVICE_INVALID_INITIAL_CREDIT_SIZE)\ + FIRMWARE_STATUS__X(PCIE_SERVICE_ERROR_ADDING_CREDITS_TO_PCIE_CHANNEL)\ \ FIRMWARE_MODULE__X(FIRMWARE_MODULE__FIRMWARE_UPDATE)\ FIRMWARE_STATUS__X(FIRMWARE_UPDATE_STATUS_INVALID_PARAMETERS)\ @@ -753,6 +755,9 @@ Updating rules: FIRMWARE_STATUS__X(CONTEXT_SWITCH_STATUS_INVALID_DYNAMIC_CONTEXT_COUNT)\ FIRMWARE_STATUS__X(CONTEXT_SWITCH_STATUS_CONTEXT_INDEX_OUT_OF_RANGE)\ FIRMWARE_STATUS__X(CONTEXT_SWITCH_STATUS_TOTAL_PROVIDED_EDGE_LAYERS_LARGER_THEN_EXPECTED)\ + FIRMWARE_STATUS__X(CONTEXT_SWITCH_STATUS_REACHED_TIMEOUT_WHILE_WAITING_FOR_NETWORK_IDLE)\ + FIRMWARE_STATUS__X(CONTEXT_SWITCH_STATUS_WRITE_DATA_BY_TYPE_ACTION_INVALID_TYPE)\ + FIRMWARE_STATUS__X(CONTEXT_SWITCH_STATUS_WRITE_DATA_BY_TYPE_ACTION_INVALID_MEMORY_SPACE)\ \ FIRMWARE_MODULE__X(FIRMWARE_MODULE__D2H_EVENT_MANAGER)\ 
FIRMWARE_STATUS__X(HAILO_D2H_EVENT_MANAGER_STATUS_MESSAGE_HIGH_PRIORITY_QUEUE_CREATE_FAILED)\ @@ -1010,6 +1015,7 @@ Updating rules: FIRMWARE_STATUS__X(VDMA_SERVICE_STATUS_INVALID_CONSTANTS)\ FIRMWARE_STATUS__X(VDMA_SERVICE_STATUS_INVALID_CHANNEL_INDEX)\ FIRMWARE_STATUS__X(VDMA_SERVICE_STATUS_INVALID_EDGE_LAYER_DIRECTION)\ + FIRMWARE_STATUS__X(VDMA_SERVICE_INSUFFICIENT_DESCRIPTORS_COUNT)\ \ FIRMWARE_MODULE__X(FIRMWARE_MODULE__MEMORY_LOGGER)\ FIRMWARE_STATUS__X(MEMORY_LOGGER_STATUS_DEBUG_INSUFFICIENT_MEMORY)\ @@ -1079,6 +1085,9 @@ Updating rules: FIRMWARE_STATUS__X(NMS_MANAGER_STATUS_INVALID_NETWORK_INDEX)\ FIRMWARE_STATUS__X(NMS_MANAGER_STATUS_INVALID_NMS_UNIT_INDEX)\ FIRMWARE_STATUS__X(NMS_MANAGER_STATUS_INVALID_BATCH_SIZE)\ + FIRMWARE_STATUS__X(NMS_MANAGER_STATUS_INVALID_NUM_CLASSES_SIZE)\ + FIRMWARE_STATUS__X(NMS_MANAGER_STATUS_INVALID_BURST_SIZE)\ + FIRMWARE_STATUS__X(NMS_MANAGER_STATUS_INVALID_LAST_FRAME_IN_BATCH_SIZE)\ \ FIRMWARE_MODULE__X(FIRMWARE_MODULE__CLUSTER_MANAGER)\ FIRMWARE_STATUS__X(CLUSTER_MANAGER_STATUS_INVALID_CLUSTER_INDEX)\ @@ -1087,6 +1096,7 @@ Updating rules: FIRMWARE_STATUS__X(CLUSTER_MANAGER_STATUS_INVALID_LCU_INDEX)\ FIRMWARE_STATUS__X(CLUSTER_MANAGER_STATUS_INVALID_KERNEL_DONE_ADDRESS)\ FIRMWARE_STATUS__X(CLUSTER_MANAGER_STATUS_RECEIVED_UNEXPECTED_INTERRUPT)\ + FIRMWARE_STATUS__X(CLUSTER_MANAGER_STATUS_INVALID_NETWORK_INDEX)\ \ FIRMWARE_MODULE__X(FIRMWARE_MODULE__HW_INFER_MANAGER)\ FIRMWARE_STATUS__X(HW_INFER_MANAGER_STATUS_NETWORK_GROUP_NOT_CONFIGURED_BEFORE_INFER_START)\ diff --git a/common/include/utils.h b/common/include/utils.h index 7e48489..860d1fa 100644 --- a/common/include/utils.h +++ b/common/include/utils.h @@ -10,6 +10,8 @@ #ifndef __UTILS_H__ #define __UTILS_H__ +#include + /** A compile time assertion check. 
* * Validate at compile time that the predicate is true without @@ -125,4 +127,20 @@ _PP_ISEMPTY( \ #define MICROSECONDS_IN_MILLISECOND (1000) +static inline uint8_t ceil_log2(uint32_t n) +{ + uint8_t result = 0; + + if (n <= 1) { + return 0; + } + + while (n > 1) { + result++; + n = (n + 1) >> 1; + } + + return result; +} + #endif /* __UTILS_H__ */ diff --git a/hailort/CMakeLists.txt b/hailort/CMakeLists.txt index 7d90f9d..5f790bf 100644 --- a/hailort/CMakeLists.txt +++ b/hailort/CMakeLists.txt @@ -9,6 +9,18 @@ option(HAILO_BUILD_EXAMPLES "Build examples" OFF) option(HAILO_OFFLINE_COMPILATION "Don't download external dependencies" OFF) option(HAILO_BUILD_SERVICE "Build hailort service" OFF) option(HAILO_BUILD_PROFILER "Build hailort profiler" ON) +option(HAILO_COMPILE_WARNING_AS_ERROR "Add compilation flag for treating compilation warnings as errors" OFF) +option(HAILO_SUPPORT_PACKAGING "Create HailoRT package (internal)" OFF) + +if (HAILO_COMPILE_WARNING_AS_ERROR) + if(WIN32) + set(HAILORT_COMPILE_OPTIONS ${HAILORT_COMPILE_OPTIONS} /WX) + elseif(UNIX) + set(HAILORT_COMPILE_OPTIONS ${HAILORT_COMPILE_OPTIONS} -Werror) + else() + message(FATAL_ERROR "Unexpeced host, stopping build") + endif() +endif() # Flag for emulator (FPGA/Veloce) if(HAILO_BUILD_EMULATOR) @@ -18,7 +30,7 @@ endif() # Set firmware version add_definitions( -DFIRMWARE_VERSION_MAJOR=4 ) -add_definitions( -DFIRMWARE_VERSION_MINOR=13 ) +add_definitions( -DFIRMWARE_VERSION_MINOR=14 ) add_definitions( -DFIRMWARE_VERSION_REVISION=0 ) if(HAILO_BUILD_SERVICE) add_definitions( -DHAILO_SUPPORT_MULTI_PROCESS ) @@ -78,20 +90,6 @@ set(COMMON_INC_DIR ${PROJECT_SOURCE_DIR}/common/include) set(DRIVER_INC_DIR ${PROJECT_SOURCE_DIR}/hailort/drivers/common) set(RPC_DIR ${PROJECT_SOURCE_DIR}/hailort/rpc) -if(HAILO_BUILD_PYBIND) - if(NOT PYTHON_EXECUTABLE AND PYBIND11_PYTHON_VERSION) - # PYBIND11_PYTHON_VERSION is prioritized (not virtual environment) if PYTHON_EXECUTABLE is not set. 
- # See https://pybind11.readthedocs.io/en/stable/changelog.html#v2-6-0-oct-21-2020 - if((${CMAKE_VERSION} VERSION_LESS "3.22.0") AND (NOT WIN32)) - find_package(PythonInterp ${PYBIND11_PYTHON_VERSION} REQUIRED) - set(PYTHON_EXECUTABLE ${Python_EXECUTABLE}) - else() - find_package(Python3 ${PYBIND11_PYTHON_VERSION} REQUIRED EXACT COMPONENTS Interpreter Development) - set(PYTHON_EXECUTABLE ${Python3_EXECUTABLE}) - endif() - endif() - add_subdirectory(external/pybind11 EXCLUDE_FROM_ALL) -endif() add_subdirectory(external/Catch2 EXCLUDE_FROM_ALL) add_subdirectory(external/CLI11 EXCLUDE_FROM_ALL) add_subdirectory(external/json EXCLUDE_FROM_ALL) @@ -128,6 +126,9 @@ endif() if(HAILO_WIN_DRIVER) add_subdirectory(drivers/win) +endif() + +if(HAILO_SUPPORT_PACKAGING) add_subdirectory(packaging) endif() diff --git a/hailort/common/CMakeLists.txt b/hailort/common/CMakeLists.txt index 1056647..b3bed6b 100644 --- a/hailort/common/CMakeLists.txt +++ b/hailort/common/CMakeLists.txt @@ -19,6 +19,7 @@ set(SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/barrier.cpp ${CMAKE_CURRENT_SOURCE_DIR}/file_utils.cpp ${CMAKE_CURRENT_SOURCE_DIR}/string_utils.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/event_internal.cpp ${CMAKE_CURRENT_SOURCE_DIR}/device_measurements.cpp ) diff --git a/hailort/common/barrier.hpp b/hailort/common/barrier.hpp index 3062754..1fae129 100644 --- a/hailort/common/barrier.hpp +++ b/hailort/common/barrier.hpp @@ -16,6 +16,9 @@ namespace hailort { +class Barrier; +using BarrierPtr = std::shared_ptr; + /** * A barrier is a synchronization object that allows an expected number of threads to block until all of them * arrive at the barrier. 
diff --git a/hailort/common/device_measurements.cpp b/hailort/common/device_measurements.cpp index 89fd0d3..b498def 100644 --- a/hailort/common/device_measurements.cpp +++ b/hailort/common/device_measurements.cpp @@ -56,16 +56,22 @@ Expected> TemperatureMeasurement::create return ptr; } - TemperatureMeasurement::TemperatureMeasurement(Device &device, hailo_status &status) : BaseMeasurement(device, status) -{} +{ + /* Executing the check only if BaseMeasurement constructor has succeeded */ + if (HAILO_SUCCESS == status) { + status = sanity_check(); + } +} -hailo_status TemperatureMeasurement::start_measurement() +hailo_status TemperatureMeasurement::sanity_check() { - // Checking sensor before starting thread - auto temp_info = m_device.get_chip_temperature(); - CHECK_EXPECTED_AS_STATUS(temp_info); + auto temp_measurement = m_device.get_chip_temperature(); + return temp_measurement.status(); +} +hailo_status TemperatureMeasurement::start_measurement() +{ m_is_thread_running = true; m_thread = std::thread([this] () { while (m_is_thread_running.load()) { @@ -102,14 +108,21 @@ Expected> PowerMeasurement::create_shared(Devi PowerMeasurement::PowerMeasurement(Device &device, hailo_power_measurement_types_t measurement_type, hailo_status &status) : BaseMeasurement(device, status), m_measurement_type(measurement_type) -{} +{ + /* Executing the check only if BaseMeasurement constructor has succeeded */ + if (HAILO_SUCCESS == status) { + status = sanity_check(); + } +} -hailo_status PowerMeasurement::start_measurement() +hailo_status PowerMeasurement::sanity_check() { - // Checking sensor before starting thread - auto power_info = m_device.power_measurement(HAILO_DVM_OPTIONS_AUTO, m_measurement_type); - CHECK_EXPECTED_AS_STATUS(power_info); + auto power_measurement = m_device.power_measurement(HAILO_DVM_OPTIONS_AUTO, m_measurement_type); + return power_measurement.status(); +} +hailo_status PowerMeasurement::start_measurement() +{ m_is_thread_running = true; m_thread = 
std::thread([this] () { while (m_is_thread_running.load()) { diff --git a/hailort/common/device_measurements.hpp b/hailort/common/device_measurements.hpp index 6089be8..a3c266c 100644 --- a/hailort/common/device_measurements.hpp +++ b/hailort/common/device_measurements.hpp @@ -38,6 +38,9 @@ protected: std::atomic_bool m_is_thread_running; std::mutex m_mutex; hailort::AccumulatorPtr m_acc; + +private: + virtual hailo_status sanity_check() = 0; }; @@ -56,6 +59,9 @@ public: } TemperatureMeasurement(hailort::Device &device, hailo_status &status); + +private: + virtual hailo_status sanity_check() override; }; @@ -89,6 +95,7 @@ public: private: hailo_power_measurement_types_t m_measurement_type; + virtual hailo_status sanity_check() override; }; #endif /* _HAILO_DEVICE_MEASUREMENTS_HPP_ */ diff --git a/hailort/common/ethernet_utils.hpp b/hailort/common/ethernet_utils.hpp index 108b8a7..eadfaed 100644 --- a/hailort/common/ethernet_utils.hpp +++ b/hailort/common/ethernet_utils.hpp @@ -86,13 +86,12 @@ public: static const uint32_t MAX_INTERFACE_SIZE = IFNAMSIZ; #endif - static hailo_status get_interface_from_board_ip(const char *board_ip, char *interface_name, size_t interface_name_length); - static hailo_status get_ip_from_interface(const char *interface_name, char *ip, size_t ip_length); + static Expected get_interface_from_board_ip(const std::string &board_ip); + static Expected get_ip_from_interface(const std::string &interface_name); private: #if defined(__GNUG__) - static hailo_status get_interface_from_arp_entry(char *arp_entry, char *interface_name, - size_t max_interface_name_length); + static Expected get_interface_from_arp_entry(char *arp_entry); #endif }; diff --git a/hailort/common/event_internal.cpp b/hailort/common/event_internal.cpp new file mode 100644 index 0000000..e699379 --- /dev/null +++ b/hailort/common/event_internal.cpp @@ -0,0 +1,73 @@ +/** + * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. 
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT) +**/ +/** + * @file event_internal.cpp + * @brief Internal implementation for events, shared between all os. + **/ + +#include "common/event_internal.hpp" +#include "common/logger_macros.hpp" +#include "common/utils.hpp" + +namespace hailort +{ + +Waitable::Waitable(underlying_waitable_handle_t handle) : + m_handle(handle) +{} + +hailo_status Waitable::wait(std::chrono::milliseconds timeout) +{ + auto status = wait_for_single_object(m_handle, timeout); + if (HAILO_TIMEOUT == status) { + LOGGER__TRACE("wait_for_single_object failed with timeout (timeout={}ms)", timeout.count()); + return status; + } + CHECK_SUCCESS(status); + + status = post_wait(); + CHECK_SUCCESS(status); + + return HAILO_SUCCESS; +} + +underlying_waitable_handle_t Waitable::get_underlying_handle() +{ + return m_handle; +} + +WaitOrShutdown::WaitOrShutdown(WaitablePtr waitable, EventPtr shutdown_event) : + m_waitable(waitable), + m_shutdown_event(shutdown_event), + m_waitable_group(create_waitable_group(m_waitable, m_shutdown_event)) +{} + +hailo_status WaitOrShutdown::wait(std::chrono::milliseconds timeout) +{ + auto index = m_waitable_group.wait_any(timeout); + if (index.status() == HAILO_TIMEOUT) { + return index.status(); + } + CHECK_EXPECTED_AS_STATUS(index); + + assert(index.value() <= WAITABLE_INDEX); + return (index.value() == SHUTDOWN_INDEX) ? HAILO_SHUTDOWN_EVENT_SIGNALED : HAILO_SUCCESS; +} + +hailo_status WaitOrShutdown::signal() +{ + return m_waitable->signal(); +} + +WaitableGroup WaitOrShutdown::create_waitable_group(WaitablePtr waitable, EventPtr shutdown_event) +{ + // Note the order - consistent with SHUTDOWN_INDEX, WAITABLE_INDEX. 
+ std::vector> waitables; + waitables.emplace_back(std::ref(*shutdown_event)); + waitables.emplace_back(std::ref(*waitable)); + return waitables; +} + +} /* namespace hailort */ diff --git a/hailort/common/event_internal.hpp b/hailort/common/event_internal.hpp new file mode 100644 index 0000000..295d4a8 --- /dev/null +++ b/hailort/common/event_internal.hpp @@ -0,0 +1,115 @@ +/** + * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file event.hpp + * @brief Event and Semaphore wrapper objects used for multithreading + **/ + +#ifndef _EVENT_INTERNAL_HPP_ +#define _EVENT_INTERNAL_HPP_ + +#include "hailo/event.hpp" + +#include +#include +#include +#include +#if defined(__GNUC__) +#include +#endif + +namespace hailort +{ + +// Group of Waitable objects that can be waited for together +class WaitableGroup final +{ +public: + WaitableGroup(std::vector> &&waitables) : + m_waitables(std::move(waitables)), + m_waitable_handles(create_waitable_handle_vector(m_waitables)) + {} + + /** + * Waits until any of the given waitables are signaled. Returns the index in the waitables vector + * of the signaled waitable with the smallest index value. 
+ */ + Expected wait_any(std::chrono::milliseconds timeout); + +private: + +#if defined(__linux__) + using WaitableHandle = pollfd; +#else + using WaitableHandle = underlying_waitable_handle_t; +#endif + + static std::vector create_waitable_handle_vector( + const std::vector> &waitables) + { + std::vector waitable_handles; + waitable_handles.reserve(waitables.size()); + for (auto &waitable : waitables) { +#if defined(__linux__) + waitable_handles.emplace_back(pollfd{waitable.get().get_underlying_handle(), POLLIN, 0}); +#else + waitable_handles.emplace_back(waitable.get().get_underlying_handle()); +#endif + } + return waitable_handles; + } + + // Initialization dependency + std::vector> m_waitables; + // Store this vector here to avoid runtime allocations. + std::vector m_waitable_handles; +}; + +class WaitOrShutdown final +{ +public: + WaitOrShutdown(WaitablePtr waitable, EventPtr shutdown_event); + ~WaitOrShutdown() = default; + + WaitOrShutdown(const WaitOrShutdown &other) = delete; + WaitOrShutdown &operator=(const WaitOrShutdown &other) = delete; + WaitOrShutdown(WaitOrShutdown &&other) noexcept = default; + WaitOrShutdown &operator=(WaitOrShutdown &&other) = delete; + + // Waits on waitable or shutdown_event to be signaled: + // * If shutdown_event is signaled: + // - shutdown_event is not reset + // - HAILO_SHUTDOWN_EVENT_SIGNALED is returned + // * If waitable is signaled: + // - waitable is reset if waitable->is_auto_reset() + // - HAILO_SUCCESS is returned + // * If both waitable and shutdown_event are signaled: + // - shutdown_event is not reset + // - waitable is not reset + // - HAILO_SHUTDOWN_EVENT_SIGNALED is returned + // * If neither are signaled, then HAILO_TIMEOUT is returned + // * On any failure an appropriate status shall be returned + hailo_status wait(std::chrono::milliseconds timeout); + hailo_status signal(); + +private: + static WaitableGroup create_waitable_group(WaitablePtr waitable, EventPtr shutdown_event); + + // Note: We want to 
guarantee that if the shutdown event is signaled, HAILO_SHUTDOWN_EVENT_SIGNALED will be + // returned. + // Waitable::wait_any returns the smallest index value of all the signaled objects. + // Hence, SHUTDOWN_INDEX must come before WAITABLE_INDEX! + static const size_t SHUTDOWN_INDEX = 0; + static const size_t WAITABLE_INDEX = 1; + + const WaitablePtr m_waitable; + const EventPtr m_shutdown_event; + + WaitableGroup m_waitable_group; +}; + +} /* namespace hailort */ + +#endif /* _EVENT_INTERNAL_HPP_ */ diff --git a/hailort/common/filesystem.hpp b/hailort/common/filesystem.hpp index 74d6c77..b650b46 100644 --- a/hailort/common/filesystem.hpp +++ b/hailort/common/filesystem.hpp @@ -35,6 +35,7 @@ public: static Expected get_file_modified_time(const std::string &file_path); static Expected is_directory(const std::string &path); static hailo_status create_directory(const std::string &dir_path); + static hailo_status remove_directory(const std::string &dir_path); static Expected get_current_dir(); static std::string get_home_directory(); static bool is_path_accesible(const std::string &path); diff --git a/hailort/common/latency_meter.hpp b/hailort/common/latency_meter.hpp index c4a141f..5178e1f 100644 --- a/hailort/common/latency_meter.hpp +++ b/hailort/common/latency_meter.hpp @@ -29,7 +29,7 @@ public: using duration = std::chrono::nanoseconds; using TimestampsArray = CircularArray; - explicit LatencyMeter(const std::set &output_names, size_t timestamps_list_length) : + LatencyMeter(const std::set &output_names, size_t timestamps_list_length) : m_start_timestamps(timestamps_list_length), m_latency_count(0), m_latency_sum(0) diff --git a/hailort/common/os/posix/ethernet_utils.cpp b/hailort/common/os/posix/ethernet_utils.cpp index 0908c4d..f675ec5 100644 --- a/hailort/common/os/posix/ethernet_utils.cpp +++ b/hailort/common/os/posix/ethernet_utils.cpp @@ -10,6 +10,9 @@ #include "common/utils.hpp" #include "common/logger_macros.hpp" #include "common/ethernet_utils.hpp" 
+#include "common/socket.hpp" + +#include namespace hailort { @@ -20,8 +23,7 @@ namespace hailort #define ETHERNET_UTILS__ARP_DEVICE_NAME_INDEX (4) -hailo_status EthernetUtils::get_interface_from_arp_entry(char *arp_entry, char *interface_name, - size_t max_interface_name_length) +Expected EthernetUtils::get_interface_from_arp_entry(char *arp_entry) { /* This function parses the interface name out from the arp entry * Each entry is built as follows: @@ -30,132 +32,62 @@ hailo_status EthernetUtils::get_interface_from_arp_entry(char *arp_entry, char * * For example: * 10.0.0.163 0x1 0x2 80:00:de:ad:be:3f * enp1s0 * */ - hailo_status status = HAILO_UNINITIALIZED; size_t token_counter = 0; char* token = NULL; /* Start splitting the arp entry into tokens according to the delimiter */ token = strtok(arp_entry, ETHERNET_UTILS__ARP_ENTRY_DELIMIETERS); - if (NULL == token) { - LOGGER__ERROR("Invalid arp entry, could not split it to tokens"); - status = HAILO_ETH_FAILURE; - goto l_exit; - } + CHECK_AS_EXPECTED(nullptr != token, HAILO_ETH_FAILURE, "Invalid arp entry, could not split it to tokens"); /* Iterate over the tokens until the device name is found */ while (NULL != token) { token = strtok(NULL, ETHERNET_UTILS__ARP_ENTRY_DELIMIETERS); if (ETHERNET_UTILS__ARP_DEVICE_NAME_INDEX == token_counter) { LOGGER__DEBUG("Interface name: {}", token); - strncpy(interface_name, token, max_interface_name_length); - break; + return std::string(token); } token_counter++; } - status = HAILO_SUCCESS; -l_exit: - return status; + return make_unexpected(HAILO_ETH_FAILURE); } -hailo_status EthernetUtils::get_interface_from_board_ip(const char *board_ip, char *interface_name, size_t interface_name_length) +Expected EthernetUtils::get_interface_from_board_ip(const std::string &board_ip) { - hailo_status status = HAILO_UNINITIALIZED; - FILE* arp_file = NULL; - int fclose_rc = -1; - char buffer[ETHERNET_UTILS__ARP_MAX_ENTRY_LENGTH] = {}; - - CHECK_ARG_NOT_NULL(interface_name); - 
CHECK_ARG_NOT_NULL(board_ip); + std::ifstream arp_file(ETHERNET_UTILS__ARP_FILE, std::ios::in); + CHECK_AS_EXPECTED(arp_file, HAILO_OPEN_FILE_FAILURE, "Cannot open file {}. errno: {:#x}", ETHERNET_UTILS__ARP_FILE, errno); - /* Open arp file */ - arp_file = fopen(ETHERNET_UTILS__ARP_FILE, "r"); - if (NULL == arp_file) { - LOGGER__ERROR("Cannot open file {}. Errno: {:#x}", ETHERNET_UTILS__ARP_FILE, errno); - status = HAILO_OPEN_FILE_FAILURE; - goto l_exit; - } + char buffer[ETHERNET_UTILS__ARP_MAX_ENTRY_LENGTH] = {}; /* Go over all of the lines at the file */ - while(fgets(buffer, ARRAY_LENGTH(buffer), arp_file)) { - /* Check if the arp line contains the board_ip */ - if (strstr(buffer, board_ip)) { - status = get_interface_from_arp_entry(buffer, interface_name, interface_name_length); - if (HAILO_SUCCESS != status) { - goto l_exit; - } - break; + while (arp_file.getline(buffer, sizeof(buffer))) { + if (strstr(buffer, board_ip.c_str())) { + return get_interface_from_arp_entry(buffer); } } - status = HAILO_SUCCESS; -l_exit: - if (NULL != arp_file) { - fclose_rc = fclose(arp_file); - if (0 != fclose_rc) { - LOGGER__ERROR("Cannot close arp file {} ", ETHERNET_UTILS__ARP_FILE); - if (HAILO_SUCCESS == status) { - status = HAILO_CLOSE_FAILURE; - } else { - LOGGER__ERROR("Did not override status. 
Left status value at: {} (not assigned {}", - status, - HAILO_CLOSE_FAILURE); - } - } - } - - return status; + LOGGER__ERROR("Failed to find interface name for ip {}", board_ip); + return make_unexpected(HAILO_ETH_FAILURE); } -hailo_status EthernetUtils::get_ip_from_interface(const char *interface_name, char *ip, size_t ip_length) +Expected EthernetUtils::get_ip_from_interface(const std::string &interface_name) { - hailo_status status = HAILO_UNINITIALIZED; struct ifreq ifr = {}; - int fd = 0; - int posix_rc = 0; - - CHECK_ARG_NOT_NULL(interface_name); - CHECK_ARG_NOT_NULL(ip); /* Create socket */ - fd = socket(AF_INET, SOCK_DGRAM, 0); - if (fd < 0) { - LOGGER__ERROR("Failed to create socket. Errno: {:#x}", errno); - status = HAILO_ETH_FAILURE; - goto l_exit; - } + auto socket = Socket::create(AF_INET, SOCK_DGRAM, 0); + CHECK_EXPECTED(socket); /* Convert interface name to ip address */ ifr.ifr_addr.sa_family = AF_INET; - (void)strncpy(ifr.ifr_name, interface_name, IFNAMSIZ-1); - posix_rc = ioctl(fd, SIOCGIFADDR, &ifr); - if (0 > posix_rc) { - LOGGER__ERROR("Interface was not found. ioctl with SIOCGIFADDR has failed. Errno: {:#x}", errno); - status = HAILO_ETH_INTERFACE_NOT_FOUND; - goto l_exit; - } - (void)strncpy(ip, inet_ntoa(((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr), ip_length); - LOGGER__DEBUG("Interface {} | IP: {}", interface_name, ip); - - status = HAILO_SUCCESS; -l_exit: - /* Close the socket if it was created */ - if (0 < fd) { - posix_rc = close(fd); - if (0 != posix_rc) { - LOGGER__ERROR("Failed closing socket. Errno: {:#x}", errno); - /* Update status if only in case there was not previous error */ - if (HAILO_SUCCESS == status) { - status = HAILO_CLOSE_FAILURE; - } else { - LOGGER__ERROR("Did not override status. 
Left status value at: {} (not assigned {}", - status, - HAILO_CLOSE_FAILURE); - } - } - } - - return status; + (void)strncpy(ifr.ifr_name, interface_name.c_str(), IFNAMSIZ-1); + auto posix_rc = ioctl(socket->get_fd(), SIOCGIFADDR, &ifr); + CHECK_AS_EXPECTED(posix_rc >= 0, HAILO_ETH_INTERFACE_NOT_FOUND, + "Interface was not found. ioctl with SIOCGIFADDR has failed. errno: {:#x}", errno); + + std::string res = inet_ntoa(((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr); + LOGGER__DEBUG("Interface {} | IP: {}", interface_name, res); + return res; } } /* namespace hailort */ diff --git a/hailort/common/os/posix/filesystem.cpp b/hailort/common/os/posix/filesystem.cpp index 1a41347..57a12ac 100644 --- a/hailort/common/os/posix/filesystem.cpp +++ b/hailort/common/os/posix/filesystem.cpp @@ -165,6 +165,13 @@ hailo_status Filesystem::create_directory(const std::string &dir_path) return HAILO_SUCCESS; } +hailo_status Filesystem::remove_directory(const std::string &dir_path) +{ + auto ret_val = rmdir(dir_path.c_str()); + CHECK(0 == ret_val, HAILO_FILE_OPERATION_FAILURE, "Failed to remove directory {}", dir_path); + return HAILO_SUCCESS; +} + Expected Filesystem::get_current_dir() { char cwd[PATH_MAX]; diff --git a/hailort/common/os/posix/os_utils.cpp b/hailort/common/os/posix/os_utils.cpp index 36f1819..c9b0e98 100644 --- a/hailort/common/os/posix/os_utils.cpp +++ b/hailort/common/os/posix/os_utils.cpp @@ -8,15 +8,20 @@ **/ #include "hailo/hailort.h" - #include "common/os_utils.hpp" - +#include "common/utils.hpp" #include "spdlog/sinks/syslog_sink.h" +#include +#include +#include + namespace hailort { +#define EXISTENCE_CHECK_SIGNAL (0) + HailoRTOSLogger::HailoRTOSLogger() { m_hailort_os_logger = spdlog::syslog_logger_mt("syslog", "hailort_service", LOG_PID); @@ -29,6 +34,46 @@ uint32_t OsUtils::get_curr_pid() return getpid(); } +bool OsUtils::is_pid_alive(uint32_t pid) +{ + return (0 == kill(pid, EXISTENCE_CHECK_SIGNAL)); +} + +void OsUtils::set_current_thread_name(const 
std::string &name) +{ + (void)name; +#ifndef NDEBUG + // pthread_setname_np name size is limited to 16 chars (including null terminator) + assert(name.size() < 16); + pthread_setname_np(pthread_self(), name.c_str()); +#endif /* NDEBUG */ +} + +hailo_status OsUtils::set_current_thread_affinity(uint8_t cpu_index) +{ +#if defined(__linux__) + cpu_set_t cpuset; + CPU_ZERO(&cpuset); + CPU_SET(cpu_index, &cpuset); + + static const pid_t CURRENT_THREAD = 0; + int rc = sched_setaffinity(CURRENT_THREAD, sizeof(cpu_set_t), &cpuset); + CHECK(rc == 0, HAILO_INTERNAL_FAILURE, "sched_setaffinity failed with status {}", rc); + + return HAILO_SUCCESS; +#elif defined(__QNX__) + (void)cpu_index; + // TODO: impl on qnx (HRT-10889) + return HAILO_NOT_IMPLEMENTED; +#endif +} + +size_t OsUtils::get_page_size() +{ + static const auto page_size = sysconf(_SC_PAGESIZE); + return page_size; +} + CursorAdjustment::CursorAdjustment(){} CursorAdjustment::~CursorAdjustment(){} diff --git a/hailort/common/os/posix/traffic_control.cpp b/hailort/common/os/posix/traffic_control.cpp index fccbbc9..71aa3f4 100644 --- a/hailort/common/os/posix/traffic_control.cpp +++ b/hailort/common/os/posix/traffic_control.cpp @@ -22,7 +22,7 @@ namespace hailort Expected TrafficControlUtil::create(const std::string &ip, uint16_t port, uint32_t rate_bytes_per_sec) { - auto interface_name = get_interface_name(ip); + auto interface_name = EthernetUtils::get_interface_from_board_ip(ip); CHECK_EXPECTED(interface_name, "get_interface_name failed with status {}", interface_name.status()); auto board_id = ip_to_board_id(ip); @@ -158,17 +158,6 @@ hailo_status TrafficControlUtil::tc_class_del_dev_for_board(const std::string &i return run_command(cmd.str(), m_is_sudo_needed, {}, true); } -Expected TrafficControlUtil::get_interface_name(const std::string &ip) -{ - auto interface_name = Buffer::create(EthernetUtils::MAX_INTERFACE_SIZE, 0); - CHECK_EXPECTED(interface_name); - - 
CHECK_SUCCESS_AS_EXPECTED(EthernetUtils::get_interface_from_board_ip(ip.c_str(), - interface_name->as_pointer(), interface_name->size())); - - return interface_name->to_string(); -} - Expected TrafficControlUtil::ip_to_board_id(const std::string &ip) { // Takes last digit from 3 octet + the whole 4th octet diff --git a/hailort/common/os/posix/traffic_control.hpp b/hailort/common/os/posix/traffic_control.hpp index cbca2e8..82cbfff 100644 --- a/hailort/common/os/posix/traffic_control.hpp +++ b/hailort/common/os/posix/traffic_control.hpp @@ -23,7 +23,6 @@ class TrafficControlUtil final { public: static Expected create(const std::string &ip, uint16_t port, uint32_t rate_bytes_per_sec); - static Expected get_interface_name(const std::string &ip); ~TrafficControlUtil() = default; TrafficControlUtil(TrafficControlUtil&) = delete; TrafficControlUtil &operator=(const TrafficControlUtil &) = delete; diff --git a/hailort/common/os/windows/ethernet_utils.cpp b/hailort/common/os/windows/ethernet_utils.cpp index 8fec8d4..4dcfd3f 100644 --- a/hailort/common/os/windows/ethernet_utils.cpp +++ b/hailort/common/os/windows/ethernet_utils.cpp @@ -160,48 +160,40 @@ Expected ArpTable::create(uint32_t interface_index) return result; } -hailo_status EthernetUtils::get_interface_from_board_ip(const char *board_ip, char *interface_name, size_t interface_name_length) +Expected EthernetUtils::get_interface_from_board_ip(const std::string &board_ip) { - CHECK_ARG_NOT_NULL(interface_name); - CHECK_ARG_NOT_NULL(board_ip); - auto network_interfaces = NetworkInterface::get_all_interfaces(); - CHECK_EXPECTED_AS_STATUS(network_interfaces); + CHECK_EXPECTED(network_interfaces); struct in_addr board_ip_struct{}; - auto status = Socket::pton(AF_INET, board_ip, &board_ip_struct); - CHECK_SUCCESS(status, "Invalid board ip address {}", board_ip); + auto status = Socket::pton(AF_INET, board_ip.c_str(), &board_ip_struct); + CHECK_SUCCESS_AS_EXPECTED(status, "Invalid board ip address {}", board_ip); for 
(const auto& network_interface : network_interfaces.value()) { auto arp_table = ArpTable::create(network_interface.index()); - CHECK_EXPECTED_AS_STATUS(arp_table); + CHECK_EXPECTED(arp_table); const auto mac_address = arp_table->get_mac_address(static_cast(board_ip_struct.S_un.S_addr)); if (mac_address) { - (void)strncpy(interface_name, network_interface.friendly_name().c_str(), interface_name_length); - return HAILO_SUCCESS; + return network_interface.friendly_name(); } } - return HAILO_ETH_INTERFACE_NOT_FOUND; + return make_unexpected(HAILO_ETH_INTERFACE_NOT_FOUND); } -hailo_status EthernetUtils::get_ip_from_interface(const char *interface_name, char *ip, size_t ip_length) +Expected EthernetUtils::get_ip_from_interface(const std::string &interface_name) { - CHECK_ARG_NOT_NULL(interface_name); - CHECK_ARG_NOT_NULL(ip); - auto network_interfaces = NetworkInterface::get_all_interfaces(); - CHECK_EXPECTED_AS_STATUS(network_interfaces); + CHECK_EXPECTED(network_interfaces); for (const auto& network_interface : network_interfaces.value()) { if (network_interface.friendly_name() == interface_name) { - (void)strncpy(ip, network_interface.ip().c_str(), ip_length); - return HAILO_SUCCESS; + return network_interface.ip(); } } - return HAILO_ETH_INTERFACE_NOT_FOUND; + return make_unexpected(HAILO_ETH_INTERFACE_NOT_FOUND); } } /* namespace hailort */ diff --git a/hailort/common/os/windows/filesystem.cpp b/hailort/common/os/windows/filesystem.cpp index d9adeae..6dbcc25 100644 --- a/hailort/common/os/windows/filesystem.cpp +++ b/hailort/common/os/windows/filesystem.cpp @@ -164,6 +164,13 @@ hailo_status Filesystem::create_directory(const std::string &dir_path) return HAILO_SUCCESS; } +hailo_status Filesystem::remove_directory(const std::string &dir_path) +{ + bool was_removed = RemoveDirectoryA(dir_path.c_str()); + CHECK(was_removed, HAILO_FILE_OPERATION_FAILURE, "Failed to remove directory {}", dir_path); + return HAILO_SUCCESS; +} + bool Filesystem::is_path_accesible(const 
std::string &path) { // The code is based on examples from: https://cpp.hotexamples.com/examples/-/-/AccessCheck/cpp-accesscheck-function-examples.html diff --git a/hailort/common/os/windows/os_utils.cpp b/hailort/common/os/windows/os_utils.cpp index 3146d31..3d4022f 100644 --- a/hailort/common/os/windows/os_utils.cpp +++ b/hailort/common/os/windows/os_utils.cpp @@ -8,6 +8,7 @@ **/ #include "common/os_utils.hpp" +#include "common/utils.hpp" #include "hailo/hailort.h" #include @@ -29,6 +30,54 @@ uint32_t OsUtils::get_curr_pid() return static_cast(GetCurrentProcessId()); } +bool OsUtils::is_pid_alive(uint32_t pid) +{ + HANDLE hProcess = OpenProcess(PROCESS_QUERY_INFORMATION, FALSE, pid); + if (hProcess == NULL) { + // Process is not running + return false; + } + + DWORD exitCode; + BOOL result = GetExitCodeProcess(hProcess, &exitCode); + + CloseHandle(hProcess); + + if (result && exitCode == STILL_ACTIVE) { + return true; + } + else { + return false; + } +} + +void OsUtils::set_current_thread_name(const std::string &name) +{ + (void)name; +} + +hailo_status OsUtils::set_current_thread_affinity(uint8_t cpu_index) +{ + const DWORD_PTR affinity_mask = static_cast(1ULL << cpu_index); + CHECK(0 != SetThreadAffinityMask(GetCurrentThread(), affinity_mask), HAILO_INTERNAL_FAILURE, + "SetThreadAffinityMask failed. 
LE={}", GetLastError()); + + return HAILO_SUCCESS; +} + +static size_t get_page_size_impl() +{ + SYSTEM_INFO system_info{}; + GetSystemInfo(&system_info); + return system_info.dwPageSize; +} + +size_t OsUtils::get_page_size() +{ + static const auto page_size = get_page_size_impl(); + return page_size; +} + CursorAdjustment::CursorAdjustment() { // Enables Vitual Terminal Processing - enables ANSI Escape Sequences on Windows diff --git a/hailort/common/os_utils.hpp b/hailort/common/os_utils.hpp index 30d0c2e..025ef1d 100644 --- a/hailort/common/os_utils.hpp +++ b/hailort/common/os_utils.hpp @@ -57,22 +57,12 @@ class OsUtils final { public: OsUtils() = delete; - static uint32_t get_curr_pid(); - - static void set_current_thread_name(const std::string &name) - { - (void)name; -#ifndef NDEBUG -#ifndef _WIN32 - // pthread_setname_np name size is limited to 16 chars (including null terminator) - assert(name.size() < 16); - pthread_setname_np(pthread_self(), name.c_str()); -#else -// TODO: implement for windows -#endif /* _WIN32 */ -#endif /* NDEBUG */ - } + static uint32_t get_curr_pid(); + static bool is_pid_alive(uint32_t pid); + static void set_current_thread_name(const std::string &name); + static hailo_status set_current_thread_affinity(uint8_t cpu_index); + static size_t get_page_size(); }; } /* namespace hailort */ diff --git a/hailort/common/socket.hpp b/hailort/common/socket.hpp index 8df9daf..afe0afd 100644 --- a/hailort/common/socket.hpp +++ b/hailort/common/socket.hpp @@ -42,6 +42,8 @@ public: m_module_wrapper(std::move(other.m_module_wrapper)), m_socket_fd(std::exchange(other.m_socket_fd, INVALID_SOCKET)) {}; + socket_t get_fd() { return m_socket_fd; } + static hailo_status ntop(int af, const void *src, char *dst, socklen_t size); static hailo_status pton(int af, const char *src, void *dst); diff --git a/hailort/common/utils.hpp b/hailort/common/utils.hpp index 3dd3bc6..a285651 100644 --- a/hailort/common/utils.hpp +++ b/hailort/common/utils.hpp @@ -18,6 
+18,7 @@ #include #include #include +#include namespace hailort @@ -25,7 +26,7 @@ namespace hailort #define IS_FIT_IN_UINT8(number) ((std::numeric_limits::max() >= ((int32_t)(number))) && (std::numeric_limits::min() <= ((int32_t)(number)))) #define IS_FIT_IN_UINT16(number) ((std::numeric_limits::max() >= ((int32_t)(number))) && (std::numeric_limits::min() <= ((int32_t)(number)))) - +#define IS_FIT_IN_UINT32(number) ((std::numeric_limits::max() >= ((int64_t)(number))) && (std::numeric_limits::min() <= ((int64_t)(number)))) template static inline bool contains(const std::vector &container, const T &value) @@ -51,6 +52,12 @@ static inline bool contains(const std::set &container, T value) return (container.find(value) != container.end()); } +template +static inline bool contains(const std::unordered_set &container, T value) +{ + return (container.find(value) != container.end()); +} + // From https://stackoverflow.com/questions/57092289/do-stdmake-shared-and-stdmake-unique-have-a-nothrow-version template static inline std::unique_ptr make_unique_nothrow(Args&&... args) @@ -202,6 +209,14 @@ _ISEMPTY( \ } while(0) #define CHECK_SUCCESS_AS_EXPECTED(status, ...) _CHECK_SUCCESS_AS_EXPECTED(status, ISEMPTY(__VA_ARGS__), "" __VA_ARGS__) +// Define macro CHECK_IN_DEBUG - that checks cond in debug with CHECK macro but in release does nothing and will get optimized out +#ifdef NDEBUG +// In release have this macro do nothing - empty macro +#define CHECK_IN_DEBUG(cond, ret_val, ...) +#else // NDEBUG +#define CHECK_IN_DEBUG(cond, ret_val, ...) CHECK(cond, ret_val, __VA_ARGS__) +#endif // NDEBUG + #ifdef HAILO_SUPPORT_MULTI_PROCESS #define _CHECK_SUCCESS_AS_RPC_STATUS(status, reply, is_default, fmt, ...) 
\ do { \ @@ -314,6 +329,12 @@ static uint32_t get_min_value_of_unordered_map(const std::unordered_map &m return min_count; } +static inline bool is_env_variable_on(const char* env_var_name) +{ + auto env_var = std::getenv(env_var_name); + return ((nullptr != env_var) && (strnlen(env_var, 2) == 1) && (strncmp(env_var, "1", 1) == 0)); +} + } /* namespace hailort */ #endif /* HAILO_UTILS_H_ */ \ No newline at end of file diff --git a/hailort/drivers/common/hailo_ioctl_common.h b/hailort/drivers/common/hailo_ioctl_common.h index f11eae7..a70d4e0 100644 --- a/hailort/drivers/common/hailo_ioctl_common.h +++ b/hailort/drivers/common/hailo_ioctl_common.h @@ -27,6 +27,19 @@ #define INVALID_VDMA_CHANNEL (0xff) +#if !defined(__cplusplus) && defined(NTDDI_VERSION) +#include +typedef ULONG uint32_t; +typedef UCHAR uint8_t; +typedef USHORT uint16_t; +typedef ULONGLONG uint64_t; +typedef uint64_t u64; +typedef uint32_t u32; +typedef uint16_t u16; +typedef uint8_t u8; +#endif /* !defined(__cplusplus) && defined(NTDDI_VERSION) */ + + #ifdef _MSC_VER #if !defined(bool) && !defined(__cplusplus) typedef uint8_t bool; @@ -64,6 +77,8 @@ typedef uint8_t bool; #include #include #include +#include + // defines for devctl #define _IOW_ __DIOF #define _IOR_ __DIOT @@ -132,9 +147,9 @@ struct hailo_vdma_buffer_unmap_params { /* structure used in ioctl HAILO_DESC_LIST_CREATE */ struct hailo_desc_list_create_params { size_t desc_count; // in + bool is_circular; // in uintptr_t desc_handle; // out - // Note: The dma address is required for CONTEXT_SWITCH firmware controls - uint64_t dma_address; // out + uint64_t dma_address; // out }; /* structure used in ioctl HAILO_NON_LINUX_DESC_LIST_MMAP */ @@ -277,7 +292,7 @@ struct hailo_vdma_channel_write_register_params { /* structure used in ioctl HAILO_VDMA_BUFFER_SYNC */ enum hailo_vdma_buffer_sync_type { - HAILO_SYNC_FOR_HOST, + HAILO_SYNC_FOR_CPU, HAILO_SYNC_FOR_DEVICE, /** Max enum value to maintain ABI Integrity */ diff --git 
a/hailort/hailort_service/CMakeLists.txt b/hailort/hailort_service/CMakeLists.txt index 2b9e3a5..3755c4e 100644 --- a/hailort/hailort_service/CMakeLists.txt +++ b/hailort/hailort_service/CMakeLists.txt @@ -13,6 +13,7 @@ add_executable(hailort_service service_resource_manager.hpp ${HAILORT_SERVICE_OS_DIR}/hailort_service.cpp ${HAILORT_COMMON_CPP_SOURCES} + ${HAILO_FULL_OS_DIR}/event.cpp # TODO HRT-10681: move event.cpp to common ) target_compile_options(hailort_service PRIVATE ${HAILORT_COMPILE_OPTIONS}) set_property(TARGET hailort_service PROPERTY CXX_STANDARD 14) diff --git a/hailort/hailort_service/hailort_rpc_service.cpp b/hailort/hailort_service/hailort_rpc_service.cpp index 09a0472..d76ed61 100644 --- a/hailort/hailort_service/hailort_rpc_service.cpp +++ b/hailort/hailort_service/hailort_rpc_service.cpp @@ -32,32 +32,146 @@ HailoRtRpcService::HailoRtRpcService() }); } -void HailoRtRpcService::keep_alive() +hailo_status HailoRtRpcService::abort_input_vstream(uint32_t handle) { - while (true) { - std::this_thread::sleep_for(hailort::HAILO_KEEPALIVE_INTERVAL / 2); - auto now = std::chrono::high_resolution_clock::now(); + if (is_input_vstream_aborted(handle)) { + return HAILO_SUCCESS; + } + + auto lambda = [](std::shared_ptr input_vstream) { + return input_vstream->abort(); + }; + auto &manager = ServiceResourceManager::get_instance(); + auto status = manager.execute(handle, lambda); + if (HAILO_SUCCESS != status) { + LOGGER__ERROR("Failed to abort input vstream with status {}", status); + } + return status; +} + +hailo_status HailoRtRpcService::abort_output_vstream(uint32_t handle) +{ + if (is_output_vstream_aborted(handle)) { + return HAILO_SUCCESS; + } + + auto lambda = [](std::shared_ptr output_vstream) { + return output_vstream->abort(); + }; + auto &manager = ServiceResourceManager::get_instance(); + auto status = manager.execute(handle, lambda); + if (HAILO_SUCCESS != status) { + LOGGER__ERROR("Failed to abort output vstream with status {}", status); + } + 
return status; +} + +bool HailoRtRpcService::is_input_vstream_aborted(uint32_t handle) +{ + auto lambda = [](std::shared_ptr input_vstream) { + return input_vstream->is_aborted(); + }; + auto &manager = ServiceResourceManager::get_instance(); + return manager.execute(handle, lambda); +} + +bool HailoRtRpcService::is_output_vstream_aborted(uint32_t handle) +{ + auto lambda = [](std::shared_ptr output_vstream) { + return output_vstream->is_aborted(); + }; + auto &manager = ServiceResourceManager::get_instance(); + return manager.execute(handle, lambda); +} + +hailo_status HailoRtRpcService::resume_input_vstream(uint32_t handle) +{ + if (!is_input_vstream_aborted(handle)) { + return HAILO_SUCCESS; + } + + auto lambda = [](std::shared_ptr input_vstream) { + return input_vstream->resume(); + }; + auto &manager = ServiceResourceManager::get_instance(); + auto status = manager.execute(handle, lambda); + if (HAILO_SUCCESS != status) { + LOGGER__ERROR("Failed to resume input vstream with status {}", status); + } + return status; +} + +hailo_status HailoRtRpcService::resume_output_vstream(uint32_t handle) +{ + if (!is_output_vstream_aborted(handle)) { + return HAILO_SUCCESS; + } + + auto lambda = [](std::shared_ptr output_vstream) { + return output_vstream->resume(); + }; + auto &manager = ServiceResourceManager::get_instance(); + auto status = manager.execute(handle, lambda); + if (HAILO_SUCCESS != status) { + LOGGER__ERROR("Failed to resume output vstream with status {}", status); + } + return status; +} + +// TODO: Add a named templated release functions for InputVStream and OutputVStream to call abort before release. 
+void HailoRtRpcService::abort_vstreams_by_pids(std::set &pids) +{ + auto inputs_handles = ServiceResourceManager::get_instance().resources_handles_by_pids(pids); + auto outputs_handles = ServiceResourceManager::get_instance().resources_handles_by_pids(pids); + for (auto &input_handle : inputs_handles) { + abort_input_vstream(input_handle); + } + for (auto &output_handle : outputs_handles) { + abort_output_vstream(output_handle); + } +} + + +void HailoRtRpcService::remove_disconnected_clients() +{ + std::this_thread::sleep_for(hailort::HAILO_KEEPALIVE_INTERVAL / 2); + auto now = std::chrono::high_resolution_clock::now(); + std::set pids_to_remove; + { std::unique_lock lock(m_mutex); - std::set pids_to_remove; for (auto pid_to_last_alive : m_clients_pids) { auto duration = std::chrono::duration_cast(now - pid_to_last_alive.second); if (duration > hailort::HAILO_KEEPALIVE_INTERVAL) { - auto client_id = pid_to_last_alive.first; - pids_to_remove.insert(client_id); - LOGGER__INFO("Client disconnected, pid: {}", client_id); - HAILORT_OS_LOG_INFO("Client disconnected, pid: {}", client_id); - ServiceResourceManager::get_instance().release_by_pid(client_id); - ServiceResourceManager::get_instance().release_by_pid(client_id); - ServiceResourceManager::get_instance().release_by_pid(client_id); - ServiceResourceManager::get_instance().release_by_pid(client_id); + auto client_pid = pid_to_last_alive.first; + pids_to_remove.insert(client_pid); } } - for (auto &pid : pids_to_remove) { - m_clients_pids.erase(pid); + + // We abort vstreams before releasing them to avoid cases where the vstream is stuck in execute of a + // blocking operation (which will be finished with timeout). + // To release the vstream the ServiceResourceManager is waiting for the resource_mutex which is also locked in execute. 
+ abort_vstreams_by_pids(pids_to_remove); + for (auto &client_pid : pids_to_remove) { + ServiceResourceManager::get_instance().release_by_pid(client_pid); + ServiceResourceManager::get_instance().release_by_pid(client_pid); + ServiceResourceManager::get_instance().release_by_pid(client_pid); + ServiceResourceManager::get_instance().release_by_pid(client_pid); + + LOGGER__INFO("Client disconnected, pid: {}", client_pid); + HAILORT_OS_LOG_INFO("Client disconnected, pid: {}", client_pid); + m_clients_pids.erase(client_pid); } } } + +void HailoRtRpcService::keep_alive() +{ + while (true) { + remove_disconnected_clients(); + } +} + grpc::Status HailoRtRpcService::client_keep_alive(grpc::ServerContext*, const keepalive_Request *request, empty*) { @@ -93,6 +207,8 @@ grpc::Status HailoRtRpcService::VDevice_dup_handle(grpc::ServerContext*, const d grpc::Status HailoRtRpcService::VDevice_create(grpc::ServerContext *, const VDevice_create_Request *request, VDevice_create_Reply *reply) { + remove_disconnected_clients(); + // Deserialization const auto params_proto = request->hailo_vdevice_params(); std::vector device_ids; @@ -125,8 +241,8 @@ grpc::Status HailoRtRpcService::VDevice_release(grpc::ServerContext*, const Rele Release_Reply *reply) { auto &manager = ServiceResourceManager::get_instance(); - auto status = manager.release_resource(request->handle()); - reply->set_status(static_cast(status)); + manager.release_resource(request->handle(), request->pid()); + reply->set_status(static_cast(HAILO_SUCCESS)); return grpc::Status::OK; } @@ -236,8 +352,8 @@ grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_release(grpc::ServerConte Release_Reply *reply) { auto &manager = ServiceResourceManager::get_instance(); - auto status = manager.release_resource(request->handle()); - reply->set_status(static_cast(status)); + manager.release_resource(request->handle(), request->pid()); + reply->set_status(static_cast(HAILO_SUCCESS)); return grpc::Status::OK; } @@ -468,11 +584,12 @@ 
grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_set_scheduler_timeout(grp const ConfiguredNetworkGroup_set_scheduler_timeout_Request *request, ConfiguredNetworkGroup_set_scheduler_timeout_Reply *reply) { - auto lambda = [](std::shared_ptr cng, std::chrono::milliseconds timeout_ms) { - return cng->set_scheduler_timeout(timeout_ms); + auto lambda = [](std::shared_ptr cng, std::chrono::milliseconds timeout_ms, std::string network_name) { + return cng->set_scheduler_timeout(timeout_ms, network_name); }; auto &net_group_manager = ServiceResourceManager::get_instance(); - auto status = net_group_manager.execute(request->handle(), lambda, static_cast(request->timeout_ms())); + auto status = net_group_manager.execute(request->handle(), lambda, static_cast(request->timeout_ms()), + request->network_name()); reply->set_status(status); return grpc::Status::OK; } @@ -561,21 +678,24 @@ grpc::Status HailoRtRpcService::InputVStreams_create(grpc::ServerContext *, cons }; inputs_params.emplace(param_proto.name(), std::move(params)); } + auto network_group_handle = request->net_group(); + auto client_pid = request->pid(); auto lambda = [](std::shared_ptr cng, const std::map &inputs_params) { return cng->create_input_vstreams(inputs_params); }; auto &net_group_manager = ServiceResourceManager::get_instance(); - auto vstreams_expected = net_group_manager.execute>>(request->net_group(), lambda, inputs_params); + auto vstreams_expected = net_group_manager.execute>>(network_group_handle, lambda, inputs_params); CHECK_EXPECTED_AS_RPC_STATUS(vstreams_expected, reply); auto vstreams = vstreams_expected.release(); - auto &manager = ServiceResourceManager::get_instance(); - auto client_pid = request->pid(); + auto &manager = ServiceResourceManager::get_instance(); for (size_t i = 0; i < vstreams.size(); i++) { auto handle = manager.register_resource(client_pid, make_shared_nothrow(std::move(vstreams[i]))); reply->add_handles(handle); } + net_group_manager.dup_handle(client_pid, 
network_group_handle); + reply->set_status(static_cast(HAILO_SUCCESS)); return grpc::Status::OK; } @@ -584,8 +704,8 @@ grpc::Status HailoRtRpcService::InputVStream_release(grpc::ServerContext *, cons Release_Reply *reply) { auto &manager = ServiceResourceManager::get_instance(); - auto status = manager.release_resource(request->handle()); - reply->set_status(static_cast(status)); + manager.release_resource(request->handle(), request->pid()); + reply->set_status(static_cast(HAILO_SUCCESS)); return grpc::Status::OK; } @@ -610,20 +730,24 @@ grpc::Status HailoRtRpcService::OutputVStreams_create(grpc::ServerContext *, con output_params.emplace(param_proto.name(), std::move(params)); } + auto network_group_handle = request->net_group(); + auto client_pid = request->pid(); + auto lambda = [](std::shared_ptr cng, const std::map &output_params) { return cng->create_output_vstreams(output_params); }; auto &net_group_manager = ServiceResourceManager::get_instance(); - auto vstreams_expected = net_group_manager.execute>>(request->net_group(), lambda, output_params); + auto vstreams_expected = net_group_manager.execute>>(network_group_handle, lambda, output_params); CHECK_EXPECTED_AS_RPC_STATUS(vstreams_expected, reply); auto vstreams = vstreams_expected.release(); - auto &manager = ServiceResourceManager::get_instance(); - auto client_pid = request->pid(); + auto &manager = ServiceResourceManager::get_instance(); for (size_t i = 0; i < vstreams.size(); i++) { auto handle = manager.register_resource(client_pid, make_shared_nothrow(std::move(vstreams[i]))); reply->add_handles(handle); } + net_group_manager.dup_handle(client_pid, network_group_handle); + reply->set_status(static_cast(HAILO_SUCCESS)); return grpc::Status::OK; } @@ -631,8 +755,17 @@ grpc::Status HailoRtRpcService::OutputVStreams_create(grpc::ServerContext *, con grpc::Status HailoRtRpcService::OutputVStream_release(grpc::ServerContext *, const Release_Request *request, Release_Reply *reply) { + auto was_aborted = 
is_output_vstream_aborted(request->handle()); + abort_output_vstream(request->handle()); auto &manager = ServiceResourceManager::get_instance(); - auto status = manager.release_resource(request->handle()); + auto resource = manager.release_resource(request->handle(), request->pid()); + auto status = HAILO_SUCCESS; + if (resource && (!was_aborted)) { + status = resource->resume(); + if (HAILO_SUCCESS != status) { + LOGGER__INFO("Failed to resume output vstream {} after destruction", resource->name()); + } + } reply->set_status(static_cast(status)); return grpc::Status::OK; } @@ -752,6 +885,8 @@ grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_get_all_stream_infos(grpc auto proto_nms_info_defuse_info = proto_nms_info->mutable_defuse_info(); proto_nms_info_defuse_info->set_class_group_index(stream_info.nms_info.defuse_info.class_group_index); proto_nms_info_defuse_info->set_original_name(std::string(stream_info.nms_info.defuse_info.original_name)); + proto_nms_info->set_burst_size(stream_info.nms_info.burst_size); + proto_nms_info->set_burst_type(static_cast(proto_stream_info.nms_info().burst_type())); } else { auto proto_stream_shape = proto_stream_info.mutable_stream_shape(); auto proto_stream_shape_shape = proto_stream_shape->mutable_shape(); @@ -793,9 +928,13 @@ grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_get_latency_measurement(g }; auto &manager = ServiceResourceManager::get_instance(); auto expected_latency_result = manager.execute>(request->handle(), lambda, request->network_name()); - CHECK_EXPECTED_AS_RPC_STATUS(expected_latency_result, reply); - reply->set_avg_hw_latency(static_cast(expected_latency_result.value().avg_hw_latency.count())); - reply->set_status(static_cast(HAILO_SUCCESS)); + if (HAILO_NOT_AVAILABLE == expected_latency_result.status()) { + reply->set_status(static_cast(HAILO_NOT_AVAILABLE)); + } else { + CHECK_EXPECTED_AS_RPC_STATUS(expected_latency_result, reply); + 
reply->set_avg_hw_latency(static_cast(expected_latency_result.value().avg_hw_latency.count())); + reply->set_status(static_cast(HAILO_SUCCESS)); + } return grpc::Status::OK; } @@ -813,6 +952,60 @@ grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_is_multi_context(grpc::Se return grpc::Status::OK; } +grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_get_sorted_output_names(grpc::ServerContext*, + const ConfiguredNetworkGroup_get_sorted_output_names_Request *request, + ConfiguredNetworkGroup_get_sorted_output_names_Reply *reply) +{ + auto lambda = [](std::shared_ptr cng) { + return cng->get_sorted_output_names(); + }; + auto &manager = ServiceResourceManager::get_instance(); + auto sorted_output_names_expected = manager.execute>>(request->handle(), lambda); + CHECK_EXPECTED_AS_RPC_STATUS(sorted_output_names_expected, reply); + auto sorted_output_names_proto = reply->mutable_sorted_output_names(); + for (auto &name : sorted_output_names_expected.value()) { + sorted_output_names_proto->Add(std::move(name)); + } + reply->set_status(static_cast(HAILO_SUCCESS)); + return grpc::Status::OK; +} + +grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_get_stream_names_from_vstream_name(grpc::ServerContext*, + const ConfiguredNetworkGroup_get_stream_names_from_vstream_name_Request *request, + ConfiguredNetworkGroup_get_stream_names_from_vstream_name_Reply *reply) +{ + auto lambda = [](std::shared_ptr cng, const std::string &vstream_name) { + return cng->get_stream_names_from_vstream_name(vstream_name); + }; + auto &manager = ServiceResourceManager::get_instance(); + auto streams_names_expected = manager.execute>>(request->handle(), lambda, request->vstream_name()); + CHECK_EXPECTED_AS_RPC_STATUS(streams_names_expected, reply); + auto streams_names_proto = reply->mutable_streams_names(); + for (auto &name : streams_names_expected.value()) { + streams_names_proto->Add(std::move(name)); + } + reply->set_status(static_cast(HAILO_SUCCESS)); + return grpc::Status::OK; +} 
+ +grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_get_vstream_names_from_stream_name(grpc::ServerContext*, + const ConfiguredNetworkGroup_get_vstream_names_from_stream_name_Request *request, + ConfiguredNetworkGroup_get_vstream_names_from_stream_name_Reply *reply) +{ + auto lambda = [](std::shared_ptr cng, const std::string &stream_name) { + return cng->get_vstream_names_from_stream_name(stream_name); + }; + auto &manager = ServiceResourceManager::get_instance(); + auto vstreams_names_expected = manager.execute>>(request->handle(), lambda, request->stream_name()); + CHECK_EXPECTED_AS_RPC_STATUS(vstreams_names_expected, reply); + auto vstreams_names_proto = reply->mutable_vstreams_names(); + for (auto &name : vstreams_names_expected.value()) { + vstreams_names_proto->Add(std::move(name)); + } + reply->set_status(static_cast(HAILO_SUCCESS)); + return grpc::Status::OK; +} + grpc::Status HailoRtRpcService::InputVStream_get_frame_size(grpc::ServerContext*, const VStream_get_frame_size_Request *request, VStream_get_frame_size_Reply *reply) { @@ -906,11 +1099,7 @@ grpc::Status HailoRtRpcService::OutputVStream_network_name(grpc::ServerContext*, grpc::Status HailoRtRpcService::InputVStream_abort(grpc::ServerContext*, const VStream_abort_Request *request, VStream_abort_Reply *reply) { - auto lambda = [](std::shared_ptr input_vstream) { - return input_vstream->abort(); - }; - auto &manager = ServiceResourceManager::get_instance(); - auto status = manager.execute(request->handle(), lambda); + auto status = abort_input_vstream(request->handle()); reply->set_status(status); return grpc::Status::OK; } @@ -918,11 +1107,7 @@ grpc::Status HailoRtRpcService::InputVStream_abort(grpc::ServerContext*, const V grpc::Status HailoRtRpcService::OutputVStream_abort(grpc::ServerContext*, const VStream_abort_Request *request, VStream_abort_Reply *reply) { - auto lambda = [](std::shared_ptr output_vstream) { - return output_vstream->abort(); - }; - auto &manager = 
ServiceResourceManager::get_instance(); - auto status = manager.execute(request->handle(), lambda); + auto status = abort_output_vstream(request->handle()); reply->set_status(status); return grpc::Status::OK; } @@ -951,6 +1136,54 @@ grpc::Status HailoRtRpcService::OutputVStream_resume(grpc::ServerContext*, const return grpc::Status::OK; } +grpc::Status HailoRtRpcService::InputVStream_stop_and_clear(grpc::ServerContext*, const VStream_stop_and_clear_Request *request, + VStream_stop_and_clear_Reply *reply) +{ + auto lambda = [](std::shared_ptr input_vstream) { + return input_vstream->stop_and_clear(); + }; + auto &manager = ServiceResourceManager::get_instance(); + auto status = manager.execute(request->handle(), lambda); + reply->set_status(status); + return grpc::Status::OK; +} + +grpc::Status HailoRtRpcService::OutputVStream_stop_and_clear(grpc::ServerContext*, const VStream_stop_and_clear_Request *request, + VStream_stop_and_clear_Reply *reply) +{ + auto lambda = [](std::shared_ptr output_vstream) { + return output_vstream->stop_and_clear(); + }; + auto &manager = ServiceResourceManager::get_instance(); + auto status = manager.execute(request->handle(), lambda); + reply->set_status(status); + return grpc::Status::OK; +} + +grpc::Status HailoRtRpcService::InputVStream_start_vstream(grpc::ServerContext*, const VStream_start_vstream_Request *request, + VStream_start_vstream_Reply *reply) +{ + auto lambda = [](std::shared_ptr input_vstream) { + return input_vstream->start_vstream(); + }; + auto &manager = ServiceResourceManager::get_instance(); + auto status = manager.execute(request->handle(), lambda); + reply->set_status(status); + return grpc::Status::OK; +} + +grpc::Status HailoRtRpcService::OutputVStream_start_vstream(grpc::ServerContext*, const VStream_start_vstream_Request *request, + VStream_start_vstream_Reply *reply) +{ + auto lambda = [](std::shared_ptr output_vstream) { + return output_vstream->start_vstream(); + }; + auto &manager = 
ServiceResourceManager::get_instance(); + auto status = manager.execute(request->handle(), lambda); + reply->set_status(status); + return grpc::Status::OK; +} + grpc::Status HailoRtRpcService::InputVStream_get_user_buffer_format(grpc::ServerContext*, const VStream_get_user_buffer_format_Request *request, VStream_get_user_buffer_format_Reply *reply) { @@ -1015,5 +1248,31 @@ grpc::Status HailoRtRpcService::OutputVStream_get_info(grpc::ServerContext*, con return grpc::Status::OK; } +grpc::Status HailoRtRpcService::InputVStream_is_aborted(grpc::ServerContext*, const VStream_is_aborted_Request *request, + VStream_is_aborted_Reply *reply) +{ + auto lambda = [](std::shared_ptr input_vstream) { + return input_vstream->is_aborted(); + }; + auto &manager = ServiceResourceManager::get_instance(); + auto is_aborted = manager.execute(request->handle(), lambda); + reply->set_is_aborted(is_aborted); + reply->set_status(static_cast(HAILO_SUCCESS)); + return grpc::Status::OK; +} + +grpc::Status HailoRtRpcService::OutputVStream_is_aborted(grpc::ServerContext*, const VStream_is_aborted_Request *request, + VStream_is_aborted_Reply *reply) +{ + auto lambda = [](std::shared_ptr input_vstream) { + return input_vstream->is_aborted(); + }; + auto &manager = ServiceResourceManager::get_instance(); + auto is_aborted = manager.execute(request->handle(), lambda); + reply->set_is_aborted(is_aborted); + reply->set_status(static_cast(HAILO_SUCCESS)); + return grpc::Status::OK; +} + } diff --git a/hailort/hailort_service/hailort_rpc_service.hpp b/hailort/hailort_service/hailort_rpc_service.hpp index a77d701..fe1e9c5 100644 --- a/hailort/hailort_service/hailort_rpc_service.hpp +++ b/hailort/hailort_service/hailort_rpc_service.hpp @@ -26,6 +26,7 @@ #endif #include +#include "hailo/hailort.h" namespace hailort { @@ -98,6 +99,18 @@ public: dup_handle_Reply*) override; virtual grpc::Status OutputVStream_dup_handle(grpc::ServerContext *ctx, const dup_handle_Request *request, dup_handle_Reply*) override; 
+ virtual grpc::Status InputVStream_stop_and_clear(grpc::ServerContext *ctx, const VStream_stop_and_clear_Request *request, + VStream_stop_and_clear_Reply*) override; + virtual grpc::Status OutputVStream_stop_and_clear(grpc::ServerContext *ctx, const VStream_stop_and_clear_Request *request, + VStream_stop_and_clear_Reply*) override; + virtual grpc::Status InputVStream_start_vstream(grpc::ServerContext *ctx, const VStream_start_vstream_Request *request, + VStream_start_vstream_Reply*) override; + virtual grpc::Status OutputVStream_start_vstream(grpc::ServerContext *ctx, const VStream_start_vstream_Request *request, + VStream_start_vstream_Reply*) override; + virtual grpc::Status InputVStream_is_aborted(grpc::ServerContext *ctx, const VStream_is_aborted_Request *request, + VStream_is_aborted_Reply*) override; + virtual grpc::Status OutputVStream_is_aborted(grpc::ServerContext *ctx, const VStream_is_aborted_Request *request, + VStream_is_aborted_Reply*) override; virtual grpc::Status ConfiguredNetworkGroup_dup_handle(grpc::ServerContext *ctx, const dup_handle_Request *request, dup_handle_Reply*) override; @@ -157,9 +170,26 @@ public: virtual grpc::Status ConfiguredNetworkGroup_get_config_params(grpc::ServerContext*, const ConfiguredNetworkGroup_get_config_params_Request *request, ConfiguredNetworkGroup_get_config_params_Reply *reply) override; + virtual grpc::Status ConfiguredNetworkGroup_get_sorted_output_names(grpc::ServerContext*, + const ConfiguredNetworkGroup_get_sorted_output_names_Request *request, + ConfiguredNetworkGroup_get_sorted_output_names_Reply *reply) override; + virtual grpc::Status ConfiguredNetworkGroup_get_stream_names_from_vstream_name(grpc::ServerContext*, + const ConfiguredNetworkGroup_get_stream_names_from_vstream_name_Request *request, + ConfiguredNetworkGroup_get_stream_names_from_vstream_name_Reply *reply) override; + virtual grpc::Status ConfiguredNetworkGroup_get_vstream_names_from_stream_name(grpc::ServerContext*, + const 
ConfiguredNetworkGroup_get_vstream_names_from_stream_name_Request *request, + ConfiguredNetworkGroup_get_vstream_names_from_stream_name_Reply *reply) override; private: void keep_alive(); + hailo_status abort_input_vstream(uint32_t handle); + hailo_status abort_output_vstream(uint32_t handle); + hailo_status resume_input_vstream(uint32_t handle); + hailo_status resume_output_vstream(uint32_t handle); + bool is_input_vstream_aborted(uint32_t handle); + bool is_output_vstream_aborted(uint32_t handle); + void abort_vstreams_by_pids(std::set &pids); + void remove_disconnected_clients(); std::mutex m_mutex; std::map> m_clients_pids; diff --git a/hailort/hailort_service/hailort_service b/hailort/hailort_service/hailort_service index 0259a28..67ee22b 100644 --- a/hailort/hailort_service/hailort_service +++ b/hailort/hailort_service/hailort_service @@ -7,5 +7,4 @@ [Service] HAILORT_LOGGER_PATH="/var/log/hailo" -HAILO_DISABLE_MULTIPLEXER=0 HAILO_MONITOR=0 diff --git a/hailort/hailort_service/service_resource_manager.hpp b/hailort/hailort_service/service_resource_manager.hpp index 488999c..5b5930b 100644 --- a/hailort/hailort_service/service_resource_manager.hpp +++ b/hailort/hailort_service/service_resource_manager.hpp @@ -13,9 +13,11 @@ #include "hailo/expected.hpp" #include "common/utils.hpp" +#include "common/os_utils.hpp" #include #include +#include namespace hailort { @@ -23,11 +25,13 @@ namespace hailort template struct Resource { Resource(uint32_t pid, std::shared_ptr resource) - : pid(pid), resource(std::move(resource)) - {} + : resource(std::move(resource)) + { + pids.insert(pid); + } - uint32_t pid; std::shared_ptr resource; + std::unordered_set pids; }; template @@ -69,42 +73,88 @@ public: uint32_t dup_handle(uint32_t pid, uint32_t handle) { - // Keeping this function for future possible usage - (void)pid; + std::unique_lock lock(m_mutex); + auto resource_expected = resource_lookup(handle); + assert(resource_expected); + auto resource = 
resource_expected.release(); + + assert(contains(m_resources_mutexes, handle)); + std::unique_lock resource_lock(m_resources_mutexes[handle]); + resource->pids.insert(pid); + return handle; } - hailo_status release_resource(uint32_t handle) + std::shared_ptr release_resource(uint32_t handle, uint32_t pid) { + std::shared_ptr res = nullptr; std::unique_lock lock(m_mutex); auto found = m_resources.find(handle); - CHECK(found != m_resources.end(), HAILO_NOT_FOUND, "Failed to release resource with handle {}, resource does not exist", handle); + if (found == m_resources.end()) { + LOGGER__INFO("Failed to release resource with handle {} and PID {}. The resource no longer exists or may have already been released", + handle, pid); + return res; + } + assert(contains(m_resources_mutexes, handle)); auto resource = m_resources[handle]; + bool release_resource = false; { std::unique_lock resource_lock(m_resources_mutexes[handle]); - m_resources.erase(handle); + resource->pids.erase(pid); + if (all_pids_dead(resource)) { + release_resource = true; + res = resource->resource; + m_resources.erase(handle); + } + } + if (release_resource) { + m_resources_mutexes.erase(handle); } - m_resources_mutexes.erase(handle); - return HAILO_SUCCESS; + return res; } - void release_by_pid(uint32_t pid) + std::vector> release_by_pid(uint32_t pid) { + std::vector> res; std::unique_lock lock(m_mutex); for (auto iter = m_resources.begin(); iter != m_resources.end(); ) { auto handle = iter->first; - if (iter->second->pid == pid) { + bool release_resource = false; + if (contains(iter->second->pids, pid)) { assert(contains(m_resources_mutexes, handle)); { std::unique_lock resource_lock(m_resources_mutexes[handle]); - iter = m_resources.erase(iter); + iter->second->pids.erase(pid); + if (iter->second->pids.empty()) { + release_resource = true; + res.push_back(iter->second->resource); + iter = m_resources.erase(iter); + } } + } + if (release_resource) { m_resources_mutexes.erase(handle); } else { 
++iter; } } + + return res; + } + + std::vector resources_handles_by_pids(std::set &pids) + { + std::unique_lock lock(m_mutex); + std::vector resources_handles; + for (auto &handle_resource_pair : m_resources) { + for (auto &pid : pids) { + if (contains(handle_resource_pair.second->pids, pid)) { + resources_handles.emplace_back(handle_resource_pair.first); + } + } + } + return resources_handles; } private: @@ -120,6 +170,16 @@ private: return resource; } + bool all_pids_dead(std::shared_ptr> resource) + { + for (auto &pid : resource->pids) { + if (OsUtils::is_pid_alive(pid)) { + return false; + } + } + return true; + } + std::mutex m_mutex; std::atomic m_current_handle_index; std::unordered_map>> m_resources; diff --git a/hailort/hailort_service/windows/hailort_service_env_vars.bat b/hailort/hailort_service/windows/hailort_service_env_vars.bat index a615ab4..2b4e82a 100644 --- a/hailort/hailort_service/windows/hailort_service_env_vars.bat +++ b/hailort/hailort_service/windows/hailort_service_env_vars.bat @@ -7,5 +7,4 @@ @REM Running this script requires Administrator permissions. 
reg ADD HKLM\SYSTEM\CurrentControlSet\Services\hailort_service /f /v Environment /t REG_MULTI_SZ /d ^ -HAILORT_LOGGER_PATH="%PROGRAMDATA%\HailoRT_Service\logs"\0^ -HAILO_DISABLE_MULTIPLEXER=0\0 \ No newline at end of file +HAILORT_LOGGER_PATH="%PROGRAMDATA%\HailoRT_Service\logs"\0^ \ No newline at end of file diff --git a/hailort/hailortcli/CMakeLists.txt b/hailort/hailortcli/CMakeLists.txt index cd7ff1d..b5c190b 100644 --- a/hailort/hailortcli/CMakeLists.txt +++ b/hailort/hailortcli/CMakeLists.txt @@ -26,19 +26,21 @@ set(HAILORTCLI_CPP_FILES run2/run2_command.cpp run2/network_runner.cpp - run2/live_printer.cpp + run2/live_stats.cpp run2/timer_live_track.cpp run2/network_live_track.cpp run2/measurement_live_track.cpp + run2/io_wrappers.cpp ) - + if(UNIX) # Unix only modules set(HAILORTCLI_CPP_FILES ${HAILORTCLI_CPP_FILES} udp_rate_limiter_command.cpp # TODO: We dont compile download_action_list_command on windows, as it uses packed enums (HRT-5919) download_action_list_command.cpp - ) + measure_nnc_performance_command.cpp + ) endif() # 'config_definitions_json_file' is used in generate_definitions_json_str.in for configure_file() @@ -70,7 +72,7 @@ target_link_libraries(hailortcli scheduler_mon_proto) if(WIN32) - target_link_libraries(hailortcli Ws2_32 Iphlpapi Shlwapi) + target_link_libraries(hailortcli Ws2_32 Iphlpapi Shlwapi winmm.lib) elseif(CMAKE_SYSTEM_NAME STREQUAL QNX) target_link_libraries(hailortcli pevents) endif() diff --git a/hailort/hailortcli/download_action_list_command.cpp b/hailort/hailortcli/download_action_list_command.cpp index be53034..d4a4cc9 100644 --- a/hailort/hailortcli/download_action_list_command.cpp +++ b/hailort/hailortcli/download_action_list_command.cpp @@ -308,6 +308,14 @@ Expected DownloadActionListCommand::parse_action_data(uint32_t bas data_json = *reinterpret_cast(action); action_length_local = sizeof(CONTEXT_SWITCH_DEFS__enable_nms_action_t); break; + case CONTEXT_SWITCH_DEFS__ACTION_TYPE_WRITE_DATA_BY_TYPE: + data_json = 
*reinterpret_cast(action); + action_length_local = sizeof(CONTEXT_SWITCH_DEFS__write_data_by_type_action_t); + break; + case CONTEXT_SWITCH_DEFS__ACTION_TYPE_SWITCH_LCU_BATCH: + data_json = *reinterpret_cast(action); + action_length_local = sizeof(CONTEXT_SWITCH_DEFS__switch_lcu_batch_action_data_t); + break; case CONTEXT_SWITCH_DEFS__ACTION_TYPE_COUNT: // Fallthrough // Handling CONTEXT_SWITCH_DEFS__ACTION_TYPE_COUNT is needed because we compile this file with -Wswitch-enum @@ -622,3 +630,12 @@ void to_json(json &j, const CONTEXT_SWITCH_DEFS__open_boundary_output_channel_da { j = unpack_vdma_channel_id(data); } + +void to_json(json& j, const CONTEXT_SWITCH_DEFS__switch_lcu_batch_action_data_t& data) { + const auto cluster_index = CONTEXT_SWITCH_DEFS__PACKED_LCU_ID_CLUSTER_INDEX_READ(data.packed_lcu_id); + const auto lcu_index = CONTEXT_SWITCH_DEFS__PACKED_LCU_ID_LCU_INDEX_READ(data.packed_lcu_id); + const auto network_index = data.network_index; + const auto kernel_done_count = data.kernel_done_count; + j = json{{"cluster_index", cluster_index}, {"lcu_index", lcu_index}, {"network_index", network_index}, + {"kernel_done_count", kernel_done_count}}; +} diff --git a/hailort/hailortcli/download_action_list_command.hpp b/hailort/hailortcli/download_action_list_command.hpp index 1aa271e..4a4bb6f 100644 --- a/hailort/hailortcli/download_action_list_command.hpp +++ b/hailort/hailortcli/download_action_list_command.hpp @@ -100,6 +100,8 @@ static std::pair mapping[] = { {CONTEXT_SWITCH_DEFS__ACTION_TYPE_OPEN_BOUNDARY_INPUT_CHANNEL, "open_boundary_input_channel"}, {CONTEXT_SWITCH_DEFS__ACTION_TYPE_OPEN_BOUNDARY_OUTPUT_CHANNEL, "open_boundary_output_channel"}, {CONTEXT_SWITCH_DEFS__ACTION_TYPE_ENABLE_NMS, "enable_nms"}, + {CONTEXT_SWITCH_DEFS__ACTION_TYPE_WRITE_DATA_BY_TYPE, "write_data_by_type"}, + {CONTEXT_SWITCH_DEFS__ACTION_TYPE_SWITCH_LCU_BATCH, "switch_lcu_batch"}, }; static_assert(ARRAY_ENTRIES(mapping) == CONTEXT_SWITCH_DEFS__ACTION_TYPE_COUNT, "Missing a mapping 
from a CONTEXT_SWITCH_DEFS__ACTION_TYPE_t to it's string value"); @@ -112,8 +114,9 @@ NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE(CONTEXT_SWITCH_DEFS__trigger_sequencer_action NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE(CONTEXT_SWITCH_DEFS__sequencer_interrupt_data_t, sequencer_index); NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE(CONTEXT_SWITCH_DEFS__wait_nms_data_t, aggregator_index); NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE(CONTEXT_SWITCH_DEFS__module_config_done_interrupt_data_t, module_index); -NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE(CONTEXT_SWITCH_DEFS__fetch_ccw_bursts_action_data_t, config_stream_index); -NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE(CONTEXT_SWITCH_DEFS__enable_nms_action_t, nms_unit_index, network_index); +NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE(CONTEXT_SWITCH_DEFS__fetch_ccw_bursts_action_data_t, config_stream_index, ccw_bursts); +NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE(CONTEXT_SWITCH_DEFS__enable_nms_action_t, nms_unit_index, network_index, number_of_classes, burst_size); +NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE(CONTEXT_SWITCH_DEFS__write_data_by_type_action_t, address, data_type, data, shift, mask, network_index); // Non-default implementations void to_json(json &j, const CONTEXT_SWITCH_DEFS__deactivate_vdma_channel_action_data_t &data); @@ -138,5 +141,6 @@ void to_json(json &j, const CONTEXT_SWITCH_DEFS__deactivate_cfg_channel_t &data) void to_json(json &j, const CONTEXT_SWITCH_DEFS__add_ddr_pair_info_action_data_t &data); void to_json(json &j, const CONTEXT_SWITCH_DEFS__open_boundary_input_channel_data_t &data); void to_json(json &j, const CONTEXT_SWITCH_DEFS__open_boundary_output_channel_data_t &data); +void to_json(json &j, const CONTEXT_SWITCH_DEFS__switch_lcu_batch_action_data_t &data); #endif /* _HAILO_DOWNLOAD_ACTION_LIST_COMMAND_HPP_ */ diff --git a/hailort/hailortcli/hailortcli.cpp b/hailort/hailortcli/hailortcli.cpp index 474a747..ed246d7 100644 --- a/hailort/hailortcli/hailortcli.cpp +++ b/hailort/hailortcli/hailortcli.cpp @@ -26,6 +26,7 @@ #endif #include "parse_hef_command.hpp" #include 
"fw_control_command.hpp" +#include "measure_nnc_performance_command.hpp" #include "firmware_header_utils.h" #include "hailo/hailort.h" @@ -200,6 +201,7 @@ public: add_subcommand(); #if defined(__GNUC__) add_subcommand(); + add_subcommand(); #endif add_subcommand(); add_subcommand(); diff --git a/hailort/hailortcli/hailortcli.hpp b/hailort/hailortcli/hailortcli.hpp index 4ed2ab6..8e0ef96 100644 --- a/hailort/hailortcli/hailortcli.hpp +++ b/hailort/hailortcli/hailortcli.hpp @@ -46,6 +46,12 @@ void add_device_options(CLI::App *app, hailo_device_params &device_params, bool Expected>> create_devices(const hailo_device_params &device_params); Expected> get_device_ids(const hailo_device_params &device_params); + +enum class OptionVisibility { + VISIBLE, + HIDDEN +}; + /** * CLI11 transformer object, converting enum argument from string. * Use this object instead of CLI::CheckedTransformer in order @@ -55,13 +61,48 @@ template class HailoCheckedTransformer : public CLI::CheckedTransformer { public: - HailoCheckedTransformer(std::vector> values) : - CLI::CheckedTransformer(values) + + struct Enum { - desc_function_ = [values]() { - return CLI::detail::generate_map(CLI::detail::smart_deref(values), true); + std::string name; + EnumType value; + OptionVisibility visibility = OptionVisibility::VISIBLE; + + std::pair to_pair() const { return std::make_pair(name, value); } + }; + + HailoCheckedTransformer(std::vector values) : + CLI::CheckedTransformer(to_values_vector(values, true)) // Getting hidden value for the enum transformer. + { + // Hide hidden values for help and autocomplete. 
+ const auto non_hidden_values = to_values_vector(values, false); + + desc_function_ = [non_hidden_values]() { + return CLI::detail::generate_map(CLI::detail::smart_deref(non_hidden_values), true); + }; + + autocomplete_func_ = [non_hidden_values](const std::string &) { + std::vector completions; + for (const auto &completion : non_hidden_values) { + completions.emplace_back(completion.first); + } + return completions; }; } + +private: + static std::vector> to_values_vector(const std::vector &values, + bool get_hidden) + { + std::vector> values_vector; + for (const auto &value : values) { + if (get_hidden || (value.visibility == OptionVisibility::VISIBLE)) { + values_vector.emplace_back(value.to_pair()); + } + } + return values_vector; + + } }; class DeprecationAction diff --git a/hailort/hailortcli/measure_nnc_performance_command.cpp b/hailort/hailortcli/measure_nnc_performance_command.cpp new file mode 100644 index 0000000..eefed1a --- /dev/null +++ b/hailort/hailortcli/measure_nnc_performance_command.cpp @@ -0,0 +1,118 @@ +/** + * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file measure_nnc_performance_command.cpp +* @brief measure nerual network performance for given network using only the HW components without host SW + **/ + +#include "measure_nnc_performance_command.hpp" +#include "hailortcli.hpp" + +#include "hailo/hailort.h" +#include "hailo/network_group.hpp" +#include "hailo/hef.hpp" +#include "hailo/vstream.hpp" +#include "hailo/vdevice.hpp" + +#include + +#define BYTES_TO_KILOBYTES (1024) + +HwInferEstimatorCommand::HwInferEstimatorCommand(CLI::App &parent_app) : + Command(parent_app.add_subcommand("measure-nnc-performance", + "measure nerual network performance for given network using only the HW components without host SW")), + m_params({}) +{ + // This will make the command to be hidden in the --help print in the command line. 
+ m_app->group(""); + + add_vdevice_options(m_app, m_params.vdevice_params); + m_app->add_option("hef", m_params.hef_path, "Path of the HEF to load") + ->check(CLI::ExistingFile) + ->required(); + m_app->add_option("--batch-size", m_params.batch_size, + "Inference batch.\n" + "This batch applies to the whole network_group.") + ->check(CLI::NonNegativeNumber) + ->default_val(HAILO_DEFAULT_BATCH_SIZE); +} + +Expected> get_configure_params(const hw_infer_runner_params ¶ms, + hailort::Hef &hef, hailo_stream_interface_t interface) +{ + std::map configure_params{}; + + hailo_configure_params_t config_params{}; + hailo_status status = hailo_init_configure_params(reinterpret_cast(&hef), interface, &config_params); + CHECK_SUCCESS_AS_EXPECTED(status); + + /* For default case overwrite batch to 1 */ + uint16_t batch_size = (HAILO_DEFAULT_BATCH_SIZE == params.batch_size ? 1 : params.batch_size); + + /* Fill all network and network group structs with batch size value */ + for (size_t network_group_idx = 0; network_group_idx < config_params.network_group_params_count; network_group_idx++) { + config_params.network_group_params[network_group_idx].batch_size = batch_size; + } + + for (size_t network_group_idx = 0; network_group_idx < config_params.network_group_params_count; network_group_idx++) { + config_params.network_group_params[network_group_idx].power_mode = params.power_mode; + configure_params.emplace(std::string(config_params.network_group_params[network_group_idx].name), + ConfigureNetworkParams(config_params.network_group_params[network_group_idx])); + } + + return configure_params; +} + +hailo_status HwInferEstimatorCommand::execute() +{ + auto devices = create_devices(m_params.vdevice_params.device_params); + CHECK_EXPECTED_AS_STATUS(devices, "Failed creating device"); + /* This function supports controls for multiple devices. 
+ We validate there is only 1 device generated as we are on a single device flow */ + CHECK(1 == devices->size(), HAILO_INTERNAL_FAILURE, "Hw infer command support only one physical device"); + auto &device = devices.value()[0]; + + auto hef = Hef::create(m_params.hef_path.c_str()); + CHECK_EXPECTED_AS_STATUS(hef, "Failed reading hef file {}", m_params.hef_path); + + auto interface = device->get_default_streams_interface(); + CHECK_EXPECTED_AS_STATUS(interface, "Failed to get default streams interface"); + + auto configure_params = get_configure_params(m_params, hef.value(), interface.value()); + CHECK_EXPECTED_AS_STATUS(configure_params); + + /* Use Env var to configure all desc list with max depth */ + setenv("HAILO_CONFIGURE_FOR_HW_INFER","Y",1); + auto network_group_list = device->configure(hef.value(), configure_params.value()); + CHECK_EXPECTED_AS_STATUS(network_group_list, "Failed configure device from hef"); + unsetenv("HAILO_CONFIGURE_FOR_HW_INFER"); + + CHECK(1 == network_group_list->size(), HAILO_INVALID_OPERATION, + "HW Inference is not supported on HEFs with multiple network groups"); + + auto network_group_ptr = network_group_list.value()[0]; + + std::cout << "Starting HW infer Estimator..." 
<< std::endl; + + auto results = network_group_ptr->run_hw_infer_estimator(); + CHECK_EXPECTED_AS_STATUS(results); + + std::cout << std::endl; + std::cout << "======================" << std::endl; + std::cout << " Summary" << std::endl; + std::cout << "======================" << std::endl; + + std::cout << "Batch count: " << results->batch_count << std::endl; + std::cout << "Total transfer size [KB]: " << (results->total_transfer_size / BYTES_TO_KILOBYTES) << std::endl; + std::cout << "Total frames passed: " << results->total_frames_passed << std::endl; + std::cout << "Total time [s]: " << results->time_sec << std::endl; + std::cout << "Total FPS [1/s]: " << results->fps << std::endl; + std::cout << "BW [Gbps]: " << results->BW_Gbps << std::endl; + + std::cout << "======================" << std::endl; + std::cout << " End of report" << std::endl; + std::cout << "======================" << std::endl; + return HAILO_SUCCESS; +} diff --git a/hailort/hailortcli/measure_nnc_performance_command.hpp b/hailort/hailortcli/measure_nnc_performance_command.hpp new file mode 100644 index 0000000..8bcbadb --- /dev/null +++ b/hailort/hailortcli/measure_nnc_performance_command.hpp @@ -0,0 +1,33 @@ +/** + * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. 
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file measure_nnc_performance_command.hpp + * @brief measure nerual network performance for given network using only the HW components without host SW + **/ + +#ifndef _HAILO_HW_INFER_ESTIMATOR_COMMAND_HPP_ +#define _HAILO_HW_INFER_ESTIMATOR_COMMAND_HPP_ + +#include "hailortcli.hpp" +#include "command.hpp" +#include "CLI/CLI.hpp" + +struct hw_infer_runner_params { + hailo_vdevice_params vdevice_params; + std::string hef_path; + uint16_t batch_size; + hailo_power_mode_t power_mode; +}; + +class HwInferEstimatorCommand : public Command { +public: + explicit HwInferEstimatorCommand(CLI::App &parent_app); + hailo_status execute() override; + +private: + hw_infer_runner_params m_params; +}; + +#endif /*_HAILO_HW_INFER_ESTIMATOR_COMMAND_HPP_*/ \ No newline at end of file diff --git a/hailort/hailortcli/mon_command.hpp b/hailort/hailortcli/mon_command.hpp index 6fe1774..653076a 100644 --- a/hailort/hailortcli/mon_command.hpp +++ b/hailort/hailortcli/mon_command.hpp @@ -14,7 +14,7 @@ #include "hailortcli.hpp" #include "command.hpp" -#include "vdevice/scheduler/scheduler_mon.hpp" +#include "utils/profiler/monitor_handler.hpp" #include "CLI/CLI.hpp" diff --git a/hailort/hailortcli/parse_hef_command.cpp b/hailort/hailortcli/parse_hef_command.cpp index 200e7c0..51f830e 100644 --- a/hailort/hailortcli/parse_hef_command.cpp +++ b/hailort/hailortcli/parse_hef_command.cpp @@ -40,7 +40,7 @@ hailo_status ParseHefCommand::parse_hefs_info(const std::string &hef_path, bool CHECK_EXPECTED_AS_STATUS(hef_exp, "Failed to parse HEF"); auto hef = hef_exp.release(); - auto hef_info = hef.get_hef_description(stream_infos, vstream_infos); + auto hef_info = hef.get_description(stream_infos, vstream_infos); CHECK_EXPECTED_AS_STATUS(hef_info, "Failed to parse HEF"); std::cout << hef_info.release(); return HAILO_SUCCESS; diff --git a/hailort/hailortcli/run2/io_wrappers.cpp 
b/hailort/hailortcli/run2/io_wrappers.cpp new file mode 100644 index 0000000..3af75f0 --- /dev/null +++ b/hailort/hailortcli/run2/io_wrappers.cpp @@ -0,0 +1,26 @@ +/** + * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) +**/ +/** + * @file io_wrappers.cpp + **/ + +#include "io_wrappers.hpp" + +FramerateThrottle::FramerateThrottle(uint32_t framerate) : + m_framerate(framerate), + m_framerate_interval(std::chrono::duration(1) / framerate), + m_last_write_time(std::chrono::steady_clock::now()) +{} + +void FramerateThrottle::throttle() +{ + if (m_framerate == UNLIMITED_FRAMERATE) { + return; + } + + const auto elapsed_time = std::chrono::steady_clock::now() - m_last_write_time; + std::this_thread::sleep_for(m_framerate_interval - elapsed_time); + m_last_write_time = std::chrono::steady_clock::now(); +} diff --git a/hailort/hailortcli/run2/io_wrappers.hpp b/hailort/hailortcli/run2/io_wrappers.hpp new file mode 100644 index 0000000..d437675 --- /dev/null +++ b/hailort/hailortcli/run2/io_wrappers.hpp @@ -0,0 +1,261 @@ +/** + * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) +**/ +/** + * @file io_wrappers.hpp + * @brief Wrappers for Input/Output Stream/VStream. Manages buffer allocation, framerate throttle, latency meter and + * more. 
+ **/ + +#ifndef _HAILO_IO_WRAPPERS_HPP_ +#define _HAILO_IO_WRAPPERS_HPP_ + +#include "network_live_track.hpp" + +#include "common/file_utils.hpp" +#include "common/latency_meter.hpp" + +#include +#include + +using namespace hailort; + +constexpr uint32_t UNLIMITED_FRAMERATE = 0; + +#ifndef HAILO_EMULATOR +constexpr std::chrono::milliseconds HAILORTCLI_DEFAULT_TIMEOUT(HAILO_DEFAULT_VSTREAM_TIMEOUT_MS); +#else /* ifndef HAILO_EMULATOR */ +constexpr std::chrono::milliseconds HAILORTCLI_DEFAULT_TIMEOUT(HAILO_DEFAULT_VSTREAM_TIMEOUT_MS * 100); +#endif /* ifndef HAILO_EMULATOR */ + + +class FramerateThrottle final +{ +public: + FramerateThrottle(uint32_t framerate); + ~FramerateThrottle() = default; + void throttle(); + +private: + const uint32_t m_framerate; + const std::chrono::duration m_framerate_interval; + decltype(std::chrono::steady_clock::now()) m_last_write_time; +}; + +// Wrapper for InputStream or InputVStream objects. +template +class WriterWrapper final +{ +public: + template + static Expected> create(Writer &writer, const WriterParams ¶ms, + const LatencyMeterPtr &overall_latency_meter, uint32_t framerate) + { + auto dataset = create_dataset(writer, params); + CHECK_EXPECTED(dataset); + + std::shared_ptr wrapper( + new (std::nothrow) WriterWrapper(writer, dataset.release(), overall_latency_meter, framerate)); + CHECK_NOT_NULL_AS_EXPECTED(wrapper, HAILO_OUT_OF_HOST_MEMORY); + + return wrapper; + } + + Writer &get() { return m_writer.get(); } + Writer &get() const { return m_writer.get(); } + + hailo_status write() + { + before_write_start(); + auto status = get().write(MemoryView(*next_buffer())); + if (HAILO_SUCCESS != status) { + return status; + } + + m_framerate_throttle.throttle(); + return HAILO_SUCCESS; + } + + hailo_status wait_for_async_ready() + { + return get().wait_for_async_ready(m_dataset[0]->size(), HAILORTCLI_DEFAULT_TIMEOUT); + } + + hailo_status write_async(typename Writer::TransferDoneCallback callback) + { + before_write_start(); + // 
We can use the same buffer for multiple writes simultaneously. That is OK since we don't modify the buffers. + auto status = get().write_async(MemoryView(*next_buffer()), callback); + if (HAILO_SUCCESS != status) { + return status; + } + + m_framerate_throttle.throttle(); + return HAILO_SUCCESS; + } + +private: + WriterWrapper(Writer &writer, std::vector &&dataset, const LatencyMeterPtr &overall_latency_meter, + uint32_t framerate) : + m_writer(std::ref(writer)), + m_dataset(std::move(dataset)), + m_overall_latency_meter(overall_latency_meter), + m_framerate_throttle(framerate) + {} + + void before_write_start() + { + if (m_overall_latency_meter) { + m_overall_latency_meter->add_start_sample(std::chrono::steady_clock::now().time_since_epoch()); + } + } + + size_t next_buffer_index() + { + const auto index = m_current_buffer_index; + m_current_buffer_index = (m_current_buffer_index + 1) % m_dataset.size(); + return index; + } + + BufferPtr next_buffer() + { + return m_dataset[next_buffer_index()]; + } + + template + static Expected> create_dataset(Writer &writer, const WriterParams ¶ms) + { + if (params.input_file_path.empty()) { + return create_constant_dataset(writer.get_frame_size()); + } else { + return create_dataset_from_input_file(params.input_file_path, writer.get_frame_size()); + } + } + + static Expected> create_constant_dataset(size_t frame_size) + { + const uint8_t const_byte = 0xAB; + auto constant_buffer = Buffer::create_shared(frame_size, const_byte, BufferStorageParams::create_dma()); + CHECK_EXPECTED(constant_buffer); + + return std::vector{constant_buffer.release()}; + } + + static Expected> create_dataset_from_input_file(const std::string &file_path, size_t frame_size) + { + auto buffer = read_binary_file(file_path); + CHECK_EXPECTED(buffer); + CHECK_AS_EXPECTED(0 == (buffer->size() % frame_size), HAILO_INVALID_ARGUMENT, + "Input file ({}) size {} must be a multiple of the frame size {}", + file_path, buffer->size(), frame_size); + + auto 
buffer_ptr = make_shared_nothrow(buffer.release()); + CHECK_NOT_NULL_AS_EXPECTED(buffer_ptr, HAILO_OUT_OF_HOST_MEMORY); + + std::vector dataset; + const size_t frames_count = buffer->size() / frame_size; + dataset.reserve(frames_count); + for (size_t i = 0; i < frames_count; i++) { + const auto offset = frame_size * i; + auto frame_buffer = Buffer::create_shared(buffer->data() + offset, frame_size, BufferStorageParams::create_dma()); + CHECK_EXPECTED(frame_buffer); + dataset.emplace_back(frame_buffer.release()); + } + + return dataset; + } + + std::reference_wrapper m_writer; + + std::vector m_dataset; + size_t m_current_buffer_index = 0; + + LatencyMeterPtr m_overall_latency_meter; + FramerateThrottle m_framerate_throttle; +}; + +template +using WriterWrapperPtr = std::shared_ptr>; + +// Wrapper for OutputStream or OutputVStream objects. +// We use std::enable_from_this because on async api the callback is using `this`. We want to increase the reference +// count until the callback is over. 
+template +class ReaderWrapper final : public std::enable_shared_from_this> +{ +public: + static Expected> create(Reader &reader, const LatencyMeterPtr &overall_latency_meter, + std::shared_ptr net_live_track) + { + auto buffer = Buffer::create_shared(reader.get_frame_size(), BufferStorageParams::create_dma()); + CHECK_EXPECTED(buffer); + + std::shared_ptr wrapper( + new (std::nothrow) ReaderWrapper(reader, buffer.release(), overall_latency_meter, net_live_track)); + CHECK_NOT_NULL_AS_EXPECTED(wrapper, HAILO_OUT_OF_HOST_MEMORY); + + return wrapper; + } + + Reader &get() { return m_reader.get(); } + Reader &get() const { return m_reader.get(); } + + hailo_status read() + { + auto status = get().read(MemoryView(*m_buffer)); + if (HAILO_SUCCESS != status) { + return status; + } + + on_read_done(); + return HAILO_SUCCESS; + } + + hailo_status wait_for_async_ready() + { + return get().wait_for_async_ready(m_buffer->size(), HAILORTCLI_DEFAULT_TIMEOUT); + } + + hailo_status read_async(typename Reader::TransferDoneCallback callback) + { + auto self = std::enable_shared_from_this>::shared_from_this(); + return get().read_async(MemoryView(*m_buffer), + [self, original=callback](const typename Reader::CompletionInfo &completion_info) { + original(completion_info); + if (completion_info.status == HAILO_SUCCESS) { + self->on_read_done(); + } + }); + } + +private: + ReaderWrapper(Reader &reader, BufferPtr &&buffer, const LatencyMeterPtr &overall_latency_meter, + std::shared_ptr net_live_track) : + m_reader(std::ref(reader)), + m_buffer(std::move(buffer)), + m_overall_latency_meter(overall_latency_meter), + m_net_live_track(net_live_track) + {} + + void on_read_done() + { + if (m_overall_latency_meter) { + m_overall_latency_meter->add_end_sample(get().name(), std::chrono::steady_clock::now().time_since_epoch()); + } + + if (m_net_live_track) { + m_net_live_track->progress(); + } + } + + std::reference_wrapper m_reader; + BufferPtr m_buffer; + LatencyMeterPtr 
m_overall_latency_meter; + std::shared_ptr m_net_live_track; +}; + +template +using ReaderWrapperPtr = std::shared_ptr>; + +#endif /* _HAILO_IO_WRAPPERS_HPP_ */ diff --git a/hailort/hailortcli/run2/live_printer.cpp b/hailort/hailortcli/run2/live_printer.cpp deleted file mode 100644 index 5e4e866..0000000 --- a/hailort/hailortcli/run2/live_printer.cpp +++ /dev/null @@ -1,86 +0,0 @@ -/** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. - * Distributed under the MIT license (https://opensource.org/licenses/MIT) - **/ -/** - * @file live_printer.cpp - * @brief Live printer - **/ - -#include "live_printer.hpp" -#include "../common.hpp" -#include "common/os_utils.hpp" -#include "common/utils.hpp" -#include -#include - -using namespace hailort; - -LivePrinter::LivePrinter(std::chrono::milliseconds interval) : - m_interval(interval), - m_stop_event(Event::create_shared(Event::State::not_signalled)), - m_tracks(), - m_mutex(), - m_prev_count(0), - m_enable_ansi_escape_sequences(CursorAdjustment()) -{ -} - -LivePrinter::~LivePrinter() -{ - (void)m_stop_event->signal(); - if (m_thread.joinable()) { - m_thread.join(); - } - print(); -} - -void LivePrinter::add(std::shared_ptr track, uint8_t level) -{ - std::unique_lock lock(m_mutex); - if (!contains(m_tracks, level)) { - m_tracks[level] = {}; - } - m_tracks[level].emplace_back(track); -} - -void LivePrinter::print() -{ - std::stringstream ss; - uint32_t count = 0; - - { - std::unique_lock lock(m_mutex); - for (auto &level_pair : m_tracks) { - for (auto &track : level_pair.second) { - count += track->get_text(ss); - } - } - } - CliCommon::reset_cursor(m_prev_count); - // On the first print m_prev_count = 0, so no lines will be deleted - std::cout << ss.str() << std::flush; - m_prev_count = count; -} - -hailo_status LivePrinter::start() -{ - for (auto &level_pair : m_tracks) { - for (auto &track : level_pair.second) { - CHECK_SUCCESS(track->start()); - } - } - - m_thread = std::thread([this] () { - 
OsUtils::set_current_thread_name("LIVE_PRINTER"); - while (true) { - print(); - auto status = m_stop_event->wait(m_interval); - if (HAILO_TIMEOUT != status) { - break; - } - } - }); - - return HAILO_SUCCESS; -} diff --git a/hailort/hailortcli/run2/live_printer.hpp b/hailort/hailortcli/run2/live_printer.hpp deleted file mode 100644 index 26ea8ea..0000000 --- a/hailort/hailortcli/run2/live_printer.hpp +++ /dev/null @@ -1,54 +0,0 @@ -/** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. - * Distributed under the MIT license (https://opensource.org/licenses/MIT) - **/ -/** - * @file live_printer.hpp - * @brief Live printer - **/ - -#ifndef _HAILO_HAILORTCLI_RUN2_LIVE_PRINTER_HPP_ -#define _HAILO_HAILORTCLI_RUN2_LIVE_PRINTER_HPP_ - -#include "common/os_utils.hpp" -#include "hailo/event.hpp" -#include -#include -#include -#include -#include -#include - -class LivePrinter final -{ -public: - class Track - { - public: - Track() : m_started(false) - {} - - virtual hailo_status start() = 0; - virtual uint32_t get_text(std::stringstream &ss) = 0; - - protected: - bool m_started; - }; - - LivePrinter(std::chrono::milliseconds interval); - ~LivePrinter(); - void add(std::shared_ptr track, uint8_t level); // prints tracks in consecutive order from low-to-high levels - void print(); - hailo_status start(); - -private: - std::chrono::milliseconds m_interval; - hailort::EventPtr m_stop_event; - std::map>> m_tracks; - std::thread m_thread; - std::mutex m_mutex; - uint32_t m_prev_count; - hailort::CursorAdjustment m_enable_ansi_escape_sequences; -}; - -#endif /* _HAILO_HAILORTCLI_RUN2_LIVE_PRINTER_HPP_ */ \ No newline at end of file diff --git a/hailort/hailortcli/run2/live_stats.cpp b/hailort/hailortcli/run2/live_stats.cpp new file mode 100644 index 0000000..fe2f98c --- /dev/null +++ b/hailort/hailortcli/run2/live_stats.cpp @@ -0,0 +1,149 @@ +/** + * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. 
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file live_stats.cpp + * @brief Live stats + **/ + +#include "live_stats.hpp" +#include "../common.hpp" +#include "common/os_utils.hpp" +#include "common/utils.hpp" +#include +#include +#include + +using namespace hailort; + +hailo_status LiveStats::Track::start() +{ + CHECK_SUCCESS(start_impl()); + m_started = true; + return HAILO_SUCCESS; +} + +uint32_t LiveStats::Track::push_text(std::stringstream &ss) +{ + if (!m_started) { + return 0; + } + return push_text_impl(ss); +} + +void LiveStats::Track::push_json(nlohmann::ordered_json &json) +{ + if (!m_started) { + return; + } + push_json_impl(json); +} + +LiveStats::LiveStats(std::chrono::milliseconds interval) : + m_running(false), + m_interval(interval), + m_stop_event(Event::create_shared(Event::State::not_signalled)), + m_tracks(), + m_mutex(), + m_prev_count(0), + m_enable_ansi_escape_sequences(CursorAdjustment()) +{ +} + +LiveStats::~LiveStats() +{ + stop(); + print(); +} + +void LiveStats::add(std::shared_ptr track, uint8_t level) +{ + std::unique_lock lock(m_mutex); + m_tracks[level].emplace_back(track); +} + +void LiveStats::print() +{ + std::stringstream ss; + uint32_t count = 0; + + { + std::unique_lock lock(m_mutex); + for (auto &level_pair : m_tracks) { + for (auto &track : level_pair.second) { + count += track->push_text(ss); + } + } + } + CliCommon::reset_cursor(m_prev_count); + // On the first print m_prev_count = 0, so no lines will be deleted + std::cout << ss.str() << std::flush; + m_prev_count = count; +} + +hailo_status LiveStats::dump_stats(const std::string &json_path, const std::string &inference_mode) +{ + stop(); // stop measuring before creating json because we want the json to hold the last measurements + nlohmann::ordered_json json; + + auto time = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()); + auto str_time = std::string(std::ctime(&time)); + if (str_time.length()){ + 
str_time.pop_back(); + } + + json["time"] = str_time; + json["inference_mode"] = inference_mode; + json["network_groups"] = nlohmann::ordered_json::array(); + + std::unique_lock lock(m_mutex); + for (auto &level_pair : m_tracks) { + for (auto &track : level_pair.second) { + track->push_json(json); + } + } + + std::ofstream output_json(json_path); + CHECK(output_json, HAILO_FILE_OPERATION_FAILURE, "Failed opening file '{}'", json_path); + + output_json << std::setw(4) << json << std::endl; // 4: amount of spaces to indent (for pretty printing) + CHECK(!output_json.bad() && !output_json.fail(), HAILO_FILE_OPERATION_FAILURE, + "Failed writing to file '{}'", json_path); + + return HAILO_SUCCESS; +} + +hailo_status LiveStats::start() +{ + // In order to re-start LiveStats, we should add m_stop_event->reset() here + m_running = true; + for (auto &level_pair : m_tracks) { + for (auto &track : level_pair.second) { + CHECK_SUCCESS(track->start()); + } + } + + m_thread = std::thread([this] () { + OsUtils::set_current_thread_name("LIVE_PRINTER"); + while (true) { + print(); + auto status = m_stop_event->wait(m_interval); + if (HAILO_TIMEOUT != status) { + break; + } + } + }); + return HAILO_SUCCESS; +} + +void LiveStats::stop() +{ + if (m_running){ + (void)m_stop_event->signal(); + if (m_thread.joinable()) { + m_thread.join(); + } + m_running = false; + } +} diff --git a/hailort/hailortcli/run2/live_stats.hpp b/hailort/hailortcli/run2/live_stats.hpp new file mode 100644 index 0000000..6c1b4ea --- /dev/null +++ b/hailort/hailortcli/run2/live_stats.hpp @@ -0,0 +1,63 @@ +/** + * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. 
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file live_stats.hpp + * @brief Live stats + **/ + +#ifndef _HAILO_HAILORTCLI_RUN2_LIVE_STATS_HPP_ +#define _HAILO_HAILORTCLI_RUN2_LIVE_STATS_HPP_ + +#include "common/os_utils.hpp" +#include "hailo/event.hpp" +#include +#include +#include +#include +#include +#include +#include + +class LiveStats final +{ +public: + class Track + { + public: + Track() : m_started(false) + {} + + hailo_status start(); + uint32_t push_text(std::stringstream &ss); + void push_json(nlohmann::ordered_json &json); + + protected: + virtual hailo_status start_impl() = 0; + virtual uint32_t push_text_impl(std::stringstream &ss) = 0; + virtual void push_json_impl(nlohmann::ordered_json &json) = 0; + + bool m_started; + }; + + LiveStats(std::chrono::milliseconds interval); + ~LiveStats(); + void add(std::shared_ptr track, uint8_t level); // prints tracks in consecutive order from low-to-high levels + void print(); + hailo_status dump_stats(const std::string &json_path, const std::string &inference_mode); + hailo_status start(); + void stop(); + +private: + bool m_running; + std::chrono::milliseconds m_interval; + hailort::EventPtr m_stop_event; + std::map>> m_tracks; + std::thread m_thread; + std::mutex m_mutex; + uint32_t m_prev_count; + hailort::CursorAdjustment m_enable_ansi_escape_sequences; +}; + +#endif /* _HAILO_HAILORTCLI_RUN2_LIVE_STATS_HPP_ */ \ No newline at end of file diff --git a/hailort/hailortcli/run2/measurement_live_track.cpp b/hailort/hailortcli/run2/measurement_live_track.cpp index cf001bf..f098cfe 100644 --- a/hailort/hailortcli/run2/measurement_live_track.cpp +++ b/hailort/hailortcli/run2/measurement_live_track.cpp @@ -17,7 +17,6 @@ #include #include - using namespace hailort; Expected> MeasurementLiveTrack::create_shared(Device &device, bool measure_power, bool measure_current, @@ -53,35 +52,27 @@ Expected> MeasurementLiveTrack::create_sha 
MeasurementLiveTrack::MeasurementLiveTrack(std::shared_ptr power_measurement, std::shared_ptr current_measurement, std::shared_ptr temp_measurement, const std::string &device_id) : - LivePrinter::Track(), m_power_measurement(std::move(power_measurement)), m_current_measurement(std::move(current_measurement)), + LiveStats::Track(), m_power_measurement(std::move(power_measurement)), m_current_measurement(std::move(current_measurement)), m_temp_measurement(std::move(temp_measurement)), m_device_id(device_id) {} -hailo_status MeasurementLiveTrack::start() +hailo_status MeasurementLiveTrack::start_impl() { if (m_power_measurement) { CHECK_SUCCESS(m_power_measurement->start_measurement()); } - if (m_current_measurement) { CHECK_SUCCESS(m_current_measurement->start_measurement()); } - if (m_temp_measurement) { CHECK_SUCCESS(m_temp_measurement->start_measurement()); } - m_started = true; - return HAILO_SUCCESS; } -uint32_t MeasurementLiveTrack::get_text(std::stringstream &ss) +uint32_t MeasurementLiveTrack::push_text_impl(std::stringstream &ss) { - if (!m_started) { - return 0; - } - auto rows_count = 0; if (m_power_measurement || m_current_measurement || m_temp_measurement) { @@ -138,4 +129,37 @@ uint32_t MeasurementLiveTrack::get_text(std::stringstream &ss) } return rows_count; +} + +void MeasurementLiveTrack::push_json_measurment_val(nlohmann::ordered_json &device_json, std::shared_ptr measurment, const std::string &measurment_name) +{ + auto measurment_info = measurment->get_data(); + auto measurement_unit = measurment->measurement_unit(); + auto min = measurment_info.min(); + auto max = measurment_info.max(); + auto mean = measurment_info.mean(); + if (min && max && mean){ + device_json[measurment_name] = { + {"min", std::to_string(min.value()) + " " + measurement_unit}, + {"max", std::to_string(max.value()) + " " + measurement_unit}, + {"average", std::to_string(mean.value()) + " " + measurement_unit} + }; + } +} + +void 
MeasurementLiveTrack::push_json_impl(nlohmann::ordered_json &json) +{ + nlohmann::ordered_json device_json; + device_json["device_id"] = m_device_id; + + if (m_power_measurement){ + push_json_measurment_val(device_json, m_power_measurement, "power"); + } + if (m_current_measurement){ + push_json_measurment_val(device_json, m_current_measurement, "current"); + } + if (m_temp_measurement){ + push_json_measurment_val(device_json, m_temp_measurement, "temperature"); + } + json["devices"].emplace_back(device_json); } \ No newline at end of file diff --git a/hailort/hailortcli/run2/measurement_live_track.hpp b/hailort/hailortcli/run2/measurement_live_track.hpp index 17288d9..782681e 100644 --- a/hailort/hailortcli/run2/measurement_live_track.hpp +++ b/hailort/hailortcli/run2/measurement_live_track.hpp @@ -13,24 +13,26 @@ #include "hailo/hailort.h" #include "common/device_measurements.hpp" +#include "live_stats.hpp" -#include "live_printer.hpp" +#include - -class MeasurementLiveTrack : public LivePrinter::Track +class MeasurementLiveTrack : public LiveStats::Track { public: static hailort::Expected> create_shared(hailort::Device &vdevice, bool measure_power, bool measure_current, bool measure_temp); virtual ~MeasurementLiveTrack() = default; - virtual hailo_status start() override; - virtual uint32_t get_text(std::stringstream &ss) override; + virtual hailo_status start_impl() override; + virtual uint32_t push_text_impl(std::stringstream &ss) override; + virtual void push_json_impl(nlohmann::ordered_json &json) override; MeasurementLiveTrack(std::shared_ptr power_measurement, std::shared_ptr current_measurement, std::shared_ptr temp_measurement, const std::string &device_id); private: + void push_json_measurment_val(nlohmann::ordered_json &device_json, std::shared_ptr measurment, const std::string &measurment_name); std::shared_ptr m_power_measurement; std::shared_ptr m_current_measurement; std::shared_ptr m_temp_measurement; diff --git 
a/hailort/hailortcli/run2/network_live_track.cpp b/hailort/hailortcli/run2/network_live_track.cpp index b4e85c2..ae59018 100644 --- a/hailort/hailortcli/run2/network_live_track.cpp +++ b/hailort/hailortcli/run2/network_live_track.cpp @@ -13,52 +13,110 @@ #include #include -NetworkLiveTrack::NetworkLiveTrack(const std::string &name, std::shared_ptr cng, LatencyMeterPtr overall_latency_meter) : - m_name(name), m_count(0), m_last_get_time(), m_cng(cng), m_overall_latency_meter(overall_latency_meter) +size_t NetworkLiveTrack::max_ng_name = 0; +std::mutex NetworkLiveTrack::mutex; + +NetworkLiveTrack::NetworkLiveTrack(const std::string &name, std::shared_ptr cng, + LatencyMeterPtr overall_latency_meter, bool measure_fps, const std::string &hef_path) : + m_name(name), + m_count(0), + m_last_get_time(), + m_cng(cng), + m_overall_latency_meter(overall_latency_meter), + m_measure_fps(measure_fps), + m_hef_path(hef_path) { + std::lock_guard lock(mutex); + max_ng_name = std::max(m_name.size(), max_ng_name); } -hailo_status NetworkLiveTrack::start() +hailo_status NetworkLiveTrack::start_impl() { m_last_get_time = std::chrono::steady_clock::now(); m_count = 0; - m_started = true; return HAILO_SUCCESS; } -uint32_t NetworkLiveTrack::get_text(std::stringstream &ss) +double NetworkLiveTrack::get_fps() { - if (!m_started) { - return 0; - } - auto elapsed_time = std::chrono::steady_clock::now() - m_last_get_time; auto count = m_count.load(); - auto fps = count / std::chrono::duration(elapsed_time).count(); - ss << fmt::format("{}:\n\t| fps: {:.2f}", m_name, fps); + return fps; +} + +uint32_t NetworkLiveTrack::push_text_impl(std::stringstream &ss) +{ + ss << fmt::format("{}:", m_name); + ss << std::string(max_ng_name - m_name.size(), ' '); + + bool first = true; + auto get_separator = [&first] () { + auto res = first ? 
" " : " | "; + first = false; + return res; + }; + + if (m_measure_fps) { + auto fps = get_fps(); + ss << fmt::format("{}fps: {:.2f}", get_separator(), fps); + } auto hw_latency_measurement = m_cng->get_latency_measurement(); if (hw_latency_measurement) { - ss << fmt::format(" | hw latency: {:.2f} ms", InferResultsFormatUtils::latency_result_to_ms(hw_latency_measurement->avg_hw_latency)); + ss << fmt::format("{}hw latency: {:.2f} ms", get_separator(), InferResultsFormatUtils::latency_result_to_ms(hw_latency_measurement->avg_hw_latency)); } else if (HAILO_NOT_AVAILABLE != hw_latency_measurement.status()) { // HAILO_NOT_AVAILABLE is a valid error, we ignore it - ss << fmt::format(" | hw latency: failed with status={}", hw_latency_measurement.status()); + ss << fmt::format("{}hw latency: NaN (err)", get_separator()); } if (m_overall_latency_meter) { - auto overall_latency_measurement = m_overall_latency_meter->get_latency(true); + auto overall_latency_measurement = m_overall_latency_meter->get_latency(false); if (overall_latency_measurement) { - ss << fmt::format(" | overall latency: {:.2f} ms", InferResultsFormatUtils::latency_result_to_ms(*overall_latency_measurement)); + ss << fmt::format("{}overall latency: {:.2f} ms", get_separator(), InferResultsFormatUtils::latency_result_to_ms(*overall_latency_measurement)); } else if (HAILO_NOT_AVAILABLE != overall_latency_measurement.status()) { // HAILO_NOT_AVAILABLE is a valid error, we ignore it - ss << fmt::format(" | overall latency: failed with status={}", overall_latency_measurement.status()); + ss << fmt::format("{}overall latency: NaN (err)", get_separator()); } } ss << "\n"; - return 2; + return 1; +} + +void NetworkLiveTrack::push_json_impl(nlohmann::ordered_json &json) +{ + nlohmann::ordered_json network_group_json; + network_group_json["name"] = m_name; + network_group_json["full_hef_path"] = m_hef_path; + + // TODO: HRT-8695 Support stats display per network + // auto networks_info = m_cng->get_network_infos(); 
+ // if (networks_info){ + // network_group_json["networks"] = nlohmann::ordered_json::array(); + // for (const auto &network_info : networks_info.value()){ + // network_group_json["networks"].emplace_back(nlohmann::json::object({ {"name", network_info.name} })); + // } + // } + + if (m_measure_fps) { + auto fps = get_fps(); + network_group_json["FPS"] = std::to_string(fps); + } + + auto hw_latency_measurement = m_cng->get_latency_measurement(); + if (hw_latency_measurement){ + network_group_json["hw_latency"] = InferResultsFormatUtils::latency_result_to_ms(hw_latency_measurement->avg_hw_latency); + } + + if (m_overall_latency_meter){ + auto overall_latency_measurement = m_overall_latency_meter->get_latency(false); + if (overall_latency_measurement){ + network_group_json["overall_latency"] = InferResultsFormatUtils::latency_result_to_ms(*overall_latency_measurement); + } + } + json["network_groups"].emplace_back(network_group_json); } void NetworkLiveTrack::progress() diff --git a/hailort/hailortcli/run2/network_live_track.hpp b/hailort/hailortcli/run2/network_live_track.hpp index dec00fd..ba3138c 100644 --- a/hailort/hailortcli/run2/network_live_track.hpp +++ b/hailort/hailortcli/run2/network_live_track.hpp @@ -15,24 +15,36 @@ #include "common/latency_meter.hpp" -#include "live_printer.hpp" +#include "live_stats.hpp" +#include -class NetworkLiveTrack : public LivePrinter::Track + +class NetworkLiveTrack : public LiveStats::Track { public: - NetworkLiveTrack(const std::string &name, std::shared_ptr cng, hailort::LatencyMeterPtr overall_latency_meter); + NetworkLiveTrack(const std::string &name, std::shared_ptr cng, + hailort::LatencyMeterPtr overall_latency_meter, bool measure_fps, const std::string &hef_path); virtual ~NetworkLiveTrack() = default; - virtual hailo_status start() override; - virtual uint32_t get_text(std::stringstream &ss) override; + virtual hailo_status start_impl() override; + virtual uint32_t push_text_impl(std::stringstream &ss) override; + 
virtual void push_json_impl(nlohmann::ordered_json &json) override; + void progress(); private: + double get_fps(); + + static size_t max_ng_name; + static std::mutex mutex; + std::string m_name; std::atomic m_count; std::chrono::time_point m_last_get_time; std::shared_ptr m_cng; hailort::LatencyMeterPtr m_overall_latency_meter; + const bool m_measure_fps; + const std::string &m_hef_path; }; #endif /* _HAILO_HAILORTCLI_RUN2_NETWORK_LIVE_TRACK_HPP_ */ \ No newline at end of file diff --git a/hailort/hailortcli/run2/network_runner.cpp b/hailort/hailortcli/run2/network_runner.cpp index f2901a8..f095b1d 100644 --- a/hailort/hailortcli/run2/network_runner.cpp +++ b/hailort/hailortcli/run2/network_runner.cpp @@ -11,38 +11,62 @@ #include "hailo/hailort_common.hpp" #include "hailo/hailort_defaults.hpp" -#include "common/async_thread.hpp" #include "common/file_utils.hpp" #include "common/latency_meter.hpp" #include "network_runner.hpp" +#if defined(_MSC_VER) +#include +#endif using namespace hailort; +SignalEventScopeGuard::SignalEventScopeGuard(Event &event) : + m_event(event) +{} -class SignalEventScopeGuard final +SignalEventScopeGuard::~SignalEventScopeGuard() { -public: - SignalEventScopeGuard(Event &event) : m_event(event) - {} + m_event.signal(); +} - ~SignalEventScopeGuard() - { - m_event.signal(); +BarrierTerminateScopeGuard::BarrierTerminateScopeGuard(BarrierPtr barrier) : + m_barrier(barrier) +{} + +BarrierTerminateScopeGuard::~BarrierTerminateScopeGuard() +{ + if (m_barrier) { + m_barrier->terminate(); } +} - Event &m_event; +#if defined(_MSC_VER) +class TimeBeginScopeGuard final +{ +public: + TimeBeginScopeGuard() { + // default interval between timer interrupts on Windows is 15.625 ms. + // This will change it to be 1 ms, enabling us to sleep in granularity of 1 milliseconds. + // As from Windows 10 2004, in general processes are no longer affected by other processes calling timeBeginPeriod. 
+ // https://randomascii.wordpress.com/2020/10/04/windows-timer-resolution-the-great-rule-change/ + timeBeginPeriod(1); + } + ~TimeBeginScopeGuard() { + timeEndPeriod(1); + } }; +#endif //TODO: duplicated -static hailo_status wait_for_threads(std::vector> &threads) +hailo_status NetworkRunner::wait_for_threads(std::vector> &threads) { auto last_error_status = HAILO_SUCCESS; for (auto &thread : threads) { auto thread_status = thread->get(); - if ((HAILO_SUCCESS != thread_status) && (HAILO_STREAM_ABORTED_BY_USER != thread_status)) { + if (!inference_succeeded(thread_status)) { last_error_status = thread_status; LOGGER__ERROR("Thread failed with with status {}", thread_status); } @@ -50,218 +74,192 @@ static hailo_status wait_for_threads(std::vector> & return last_error_status; } -VStreamParams::VStreamParams() : name(), params(HailoRTDefaults::get_vstreams_params()) +IoParams::IoParams() : name(), input_file_path() { } -NetworkParams::NetworkParams() : hef_path(), net_group_name(), vstream_params(), scheduling_algorithm(HAILO_SCHEDULING_ALGORITHM_ROUND_ROBIN), - batch_size(HAILO_DEFAULT_BATCH_SIZE), scheduler_threshold(0), scheduler_timeout_ms(0), framerate(UNLIMITED_FRAMERATE), measure_hw_latency(false), +VStreamParams::VStreamParams() : IoParams(), params(HailoRTDefaults::get_vstreams_params()) +{ +} + +StreamParams::StreamParams() : IoParams(), flags(HAILO_STREAM_FLAGS_NONE) +{ +} + +NetworkParams::NetworkParams() : hef_path(), net_group_name(), vstream_params(), stream_params(), + scheduling_algorithm(HAILO_SCHEDULING_ALGORITHM_ROUND_ROBIN), batch_size(HAILO_DEFAULT_BATCH_SIZE), + scheduler_threshold(0), scheduler_timeout_ms(0), framerate(UNLIMITED_FRAMERATE), measure_hw_latency(false), measure_overall_latency(false) { } NetworkRunner::NetworkRunner(const NetworkParams ¶ms, const std::string &name, - std::vector &&input_vstreams, std::vector &&output_vstreams, - std::shared_ptr cng, LatencyMeterPtr overall_latency_meter) - : m_params(params), m_name(name), 
m_input_vstreams(std::move(input_vstreams)), - m_output_vstreams(std::move(output_vstreams)), m_cng(cng), m_overall_latency_meter(overall_latency_meter) + VDevice &vdevice, std::shared_ptr cng) : + m_vdevice(vdevice), + m_params(params), + m_name(name), + m_cng(cng), + m_overall_latency_meter(nullptr), + m_latency_barrier(nullptr) { } Expected> NetworkRunner::create_shared(VDevice &vdevice, const NetworkParams ¶ms) { - auto hef = Hef::create(params.hef_path); + // The network params passed to the NetworkRunner may be changed by this function, hence we copy them. + auto final_net_params = params; + + auto hef = Hef::create(final_net_params.hef_path); CHECK_EXPECTED(hef); // Get NG's name if single - auto net_group_name = params.net_group_name; + auto net_group_name = final_net_params.net_group_name; if (net_group_name.empty()) { auto net_groups_names = hef->get_network_groups_names(); - CHECK_AS_EXPECTED(net_groups_names.size() == 1, HAILO_INVALID_ARGUMENT, "HEF {} doesn't contain a single NetworkGroup. Pass --name", params.hef_path); + CHECK_AS_EXPECTED(net_groups_names.size() == 1, HAILO_INVALID_ARGUMENT, "HEF {} doesn't contain a single NetworkGroup. Pass --name", final_net_params.hef_path); net_group_name = net_groups_names[0]; } auto cfg_params = vdevice.create_configure_params(hef.value(), net_group_name); CHECK_EXPECTED(cfg_params); - cfg_params->batch_size = params.batch_size; - if (params.measure_hw_latency) { + cfg_params->batch_size = final_net_params.batch_size; + if (final_net_params.batch_size == HAILO_DEFAULT_BATCH_SIZE) { + // Changing batch_size to 1. 
If HAILO_DEFAULT_BATCH_SIZE is configured, the sched will send one frame per batch + final_net_params.batch_size = 1; + } + if (final_net_params.measure_hw_latency) { cfg_params->latency |= HAILO_LATENCY_MEASURE; } + if (final_net_params.is_async()) { + for (auto &stream_name_params_pair : cfg_params->stream_params_by_name) { + stream_name_params_pair.second.flags = HAILO_STREAM_FLAGS_ASYNC; + } + } auto cfgr_net_groups = vdevice.configure(hef.value(), {{net_group_name, cfg_params.value()}}); CHECK_EXPECTED(cfgr_net_groups); assert(1 == cfgr_net_groups->size()); auto cfgr_net_group = cfgr_net_groups.value()[0]; - if (HAILO_SCHEDULING_ALGORITHM_NONE!= params.scheduling_algorithm) { - CHECK_SUCCESS_AS_EXPECTED(cfgr_net_group->set_scheduler_threshold(params.scheduler_threshold)); - CHECK_SUCCESS_AS_EXPECTED(cfgr_net_group->set_scheduler_timeout(std::chrono::milliseconds(params.scheduler_timeout_ms))); - CHECK_SUCCESS_AS_EXPECTED(cfgr_net_group->set_scheduler_priority(params.scheduler_priority)); + if (HAILO_SCHEDULING_ALGORITHM_NONE!= final_net_params.scheduling_algorithm) { + CHECK_SUCCESS_AS_EXPECTED(cfgr_net_group->set_scheduler_threshold(final_net_params.scheduler_threshold)); + CHECK_SUCCESS_AS_EXPECTED(cfgr_net_group->set_scheduler_timeout(std::chrono::milliseconds(final_net_params.scheduler_timeout_ms))); + CHECK_SUCCESS_AS_EXPECTED(cfgr_net_group->set_scheduler_priority(final_net_params.scheduler_priority)); } - std::map vstreams_params; - for (auto &vstream_params : params.vstream_params) { - vstreams_params.emplace(vstream_params.name, vstream_params.params); - } - auto vstreams = create_vstreams(*cfgr_net_group, vstreams_params); - CHECK_EXPECTED(vstreams); - - LatencyMeterPtr overall_latency_meter = nullptr; - if (params.measure_overall_latency) { - CHECK_AS_EXPECTED((1 == vstreams->first.size()), HAILO_INVALID_OPERATION, - "Overall latency measurement over multiple inputs network is not supported"); - - std::set output_names; - for (auto &output_vstream : 
vstreams->second) { - output_names.insert(output_vstream.name()); + std::shared_ptr net_runner_ptr = nullptr; + switch (final_net_params.mode) + { + case InferenceMode::FULL: + { + std::map vstreams_params; + for (auto &vstream_params : final_net_params.vstream_params) { + vstreams_params.emplace(vstream_params.name, vstream_params.params); } - - overall_latency_meter = make_shared_nothrow(output_names, OVERALL_LATENCY_TIMESTAMPS_LIST_LENGTH); - CHECK_NOT_NULL_AS_EXPECTED(overall_latency_meter, HAILO_OUT_OF_HOST_MEMORY); + auto vstreams = create_vstreams(*cfgr_net_group, vstreams_params); + CHECK_EXPECTED(vstreams); + + auto net_runner = make_shared_nothrow(final_net_params, net_group_name, vdevice, + std::move(vstreams->first), std::move(vstreams->second), cfgr_net_group); + CHECK_NOT_NULL_AS_EXPECTED(net_runner, HAILO_OUT_OF_HOST_MEMORY); + net_runner_ptr = std::static_pointer_cast(net_runner); + break; } - auto net_runner = make_shared_nothrow(params, net_group_name, std::move(vstreams->first), - std::move(vstreams->second), cfgr_net_group, overall_latency_meter); - CHECK_NOT_NULL_AS_EXPECTED(net_runner, HAILO_OUT_OF_HOST_MEMORY); - return net_runner; -} -Expected NetworkRunner::create_dataset_from_input_file(const std::string &file_path, - const InputVStream &input_vstream) -{ - auto buffer = read_binary_file(file_path); - CHECK_EXPECTED(buffer); - CHECK_AS_EXPECTED(0 == (buffer->size() % input_vstream.get_frame_size()), HAILO_INVALID_ARGUMENT, - "Input file ({}) size {} must be a multiple of the frame size {} ({})", - file_path, buffer->size(), input_vstream.get_frame_size(), input_vstream.name()); + case InferenceMode::RAW: // Fallthrough + case InferenceMode::RAW_ASYNC: // Fallthrough + case InferenceMode::RAW_ASYNC_SINGLE_THREAD: + { + auto input_streams = cfgr_net_group->get_input_streams(); + CHECK_AS_EXPECTED(input_streams.size() > 0, HAILO_INTERNAL_FAILURE); - auto buffer_ptr = make_shared_nothrow(buffer.release()); - 
CHECK_NOT_NULL_AS_EXPECTED(buffer_ptr, HAILO_OUT_OF_HOST_MEMORY); + auto output_streams = cfgr_net_group->get_output_streams(); + CHECK_AS_EXPECTED(output_streams.size() > 0, HAILO_INTERNAL_FAILURE); - return buffer_ptr; -} + auto net_runner = make_shared_nothrow(final_net_params, net_group_name, vdevice, + std::move(input_streams), std::move(output_streams), cfgr_net_group); + CHECK_NOT_NULL_AS_EXPECTED(net_runner, HAILO_OUT_OF_HOST_MEMORY); + net_runner_ptr = std::static_pointer_cast(net_runner); + break; + } + default: + // Shouldn't get here + return make_unexpected(HAILO_INTERNAL_FAILURE); + } -Expected NetworkRunner::create_constant_dataset(const InputVStream &input_vstream) -{ - const uint8_t const_byte = 0xAB; - auto constant_buffer = Buffer::create_shared(input_vstream.get_frame_size(), const_byte); - CHECK_EXPECTED(constant_buffer); + if (final_net_params.measure_overall_latency || final_net_params.measure_hw_latency) { + auto input_names = net_runner_ptr->get_input_names(); + auto output_names = net_runner_ptr->get_output_names(); - return constant_buffer.release(); -} + CHECK_AS_EXPECTED((1 == input_names.size()), HAILO_INVALID_OPERATION, + "Latency measurement over multiple inputs network is not supported"); -hailo_status NetworkRunner::run_input_vstream(InputVStream &vstream, Event &shutdown_event, BufferPtr dataset, - LatencyMeterPtr overall_latency_meter) -{ - auto signal_event_scope_guard = SignalEventScopeGuard(shutdown_event); - - auto last_write_time = std::chrono::steady_clock::now(); - auto framerate_interval = std::chrono::duration(1) / m_params.framerate; - size_t buffer_offset = 0; - while(true) { - if (overall_latency_meter) { - overall_latency_meter->add_start_sample(std::chrono::steady_clock::now().time_since_epoch()); - } - auto status = vstream.write(MemoryView((dataset->data() + buffer_offset), vstream.get_frame_size())); - if (status == HAILO_STREAM_ABORTED_BY_USER) { - return status; - } - CHECK_SUCCESS(status); - buffer_offset += 
vstream.get_frame_size(); - buffer_offset %= dataset->size(); - - if (m_params.framerate != UNLIMITED_FRAMERATE) { - auto elapsed_time = std::chrono::steady_clock::now() - last_write_time; - std::this_thread::sleep_for(framerate_interval - elapsed_time); - last_write_time = std::chrono::steady_clock::now(); + if (final_net_params.measure_overall_latency) { + auto overall_latency_meter = make_shared_nothrow(output_names, OVERALL_LATENCY_TIMESTAMPS_LIST_LENGTH); + CHECK_NOT_NULL_AS_EXPECTED(overall_latency_meter, HAILO_OUT_OF_HOST_MEMORY); + net_runner_ptr->set_overall_latency_meter(overall_latency_meter); } + + // We use a barrier for both hw and overall latency + auto latency_barrier = make_shared_nothrow(input_names.size() + output_names.size()); + CHECK_NOT_NULL_AS_EXPECTED(latency_barrier, HAILO_OUT_OF_HOST_MEMORY); + net_runner_ptr->set_latency_barrier(latency_barrier); } - return HAILO_SUCCESS; + + return net_runner_ptr; } -hailo_status NetworkRunner::run_output_vstream(OutputVStream &vstream, bool first, std::shared_ptr net_live_track, - Event &shutdown_event, LatencyMeterPtr overall_latency_meter) +bool NetworkRunner::inference_succeeded(hailo_status status) { - auto signal_event_scope_guard = SignalEventScopeGuard(shutdown_event); - - auto result = Buffer::create(vstream.get_frame_size()); - CHECK_EXPECTED_AS_STATUS(result); - while(true) { - auto status = vstream.read(MemoryView(result.value())); - if (status == HAILO_STREAM_ABORTED_BY_USER) { - return status; - } - CHECK_SUCCESS(status); - if (overall_latency_meter) { - overall_latency_meter->add_end_sample(vstream.name(), std::chrono::steady_clock::now().time_since_epoch()); - } - if (first) { - net_live_track->progress(); - } - } - return HAILO_SUCCESS; + const auto status_find_result = std::find(NetworkRunner::ALLOWED_INFERENCE_RETURN_VALUES.cbegin(), + NetworkRunner::ALLOWED_INFERENCE_RETURN_VALUES.cend(), status); + // If the status is in the allowed list, the inference has succeeded + return 
status_find_result != NetworkRunner::ALLOWED_INFERENCE_RETURN_VALUES.cend(); } -hailo_status NetworkRunner::run(Event &shutdown_event, LivePrinter &live_printer, Barrier &barrier) +hailo_status NetworkRunner::run(EventPtr shutdown_event, LiveStats &live_stats, Barrier &activation_barrier) { auto ang = std::unique_ptr(nullptr); if (HAILO_SCHEDULING_ALGORITHM_NONE == m_params.scheduling_algorithm) { auto ang_exp = m_cng->activate(); if (!ang_exp) { - barrier.terminate(); + activation_barrier.terminate(); } CHECK_EXPECTED_AS_STATUS(ang_exp); ang = ang_exp.release(); } - auto net_live_track = std::make_shared(m_name, m_cng, m_overall_latency_meter); - live_printer.add(net_live_track, 1); //support progress over multiple outputs - barrier.arrive_and_wait(); + // If we measure latency (hw or overall) we send frames one at a time. Hence we don't measure fps. + const auto measure_fps = !m_params.measure_hw_latency && !m_params.measure_overall_latency; + auto net_live_track = std::make_shared(m_name, m_cng, m_overall_latency_meter, measure_fps, m_params.hef_path); + live_stats.add(net_live_track, 1); //support progress over multiple outputs - std::vector> threads; - for (auto &input_vstream : m_input_vstreams) { - BufferPtr dataset = nullptr; - for (auto ¶ms : m_params.vstream_params) { - if ((input_vstream.name() == params.name) && (!params.input_file_path.empty())) { - auto dataset_exp = create_dataset_from_input_file(params.input_file_path, input_vstream); - CHECK_EXPECTED_AS_STATUS(dataset_exp); - dataset = dataset_exp.release(); - } - } - if (nullptr == dataset) { - auto dataset_exp = create_constant_dataset(input_vstream); - CHECK_EXPECTED_AS_STATUS(dataset_exp); - dataset = dataset_exp.release(); - } +#if defined(_MSC_VER) + TimeBeginScopeGuard time_begin_scope_guard; +#endif - threads.emplace_back(std::make_unique>("SEND", [this, &input_vstream, &shutdown_event, - dataset](){ - return run_input_vstream(input_vstream, shutdown_event, dataset, 
m_overall_latency_meter); - })); - } + activation_barrier.arrive_and_wait(); - bool first = true; //TODO: check with multiple outputs - for (auto &output_vstream : m_output_vstreams) { - threads.emplace_back(std::make_unique>("RECV", [this, &output_vstream, first, net_live_track, - &shutdown_event](){ - return run_output_vstream(output_vstream, first, net_live_track, shutdown_event, m_overall_latency_meter); - })); - first = false; + if (m_params.mode == InferenceMode::RAW_ASYNC_SINGLE_THREAD) { + return run_single_thread_async_infer(shutdown_event, net_live_track); + } else { + auto threads = start_inference_threads(shutdown_event, net_live_track); + CHECK_EXPECTED_AS_STATUS(threads); + + CHECK_SUCCESS(shutdown_event->wait(HAILO_INFINITE_TIMEOUT)); + stop(); + return wait_for_threads(threads.value()); } +} - //TODO: return threads and move stop outside? - CHECK_SUCCESS(shutdown_event.wait(HAILO_INFINITE_TIMEOUT)); - stop(); - return wait_for_threads(threads); +void NetworkRunner::set_overall_latency_meter(LatencyMeterPtr latency_meter) +{ + m_overall_latency_meter = latency_meter; } -void NetworkRunner::stop() +void NetworkRunner::set_latency_barrier(BarrierPtr latency_barrier) { - for (auto &input_vstream : m_input_vstreams) { - (void) input_vstream.abort(); - } - for (auto &output_vstream : m_output_vstreams) { - (void) output_vstream.abort(); - } + m_latency_barrier = latency_barrier; } Expected, std::vector>> NetworkRunner::create_vstreams( @@ -277,8 +275,7 @@ Expected, std::vector>> Netwo if (elem_it != params.end()) { input_vstreams_params.emplace(input_vstream_info.name, elem_it->second); match_count++; - } - else { + } else { input_vstreams_params.emplace(input_vstream_info.name, HailoRTDefaults::get_vstreams_params()); } } @@ -306,4 +303,276 @@ Expected, std::vector>> Netwo CHECK_EXPECTED(output_vstreams); return {{input_vstreams.release(), output_vstreams.release()}};//TODO: move? copy elision? 
+} + +const std::vector NetworkRunner::ALLOWED_INFERENCE_RETURN_VALUES{ + {HAILO_SUCCESS, HAILO_STREAM_ABORTED_BY_USER, HAILO_SHUTDOWN_EVENT_SIGNALED} +}; + +FullNetworkRunner::FullNetworkRunner(const NetworkParams ¶ms, const std::string &name, VDevice &vdevice, + std::vector &&input_vstreams, std::vector &&output_vstreams, + std::shared_ptr cng) : + NetworkRunner(params, name, vdevice, cng), + m_input_vstreams(std::move(input_vstreams)), + m_output_vstreams(std::move(output_vstreams)) +{ +} + +Expected>> FullNetworkRunner::start_inference_threads(EventPtr shutdown_event, + std::shared_ptr net_live_track) +{ + std::vector> threads; + for (auto &input_vstream : m_input_vstreams) { + const auto vstream_params = get_params(input_vstream.name()); + auto writer = WriterWrapper::create(input_vstream, vstream_params, m_overall_latency_meter, + m_params.framerate); + CHECK_EXPECTED(writer); + + threads.emplace_back(std::make_unique>("WRITE", + [this, writer = writer.release(), shutdown_event]() mutable { + return run_write(writer, shutdown_event, m_latency_barrier); + })); + } + + bool first = true; //TODO: check with multiple outputs + for (auto &output_vstream : m_output_vstreams) { + auto reader = ReaderWrapper::create(output_vstream, m_overall_latency_meter, + first ? 
net_live_track : nullptr); + CHECK_EXPECTED(reader); + + threads.emplace_back(std::make_unique>("READ", + [this, reader=reader.release(), shutdown_event]() mutable { + return run_read(reader, shutdown_event, m_latency_barrier); + })); + first = false; + } + + return threads; +} + +void FullNetworkRunner::stop() +{ + for (auto &input_vstream : m_input_vstreams) { + (void) input_vstream.abort(); + } + for (auto &output_vstream : m_output_vstreams) { + (void) output_vstream.abort(); + } +} + +std::set FullNetworkRunner::get_input_names() +{ + std::set result; + + for (const auto &vstream : m_input_vstreams) { + result.insert(vstream.name()); + } + + return result; +} + +std::set FullNetworkRunner::get_output_names() +{ + std::set result; + + for (const auto &vstream : m_output_vstreams) { + result.insert(vstream.name()); + } + + return result; +} + +VStreamParams FullNetworkRunner::get_params(const std::string &name) +{ + for (const auto ¶ms : m_params.vstream_params) { + if (name == params.name) { + return params; + } + } + return VStreamParams(); +} + +RawNetworkRunner::RawNetworkRunner(const NetworkParams ¶ms, const std::string &name, VDevice &vdevice, + InputStreamRefVector &&input_streams, OutputStreamRefVector &&output_streams, + std::shared_ptr cng) : + NetworkRunner(params, name, vdevice, cng), + m_input_streams(std::move(input_streams)), + m_output_streams(std::move(output_streams)) +{ +} + +Expected>> RawNetworkRunner::start_inference_threads(EventPtr shutdown_event, + std::shared_ptr net_live_track) +{ + const bool async_streams = (m_params.is_async()); + std::vector> threads; + for (auto &input_stream : m_input_streams) { + const auto stream_params = get_params(input_stream.get().name()); + auto writer = WriterWrapper::create(input_stream.get(), stream_params, m_overall_latency_meter, + m_params.framerate); + CHECK_EXPECTED(writer); + + if (async_streams) { + threads.emplace_back(std::make_unique>("WRITE_ASYNC", + [this, writer = writer.release(), 
shutdown_event]() mutable { + return run_write_async(writer, shutdown_event, m_latency_barrier); + })); + } else { + threads.emplace_back(std::make_unique>("WRITE", + [this, writer = writer.release(), shutdown_event]() mutable { + return run_write(writer, shutdown_event, m_latency_barrier); + })); + } + } + + bool first = true; //TODO: check with multiple outputs + for (auto &output_stream : m_output_streams) { + auto reader = ReaderWrapper::create(output_stream.get(), m_overall_latency_meter, + first ? net_live_track : nullptr); + CHECK_EXPECTED(reader); + + if (async_streams) { + threads.emplace_back(std::make_unique>("READ_ASYNC", + [this, reader=reader.release(), shutdown_event]() mutable { + return run_read_async(reader, shutdown_event, m_latency_barrier); + })); + } else { + threads.emplace_back(std::make_unique>("READ", + [this, reader=reader.release(), shutdown_event]() mutable { + return run_read(reader, shutdown_event, m_latency_barrier); + })); + } + first = false; + } + + return threads; +} + +hailo_status RawNetworkRunner::run_single_thread_async_infer(EventPtr shutdown_event, + std::shared_ptr net_live_track) +{ + // Build output wrappers + std::vector> reader_wrappers; + std::vector output_semaphores; + bool is_first_output = true; + for (auto &output_stream : m_output_streams) { + auto reader_wrapper = ReaderWrapper::create(output_stream.get(), m_overall_latency_meter, + is_first_output ? 
net_live_track : nullptr); + CHECK_EXPECTED_AS_STATUS(reader_wrapper); + is_first_output = false; + + auto max_queue_size = reader_wrapper.value()->get().get_async_max_queue_size(); + CHECK_EXPECTED_AS_STATUS(max_queue_size); + + auto semaphore = Semaphore::create_shared(static_cast(*max_queue_size)); + CHECK_NOT_NULL(semaphore, HAILO_OUT_OF_HOST_MEMORY); + + output_semaphores.emplace_back(semaphore); + reader_wrappers.emplace_back(reader_wrapper.release()); + } + + // Build input wrappers + std::vector> writer_wrappers; + std::vector input_semaphores; + for (auto &input_stream : m_input_streams) { + auto writer_wrapper = WriterWrapper::create(input_stream.get(), + get_params(input_stream.get().name()), m_overall_latency_meter, m_params.framerate); + CHECK_EXPECTED_AS_STATUS(writer_wrapper); + + auto max_queue_size = writer_wrapper.value()->get().get_async_max_queue_size(); + CHECK_EXPECTED_AS_STATUS(max_queue_size); + + auto semaphore = Semaphore::create_shared(static_cast(*max_queue_size)); + CHECK_NOT_NULL(semaphore, HAILO_OUT_OF_HOST_MEMORY); + + input_semaphores.emplace_back(semaphore); + writer_wrappers.emplace_back(writer_wrapper.release()); + } + + // Build waitables list with reference to previous input/output semaphores. + // We put output semaphores before inputs because we want to always have place to write + // the data into. It also makes sure that the framerate throttle will work properly. 
+ const size_t shutdown_index = 0; + const size_t output_index_start = shutdown_index + 1; + const size_t input_index_start = output_index_start + output_semaphores.size(); + + std::vector> waitables; + waitables.emplace_back(std::ref(*shutdown_event)); + auto add_to_waitables = [&waitables](const SemaphorePtr &sem) { waitables.emplace_back(std::ref(*sem)); }; + std::for_each(output_semaphores.begin(), output_semaphores.end(), add_to_waitables); + std::for_each(input_semaphores.begin(), input_semaphores.end(), add_to_waitables); + WaitableGroup wait_group(std::move(waitables)); + + // Inference + while (true) { + auto wait_index = wait_group.wait_any(HAILORTCLI_DEFAULT_TIMEOUT); + CHECK_EXPECTED_AS_STATUS(wait_index); + + if (*wait_index == shutdown_index) { + // Stopping the network so we won't get timeout on the flush. The async operations may still be active + // (until network deactivation). + stop(); + break; + } else if ((*wait_index >= output_index_start) && (*wait_index < input_index_start)) { + // output is ready + const size_t output_index = *wait_index - output_index_start; + auto status = reader_wrappers[output_index]->read_async( + [semaphore=output_semaphores[output_index]](const OutputStream::CompletionInfo &) { + (void)semaphore->signal(); + } + ); + CHECK_SUCCESS(status); + } else { + // input is ready + const size_t input_index = *wait_index - input_index_start; + auto status = writer_wrappers[input_index]->write_async( + [semaphore=input_semaphores[input_index]](const InputStream::CompletionInfo &) { + (void)semaphore->signal(); + } + ); + CHECK_SUCCESS(status); + } + } + + return HAILO_SUCCESS; +} + +void RawNetworkRunner::stop() +{ + for (auto &input_stream : m_input_streams) { + (void) input_stream.get().abort(); + } + for (auto &output_stream : m_output_streams) { + (void) output_stream.get().abort(); + } +} + +std::set RawNetworkRunner::get_input_names() +{ + std::set result; + for (const auto &stream : m_input_streams) { + 
result.insert(stream.get().name()); + } + + return result; +} + +std::set RawNetworkRunner::get_output_names() +{ + std::set result; + for (const auto &stream : m_output_streams) { + result.insert(stream.get().name()); + } + + return result; +} + +StreamParams RawNetworkRunner::get_params(const std::string &name) +{ + for (const auto ¶ms : m_params.stream_params) { + if (name == params.name) { + return params; + } + } + return StreamParams(); } \ No newline at end of file diff --git a/hailort/hailortcli/run2/network_runner.hpp b/hailort/hailortcli/run2/network_runner.hpp index dda0651..5eafec0 100644 --- a/hailort/hailortcli/run2/network_runner.hpp +++ b/hailort/hailortcli/run2/network_runner.hpp @@ -10,7 +10,15 @@ #ifndef _HAILO_HAILORTCLI_RUN2_NETWORK_RUNNER_HPP_ #define _HAILO_HAILORTCLI_RUN2_NETWORK_RUNNER_HPP_ +#include "io_wrappers.hpp" +#include "live_stats.hpp" +#include "network_live_track.hpp" + +#include "../hailortcli.hpp" + #include "common/barrier.hpp" +#include "common/async_thread.hpp" +#include "common/event_internal.hpp" #include "hailo/vdevice.hpp" #include "hailo/vstream.hpp" @@ -19,25 +27,44 @@ #include "hailo/expected.hpp" #include "hailo/buffer.hpp" -#include "../hailortcli.hpp" - -#include "live_printer.hpp" -#include "network_live_track.hpp" - #include #include -constexpr uint32_t UNLIMITED_FRAMERATE = 0; +using namespace hailort; + +constexpr std::chrono::milliseconds SYNC_EVENT_TIMEOUT(1000); + -struct VStreamParams +enum class InferenceMode { + FULL, + + RAW, + RAW_ASYNC, + RAW_ASYNC_SINGLE_THREAD, +}; + +struct IoParams { - VStreamParams(); + IoParams(); std::string name; - hailo_vstream_params_t params; std::string input_file_path; }; +struct VStreamParams : public IoParams +{ + VStreamParams(); + + hailo_vstream_params_t params; +}; + +struct StreamParams : public IoParams +{ + StreamParams(); + + hailo_stream_flags_t flags; +}; + struct NetworkParams { NetworkParams(); @@ -45,6 +72,7 @@ struct NetworkParams std::string hef_path; 
std::string net_group_name; std::vector vstream_params; + std::vector stream_params; hailo_scheduling_algorithm_t scheduling_algorithm; // Network parameters @@ -58,35 +86,274 @@ struct NetworkParams bool measure_hw_latency; bool measure_overall_latency; + InferenceMode mode; + + bool is_async() const + { + return (mode == InferenceMode::RAW_ASYNC) || (mode == InferenceMode::RAW_ASYNC_SINGLE_THREAD); + } +}; + +class SignalEventScopeGuard final +{ +public: + SignalEventScopeGuard(Event &event); + ~SignalEventScopeGuard(); + +private: + Event &m_event; +}; + +class BarrierTerminateScopeGuard final +{ +public: + BarrierTerminateScopeGuard(BarrierPtr barrier); + ~BarrierTerminateScopeGuard(); + +private: + BarrierPtr m_barrier; }; class NetworkRunner { public: + static Expected> create_shared(VDevice &vdevice, const NetworkParams ¶ms); + NetworkRunner(const NetworkParams ¶ms, const std::string &name, - std::vector &&input_vstreams, std::vector &&output_vstreams, - std::shared_ptr cng, hailort::LatencyMeterPtr overall_latency_meter); - static hailort::Expected> create_shared(hailort::VDevice &vdevice, const NetworkParams ¶ms); - hailo_status run(hailort::Event &shutdown_event, LivePrinter &live_printer, hailort::Barrier &barrier); - void stop(); + VDevice &vdevice, std::shared_ptr cng); + virtual ~NetworkRunner() = default; -private: - static hailort::Expected, std::vector>> create_vstreams( - hailort::ConfiguredNetworkGroup &net_group, const std::map ¶ms); - hailo_status run_input_vstream(hailort::InputVStream &vstream, hailort::Event &shutdown_event, hailort::BufferPtr dataset, - hailort::LatencyMeterPtr overall_latency_meter); - static hailo_status run_output_vstream(hailort::OutputVStream &vstream, bool first, std::shared_ptr net_live_track, - hailort::Event &shutdown_event, hailort::LatencyMeterPtr overall_latency_meter); + hailo_status run(EventPtr shutdown_event, LiveStats &live_stats, Barrier &activation_barrier); + virtual void stop() = 0; + // Must be called 
prior to run + void set_overall_latency_meter(LatencyMeterPtr latency_meter); + void set_latency_barrier(BarrierPtr latency_barrier); -static hailort::Expected create_constant_dataset(const hailort::InputVStream &input_vstream); -static hailort::Expected create_dataset_from_input_file(const std::string &file_path, const hailort::InputVStream &input_vstream); +protected: + static bool inference_succeeded(hailo_status status); + // Use 'inference_succeeded(async_thread->get())' to check for a thread's success + virtual Expected>> start_inference_threads(EventPtr shutdown_event, + std::shared_ptr net_live_track) = 0; + virtual hailo_status run_single_thread_async_infer(EventPtr shutdown_event, + std::shared_ptr net_live_track) = 0; - const NetworkParams &m_params;//TODO: copy instead of ref? + virtual std::set get_input_names() = 0; + virtual std::set get_output_names() = 0; + + static Expected, std::vector>> create_vstreams( + ConfiguredNetworkGroup &net_group, const std::map ¶ms); + + template + hailo_status run_write(WriterWrapperPtr writer, EventPtr shutdown_event, + std::shared_ptr latency_barrier) + { + auto latency_barrier_scope_guard = BarrierTerminateScopeGuard(latency_barrier); + auto signal_event_scope_guard = SignalEventScopeGuard(*shutdown_event); + + while (true) { + if (latency_barrier) { + latency_barrier->arrive_and_wait(); + } + + for (auto i = 0; i < m_params.batch_size; i++) { + auto status = writer->write(); + if (status == HAILO_STREAM_ABORTED_BY_USER) { + return status; + } + CHECK_SUCCESS(status); + } + } + return HAILO_SUCCESS; + } + + template + hailo_status run_write_async(WriterWrapperPtr writer, EventPtr shutdown_event, + std::shared_ptr latency_barrier) + { + auto latency_barrier_scope_guard = BarrierTerminateScopeGuard(latency_barrier); + auto signal_event_scope_guard = SignalEventScopeGuard(*shutdown_event); + + // When measuring latency we want to send one frame at a time (to avoid back-pressure) + // sync_event will be used to send 
one frame at a time + EventPtr sync_event = nullptr; + if (m_params.measure_hw_latency || m_params.measure_overall_latency) { + sync_event = Event::create_shared(Event::State::not_signalled); + CHECK_NOT_NULL(sync_event, HAILO_OUT_OF_HOST_MEMORY); + } + + while (true) { + if (latency_barrier) { + latency_barrier->arrive_and_wait(); + } + + for (auto i = 0; i < m_params.batch_size; i++) { + auto status = writer->wait_for_async_ready(); + if (status == HAILO_STREAM_ABORTED_BY_USER) { + return status; + } + CHECK_SUCCESS(status); + + status = writer->write_async( + [sync_event](const typename Writer::CompletionInfo &) { + if (sync_event) { + (void)sync_event->signal(); + } + }); + if (status == HAILO_STREAM_ABORTED_BY_USER) { + return status; + } + CHECK_SUCCESS(status); + + if (m_params.measure_hw_latency || m_params.measure_overall_latency) { + status = WaitOrShutdown(sync_event, shutdown_event).wait(SYNC_EVENT_TIMEOUT); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == status) { + // Don't print an error for this + return status; + } + CHECK_SUCCESS(status); + status = sync_event->reset(); + CHECK_SUCCESS(status); + } + } + } + return HAILO_SUCCESS; + } + + template + hailo_status run_read(ReaderWrapperPtr reader, EventPtr shutdown_event, + std::shared_ptr latency_barrier) + { + auto latency_barrier_scope_guard = BarrierTerminateScopeGuard(latency_barrier); + auto signal_event_scope_guard = SignalEventScopeGuard(*shutdown_event); + + while (true) { + if (latency_barrier) { + latency_barrier->arrive_and_wait(); + } + + for (auto i = 0; i < m_params.batch_size; i++) { + auto status = reader->read(); + if (status == HAILO_STREAM_ABORTED_BY_USER) { + return status; + } + CHECK_SUCCESS(status); + } + } + return HAILO_SUCCESS; + } + + template + hailo_status run_read_async(ReaderWrapperPtr reader, EventPtr shutdown_event, + std::shared_ptr latency_barrier) + { + auto latency_barrier_scope_guard = BarrierTerminateScopeGuard(latency_barrier); + auto signal_event_scope_guard = 
SignalEventScopeGuard(*shutdown_event); + + // When measuring latency we want to send one frame at a time (to avoid back-pressure) + // sync_event will be used to send one frame at a time + EventPtr sync_event = nullptr; + if (m_params.measure_hw_latency || m_params.measure_overall_latency) { + sync_event = Event::create_shared(Event::State::not_signalled); + CHECK_NOT_NULL(sync_event, HAILO_OUT_OF_HOST_MEMORY); + } + + while (true) { + if (latency_barrier) { + latency_barrier->arrive_and_wait(); + } + + for (auto i = 0; i < m_params.batch_size; i++) { + auto status = reader->wait_for_async_ready(); + if (status == HAILO_STREAM_ABORTED_BY_USER) { + return status; + } + CHECK_SUCCESS(status); + + status = reader->read_async( + [sync_event](const typename Reader::CompletionInfo &) { + if (sync_event) { + (void)sync_event->signal(); + } + }); + if (status == HAILO_STREAM_ABORTED_BY_USER) { + return status; + } + CHECK_SUCCESS(status); + + if (m_params.measure_hw_latency || m_params.measure_overall_latency) { + status = WaitOrShutdown(sync_event, shutdown_event).wait(SYNC_EVENT_TIMEOUT); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == status) { + // Don't print an error for this + return status; + } + CHECK_SUCCESS(status); + status = sync_event->reset(); + CHECK_SUCCESS(status); + } + } + } + return HAILO_SUCCESS; + } + + VDevice &m_vdevice; + const NetworkParams m_params; std::string m_name; - std::vector m_input_vstreams; - std::vector m_output_vstreams; - std::shared_ptr m_cng; - hailort::LatencyMeterPtr m_overall_latency_meter; + std::shared_ptr m_cng; + LatencyMeterPtr m_overall_latency_meter; + BarrierPtr m_latency_barrier; + +private: + static const std::vector ALLOWED_INFERENCE_RETURN_VALUES; + static hailo_status wait_for_threads(std::vector> &threads); + static Expected create_constant_dataset(size_t size); + static Expected create_dataset_from_input_file(const std::string &file_path, size_t size); +}; + +class FullNetworkRunner : public NetworkRunner +{ +public: + 
FullNetworkRunner(const NetworkParams ¶ms, const std::string &name, VDevice &vdevice, + std::vector &&input_vstreams, std::vector &&output_vstreams, + std::shared_ptr cng); + + virtual Expected>> start_inference_threads(EventPtr shutdown_event, + std::shared_ptr net_live_track) override; + virtual hailo_status run_single_thread_async_infer(EventPtr, std::shared_ptr) override + { + return HAILO_NOT_IMPLEMENTED; + }; + + virtual void stop() override; + virtual std::set get_input_names() override; + virtual std::set get_output_names() override; + VStreamParams get_params(const std::string &name); + +private: + std::vector m_input_vstreams; + std::vector m_output_vstreams; +}; + +class RawNetworkRunner : public NetworkRunner +{ +public: + RawNetworkRunner(const NetworkParams ¶ms, const std::string &name, VDevice &vdevice, + InputStreamRefVector &&input_streams, OutputStreamRefVector &&output_streams, + std::shared_ptr cng); + + virtual Expected>> start_inference_threads(EventPtr shutdown_event, + std::shared_ptr net_live_track) override; + + virtual hailo_status run_single_thread_async_infer(EventPtr shutdown_event, + std::shared_ptr net_live_track) override; + + virtual void stop() override; + virtual std::set get_input_names() override; + virtual std::set get_output_names() override; + StreamParams get_params(const std::string &name); + +private: + InputStreamRefVector m_input_streams; + OutputStreamRefVector m_output_streams; }; #endif /* _HAILO_HAILORTCLI_RUN2_NETWORK_RUNNER_HPP_ */ \ No newline at end of file diff --git a/hailort/hailortcli/run2/run2_command.cpp b/hailort/hailortcli/run2/run2_command.cpp index e4c1070..3d8cf98 100644 --- a/hailort/hailortcli/run2/run2_command.cpp +++ b/hailort/hailortcli/run2/run2_command.cpp @@ -8,13 +8,14 @@ **/ #include "run2_command.hpp" -#include "live_printer.hpp" +#include "live_stats.hpp" #include "timer_live_track.hpp" #include "measurement_live_track.hpp" #include "network_runner.hpp" #include "common/barrier.hpp" 
#include "common/async_thread.hpp" +#include "../common.hpp" #include "hailo/vdevice.hpp" #include "hailo/hef.hpp" @@ -73,32 +74,102 @@ std::vector VStreamNameValidator::get_values(const std::string &hef return names; } +class StreamNameValidator : public CLI::Validator { + public: + StreamNameValidator(const CLI::Option *hef_path_option, const CLI::Option *net_group_name_option); +private: + static std::vector get_values(const std::string &hef_path, const std::string &net_group_name); +}; + +StreamNameValidator::StreamNameValidator(const CLI::Option *hef_path_option, const CLI::Option *net_group_name_option) : Validator("STREAM") { + func_ = [](std::string&) { + //TODO: support? + return std::string(); + }; + autocomplete_func_ = [hef_path_option, net_group_name_option](const std::string&) { + // TODO: remove existing names from prev user input + return get_values(hef_path_option->as(), net_group_name_option->as()); + }; +} + +std::vector StreamNameValidator::get_values(const std::string &hef_path, const std::string &net_group_name) +{ + auto hef = Hef::create(hef_path); + if (!hef.has_value()) { + return {}; + } + + // TODO: duplicate + auto actual_net_group_name = net_group_name; + if (actual_net_group_name.empty()) { + auto net_groups_names = hef->get_network_groups_names(); + if (net_groups_names.size() != 1) { + return {}; + } + actual_net_group_name = net_groups_names[0]; + } + + auto streams_info = hef->get_all_stream_infos(actual_net_group_name); + if (!streams_info.has_value()) { + return {}; + } + + std::vector names; + for (auto &stream_info : streams_info.value()) { + names.emplace_back(stream_info.name); + } + return names; +} + +IoApp::IoApp(const std::string &description, const std::string &name, Type type) : + CLI::App(description, name), + m_type(type), + m_vstream_params(), + m_stream_params() +{ +} + +IoApp::Type IoApp::get_type() const +{ + return m_type; +} + +const VStreamParams &IoApp::get_vstream_params() const +{ + // TODO: instead of copy 
do a move + call reset()? change func name to move_params? same for NetworkParams/NetworkApp class + return m_vstream_params; +} + +const StreamParams &IoApp::get_stream_params() const +{ + // TODO: instead of copy do a move + call reset()? change func name to move_params? same for NetworkParams/NetworkApp class + return m_stream_params; +} + /** VStreamApp */ -class VStreamApp : public CLI::App +class VStreamApp : public IoApp { public: VStreamApp(const std::string &description, const std::string &name, CLI::Option *hef_path_option, CLI::Option *net_group_name_option); - const VStreamParams& get_params(); private: CLI::Option* add_flag_callback(CLI::App *app, const std::string &name, const std::string &description, std::function function); - - VStreamParams m_params; }; VStreamApp::VStreamApp(const std::string &description, const std::string &name, CLI::Option *hef_path_option, - CLI::Option *net_group_name_option) : CLI::App(description, name), m_params() + CLI::Option *net_group_name_option) : + IoApp(description, name, IoApp::Type::VSTREAM) { - add_option("name", m_params.name, "vStream name") + add_option("name", m_vstream_params.name, "vStream name") ->check(VStreamNameValidator(hef_path_option, net_group_name_option)); - add_option("--input-file", m_params.input_file_path, + add_option("--input-file", m_vstream_params.input_file_path, "Input file path. If not given, random data will be used. 
File format should be raw binary data with size that is a factor of the input shape size") ->default_val(""); auto format_opt_group = add_option_group("Format"); - format_opt_group->add_option("--type", m_params.params.user_buffer_format.type, "Format type") + format_opt_group->add_option("--type", m_vstream_params.params.user_buffer_format.type, "Format type") ->transform(HailoCheckedTransformer({ { "auto", HAILO_FORMAT_TYPE_AUTO }, { "uint8", HAILO_FORMAT_TYPE_UINT8 }, @@ -107,7 +178,7 @@ VStreamApp::VStreamApp(const std::string &description, const std::string &name, })) ->default_val("auto"); - format_opt_group->add_option("--order", m_params.params.user_buffer_format.order, "Format order") + format_opt_group->add_option("--order", m_vstream_params.params.user_buffer_format.order, "Format order") ->transform(HailoCheckedTransformer({ { "auto", HAILO_FORMAT_ORDER_AUTO }, { "nhwc", HAILO_FORMAT_ORDER_NHWC }, @@ -130,27 +201,50 @@ VStreamApp::VStreamApp(const std::string &description, const std::string &name, add_flag_callback(format_opt_group, "-q,--quantized,!--no-quantized", "Whether or not data is quantized", [this](bool result){ - m_params.params.user_buffer_format.flags = result ? - static_cast(m_params.params.user_buffer_format.flags | HAILO_FORMAT_FLAGS_QUANTIZED) : - static_cast(m_params.params.user_buffer_format.flags & (~HAILO_FORMAT_FLAGS_QUANTIZED));}) + m_vstream_params.params.user_buffer_format.flags = result ? 
+ static_cast(m_vstream_params.params.user_buffer_format.flags | HAILO_FORMAT_FLAGS_QUANTIZED) : + static_cast(m_vstream_params.params.user_buffer_format.flags & (~HAILO_FORMAT_FLAGS_QUANTIZED));}) ->run_callback_for_default() ->default_val(true); // default_val() must be after run_callback_for_default() } -const VStreamParams& VStreamApp::get_params() +CLI::Option* VStreamApp::add_flag_callback(CLI::App *app, const std::string &name, const std::string &description, + std::function function) { - //TODO: instead of copy do a move + call reset()? change func name to move_params? same for NetworkParams/NetworkApp class - return m_params; + // get_option doesn't support multiple names so taking the first one + auto first_name = name.substr(0, name.find(',')); + auto wrap_function = [app, function, first_name](std::int64_t){function(app->get_option(first_name)->as());}; + return app->add_flag_function(name, wrap_function, description); } -CLI::Option* VStreamApp::add_flag_callback(CLI::App *app, const std::string &name, const std::string &description, - std::function function) - { - // get_option doesn't support multiple names so taking the first one - auto first_name = name.substr(0, name.find(',')); - auto wrap_function = [app, function, first_name](std::int64_t){function(app->get_option(first_name)->as());}; - return app->add_flag_function(name, wrap_function, description); - } +/** StreamApp */ +class StreamApp : public IoApp +{ +public: + StreamApp(const std::string &description, const std::string &name, CLI::Option *hef_path_option, CLI::Option *net_group_name_option); +}; + +StreamApp::StreamApp(const std::string &description, const std::string &name, CLI::Option *hef_path_option, + CLI::Option *net_group_name_option) : + IoApp(description, name, IoApp::Type::STREAM) +{ + add_option("name", m_stream_params.name, "Stream name") + ->check(StreamNameValidator(hef_path_option, net_group_name_option)); + + add_option("--input-file", m_stream_params.input_file_path, + 
"Input file path. If not given, random data will be used. File format should be raw binary data with size that is a factor of the input shape size") + ->default_val(""); + + // TODO: async option (HRT-9580) + // TODO: flag callback? + // add_flag_callback(format_opt_group, "-q,--quantized,!--no-quantized", "Whether or not data is quantized", + // [this](bool result){ + // m_params.params.user_buffer_format.flags = result ? + // static_cast(m_params.params.user_buffer_format.flags | HAILO_FORMAT_FLAGS_QUANTIZED) : + // static_cast(m_params.params.user_buffer_format.flags & (~HAILO_FORMAT_FLAGS_QUANTIZED));}) + // ->run_callback_for_default() + // ->default_val(true); // default_val() must be after run_callback_for_default() +} /** NetworkGroupNameValidator */ class NetworkGroupNameValidator : public CLI::Validator { @@ -173,18 +267,9 @@ NetworkGroupNameValidator::NetworkGroupNameValidator(const CLI::Option *hef_path } /** NetworkApp */ -class NetworkApp : public CLI::App -{ -public: - NetworkApp(const std::string &description, const std::string &name); - const NetworkParams& get_params(); - -private: - void add_vstream_app_subcom(CLI::Option *hef_path_option, CLI::Option *net_group_name_option); - NetworkParams m_params; -}; - -NetworkApp::NetworkApp(const std::string &description, const std::string &name) : CLI::App(description, name), m_params() +NetworkApp::NetworkApp(const std::string &description, const std::string &name) : + CLI::App(description, name), + m_params() { auto hef_path_option = add_option("hef", m_params.hef_path, "HEF file path")->check(CLI::ExistingFile); auto net_group_name_option = add_option("--name", m_params.net_group_name, "Network group name") @@ -204,34 +289,11 @@ NetworkApp::NetworkApp(const std::string &description, const std::string &name) // TODO: support multiple scheduling algorithms m_params.scheduling_algorithm = HAILO_SCHEDULING_ALGORITHM_ROUND_ROBIN; - add_vstream_app_subcom(hef_path_option, net_group_name_option); -} - -void 
NetworkApp::add_vstream_app_subcom(CLI::Option *hef_path_option, CLI::Option *net_group_name_option) -{ - auto vstream_app = std::make_shared("Set vStream", "set-vstream", hef_path_option, net_group_name_option); - vstream_app->immediate_callback(); - vstream_app->callback([this, vstream_app, hef_path_option, net_group_name_option]() { - m_params.vstream_params.push_back(vstream_app->get_params()); - - // Throw an error if anything is left over and should not be. - _process_extras(); - - // NOTE: calling "net_app->clear(); m_params = NetworkParams();" is not sufficient because default values - // need to be re-set. we can override clear and reset them but there might be other issues as well - // and this one feels less hacky ATM - remove_subcommand(vstream_app.get()); - // Remove from parsed_subcommands_ as well (probably a bug in CLI11) - parsed_subcommands_.erase(std::remove_if( - parsed_subcommands_.begin(), parsed_subcommands_.end(), - [vstream_app](auto x){return x == vstream_app.get();}), - parsed_subcommands_.end()); - add_vstream_app_subcom(hef_path_option, net_group_name_option); - }); - - // Must set fallthrough to support nested repeated subcommands. 
- vstream_app->fallthrough(); - add_subcommand(vstream_app); + auto vstream_subcommand = add_io_app_subcom("Set vStream", "set-vstream", hef_path_option, net_group_name_option); + auto stream_subcommand = add_io_app_subcom("Set Stream", "set-stream", hef_path_option, net_group_name_option); + // TODO: doesn't seam to be working (HRT-9886) + vstream_subcommand->excludes(stream_subcommand); + stream_subcommand->excludes(vstream_subcommand); } const NetworkParams& NetworkApp::get_params() @@ -252,16 +314,23 @@ public: bool get_measure_power(); bool get_measure_current(); bool get_measure_temp(); + bool get_measure_hw_latency(); + bool get_measure_overall_latency(); bool get_multi_process_service(); const std::string &get_group_id(); + InferenceMode get_mode() const; + const std::string &get_output_json_path(); void set_scheduling_algorithm(hailo_scheduling_algorithm_t scheduling_algorithm); + void set_inference_mode(); void set_measure_latency(); private: void add_net_app_subcom(); std::vector m_network_params; uint32_t m_time_to_run; + InferenceMode m_mode; + std::string m_stats_json_path; std::vector m_device_id; uint32_t m_device_count; bool m_multi_process_service; @@ -282,6 +351,17 @@ Run2::Run2() : CLI::App("Run networks (preview)", "run2") add_option("-t,--time-to-run", m_time_to_run, "Time to run (seconds)") ->default_val(DEFAULT_TIME_TO_RUN_SECONDS) ->check(CLI::PositiveNumber); + add_option("-m,--mode", m_mode, "Inference mode") + ->transform(HailoCheckedTransformer({ + { "full", InferenceMode::FULL }, + { "raw", InferenceMode::RAW }, + { "raw_async", InferenceMode::RAW_ASYNC }, + { "raw_async_single_thread", InferenceMode::RAW_ASYNC_SINGLE_THREAD, OptionVisibility::HIDDEN } + }))->default_val("full"); + static const char *JSON_SUFFIX = ".json"; + add_option("-j,--json", m_stats_json_path, "If set save statistics as json to the specified path") + ->default_val("") + ->check(FileSuffixValidator(JSON_SUFFIX)); auto vdevice_options_group = 
add_option_group("VDevice Options"); @@ -303,13 +383,13 @@ Run2::Run2() : CLI::App("Run networks (preview)", "run2") auto measure_power_opt = measurement_options_group->add_flag("--measure-power", m_measure_power, "Measure power consumption") ->default_val(false); - + measurement_options_group->add_flag("--measure-current", m_measure_current, "Measure current")->excludes(measure_power_opt) ->default_val(false); - measurement_options_group->add_flag("--measure-latency", m_measure_hw_latency, "Measure network latency") + measurement_options_group->add_flag("--measure-latency", m_measure_hw_latency, "Measure network latency on the NN core") ->default_val(false); - + measurement_options_group->add_flag("--measure-overall-latency", m_measure_overall_latency, "Measure overall latency measurement") ->default_val(false); @@ -341,6 +421,7 @@ void Run2::add_net_app_subcom() // NOTE: fallthrough() is not a must here but it is also not working (causing only a single vstream param // instead of >1). 
Debug - App.hpp::void _parse(std::vector &args) add_subcommand(net_app); + // TODO: set _autocomplete based on m_mode (HRT-9886) } const std::vector& Run2::get_network_params() @@ -368,6 +449,16 @@ bool Run2::get_measure_temp() return m_measure_temp; } +bool Run2::get_measure_hw_latency() +{ + return m_measure_hw_latency; +} + +bool Run2::get_measure_overall_latency() +{ + return m_measure_overall_latency; +} + std::vector Run2::get_dev_ids() { std::vector res; @@ -386,6 +477,13 @@ uint32_t Run2::get_device_count() return m_device_count; } +void Run2::set_inference_mode() +{ + for (auto ¶ms : m_network_params) { + params.mode = m_mode; + } +} + void Run2::set_scheduling_algorithm(hailo_scheduling_algorithm_t scheduling_algorithm) { for (auto ¶ms: m_network_params) { @@ -395,7 +493,7 @@ void Run2::set_scheduling_algorithm(hailo_scheduling_algorithm_t scheduling_algo void Run2::set_measure_latency() { - for (auto ¶ms: m_network_params) { + for (auto ¶ms : m_network_params) { params.measure_hw_latency = m_measure_hw_latency; params.measure_overall_latency = m_measure_overall_latency; } @@ -411,6 +509,15 @@ const std::string &Run2::get_group_id() return m_group_id; } +InferenceMode Run2::get_mode() const +{ + return m_mode; +} + +const std::string &Run2::get_output_json_path() +{ + return m_stats_json_path; +} /** Run2Command */ Run2Command::Run2Command(CLI::App &parent_app) : Command(parent_app.add_subcommand(std::make_shared())) @@ -437,10 +544,27 @@ bool is_valid_ip(const std::string &ip) IS_FIT_IN_UINT8(a) && IS_FIT_IN_UINT8(b) && IS_FIT_IN_UINT8(c) && IS_FIT_IN_UINT8(d); } +std::string get_str_infer_mode(const InferenceMode& infer_mode) +{ + switch(infer_mode){ + case InferenceMode::FULL: + return "full"; + case InferenceMode::RAW: + return "raw"; + case InferenceMode::RAW_ASYNC: + return "raw_async"; + case InferenceMode::RAW_ASYNC_SINGLE_THREAD: + return "raw_async_single_thread"; + } + + return ""; +} + hailo_status Run2Command::execute() { Run2 *app = 
reinterpret_cast(m_app); + app->set_inference_mode(); app->set_measure_latency(); if (0 == app->get_network_params().size()) { @@ -450,8 +574,12 @@ hailo_status Run2Command::execute() if (1 == app->get_network_params().size()) { LOGGER__WARN("\"hailortcli run2\" is in preview. It is recommended to use \"hailortcli run\" command for a single network group"); } + if (app->get_measure_hw_latency() || app->get_measure_overall_latency()) { + CHECK(1 == app->get_network_params().size(), HAILO_INVALID_OPERATION, "When latency measurement is enabled, only one model is allowed"); + LOGGER__WARN("Measuring latency; frames are sent one at a time and FPS will not be measured"); + } - hailo_vdevice_params_t vdevice_params = {}; + hailo_vdevice_params_t vdevice_params{}; CHECK_SUCCESS(hailo_init_vdevice_params(&vdevice_params)); auto dev_ids = app->get_dev_ids(); if (!dev_ids.empty()) { @@ -467,6 +595,12 @@ hailo_status Run2Command::execute() } else { vdevice_params.device_count = app->get_device_count(); } + // TODO: Async stream support for scheduler (HRT-9878) + if ((app->get_mode() == InferenceMode::RAW_ASYNC) || (app->get_mode() == InferenceMode::RAW_ASYNC_SINGLE_THREAD)) { + vdevice_params.scheduling_algorithm = HAILO_SCHEDULING_ALGORITHM_NONE; + CHECK(1 == app->get_network_params().size(), HAILO_INVALID_OPERATION, "Only one model is allowed with aw async inference mode"); + app->set_scheduling_algorithm(HAILO_SCHEDULING_ALGORITHM_NONE); + } vdevice_params.group_id = app->get_group_id().c_str(); vdevice_params.multi_process_service = app->get_multi_process_service(); @@ -482,40 +616,51 @@ hailo_status Run2Command::execute() net_runners.emplace_back(net_runner.release()); } - auto live_printer = std::make_unique(std::chrono::seconds(1)); - live_printer->add(std::make_shared(app->get_time_to_run()), 0); + auto live_stats = std::make_unique(std::chrono::seconds(1)); + + live_stats->add(std::make_shared(app->get_time_to_run()), 0); + + auto shutdown_event = 
Event::create_shared(Event::State::not_signalled); + CHECK_NOT_NULL(shutdown_event, HAILO_OUT_OF_HOST_MEMORY); - auto shutdown_event = Event::create(Event::State::not_signalled); - CHECK_EXPECTED_AS_STATUS(shutdown_event); std::vector> threads; - Barrier barrier(net_runners.size() + 1); // We wait for all nets to finish activation + this thread to start sampling + Barrier activation_barrier(net_runners.size() + 1); // We wait for all nets to finish activation + this thread to start sampling for (auto &net_runner : net_runners) { threads.emplace_back(std::make_unique>("NG_INFER", [&net_runner, &shutdown_event, - &live_printer, &barrier](){ - return net_runner->run(shutdown_event.value(), *live_printer, barrier); + &live_stats, &activation_barrier](){ + return net_runner->run(shutdown_event, *live_stats, activation_barrier); })); } + auto signal_event_scope_guard = SignalEventScopeGuard(*shutdown_event); + auto physical_devices = vdevice.value()->get_physical_devices(); CHECK_EXPECTED_AS_STATUS(physical_devices); for (auto &device : physical_devices.value()) { auto measurement_live_track = MeasurementLiveTrack::create_shared(device.get(), app->get_measure_power(), app->get_measure_current(), app->get_measure_temp()); + if (HAILO_SUCCESS != measurement_live_track.status()) { + activation_barrier.terminate(); + } CHECK_EXPECTED_AS_STATUS(measurement_live_track); - live_printer->add(measurement_live_track.release(), 2); + + live_stats->add(measurement_live_track.release(), 2); } // TODO: wait for all nets before starting timer. start() should update TimerLiveTrack to start. or maybe append here but first in vector... 
- barrier.arrive_and_wait(); - CHECK_SUCCESS(live_printer->start()); + activation_barrier.arrive_and_wait(); + CHECK_SUCCESS(live_stats->start()); auto status = shutdown_event->wait(app->get_time_to_run()); if (HAILO_TIMEOUT != status) { // if shutdown_event is signaled its because one of the send/recv threads failed LOGGER__ERROR("Encountered error during inference. See log for more information."); } - live_printer.reset(); // Ensures that the final print will include real values and not with values of when streams are already aborted. + if (!app->get_output_json_path().empty()){ + live_stats->dump_stats(app->get_output_json_path(), get_str_infer_mode(app->get_mode())); + } + live_stats.reset(); // Ensures that the final print will include real values and not with values of when streams are already aborted. shutdown_event->signal(); return wait_for_threads(threads); } \ No newline at end of file diff --git a/hailort/hailortcli/run2/run2_command.hpp b/hailort/hailortcli/run2/run2_command.hpp index e569deb..015fe8c 100644 --- a/hailort/hailortcli/run2/run2_command.hpp +++ b/hailort/hailortcli/run2/run2_command.hpp @@ -11,6 +11,10 @@ #define _HAILO_HAILORTCLI_RUN2_RUN2_COMMAND_HPP_ #include "../command.hpp" +#include "network_runner.hpp" + +#include + class Run2Command : public Command { public: @@ -20,4 +24,71 @@ public: private: }; -#endif /* _HAILO_HAILORTCLI_RUN2_RUN2_COMMAND_HPP_ */ \ No newline at end of file +class IoApp : public CLI::App +{ +public: + enum class Type { + STREAM, + VSTREAM + }; + + IoApp(const std::string &description, const std::string &name, Type type); + Type get_type() const; + const VStreamParams& get_vstream_params() const; + const StreamParams& get_stream_params() const; + +protected: + Type m_type; + VStreamParams m_vstream_params; + StreamParams m_stream_params; +}; + +class NetworkApp : public CLI::App +{ +public: + NetworkApp(const std::string &description, const std::string &name); + const NetworkParams& get_params(); + +private: + 
template + CLI::App *add_io_app_subcom(const std::string &description, const std::string &name, + CLI::Option *hef_path_option, CLI::Option *net_group_name_option) + { + static_assert(std::is_base_of::value, "T is not a subclass of IoApp"); + + auto io_app = std::make_shared(description, name, hef_path_option, net_group_name_option); + io_app->immediate_callback(); + io_app->callback([this, description, name, io_app, hef_path_option, net_group_name_option]() { + if (io_app->get_type() == IoApp::Type::VSTREAM) { + auto vstream_params = io_app->get_vstream_params(); + m_params.vstream_params.push_back(vstream_params); + } else { + auto stream_params = io_app->get_stream_params(); + m_params.stream_params.push_back(stream_params); + } + + // Throw an error if anything is left over and should not be. + _process_extras(); + + // NOTE: calling "net_app->clear(); m_params = NetworkParams();" is not sufficient because default values + // need to be re-set. we can override clear and reset them but there might be other issues as well + // and this one feels less hacky ATM + remove_subcommand(io_app.get()); + // Remove from parsed_subcommands_ as well (probably a bug in CLI11) + parsed_subcommands_.erase(std::remove_if( + parsed_subcommands_.begin(), parsed_subcommands_.end(), + [io_app](auto x){return x == io_app.get();}), + parsed_subcommands_.end()); + add_io_app_subcom(description, name, hef_path_option, net_group_name_option); + }); + + // Must set fallthrough to support nested repeated subcommands. 
+ io_app->fallthrough(); + return add_subcommand(io_app); + } + + NetworkParams m_params; +}; + + +#endif /* _HAILO_HAILORTCLI_RUN2_RUN2_COMMAND_HPP_ */ diff --git a/hailort/hailortcli/run2/timer_live_track.cpp b/hailort/hailortcli/run2/timer_live_track.cpp index 05fd73d..3367082 100644 --- a/hailort/hailortcli/run2/timer_live_track.cpp +++ b/hailort/hailortcli/run2/timer_live_track.cpp @@ -13,23 +13,18 @@ #include TimerLiveTrack::TimerLiveTrack(std::chrono::milliseconds duration) : - LivePrinter::Track(), m_duration(duration), m_start_time() + LiveStats::Track(), m_duration(duration), m_start_time() { } -hailo_status TimerLiveTrack::start() +hailo_status TimerLiveTrack::start_impl() { m_start_time = std::chrono::steady_clock::now(); - m_started = true; - return HAILO_SUCCESS; } -uint32_t TimerLiveTrack::get_text(std::stringstream &ss) +uint32_t TimerLiveTrack::push_text_impl(std::stringstream &ss) { - if (!m_started) { - return 0; - } static const uint32_t MAX_PROGRESS_BAR_WIDTH = 20; auto elapsed_time = std::chrono::steady_clock::now() - m_start_time; auto eta = std::chrono::seconds(std::max(0, static_cast(std::round(std::chrono::duration(m_duration - elapsed_time).count())))); // std::chrono::round is from C++17 @@ -39,4 +34,11 @@ uint32_t TimerLiveTrack::get_text(std::stringstream &ss) ss << fmt::format("[{:=>{}}{:{}}] {:>3}% {}\n", '>', progress_bar_width, "", MAX_PROGRESS_BAR_WIDTH - progress_bar_width, elapsed_percentage, CliCommon::duration_to_string(eta)); return 1; +} + +void TimerLiveTrack::push_json_impl(nlohmann::ordered_json &json) +{ + std::stringstream time_to_run; + time_to_run << std::fixed << std::setprecision(2) << std::round(std::chrono::duration(m_duration).count()) << " seconds"; + json["time_to_run"] = time_to_run.str(); } \ No newline at end of file diff --git a/hailort/hailortcli/run2/timer_live_track.hpp b/hailort/hailortcli/run2/timer_live_track.hpp index af6e7e7..836b692 100644 --- a/hailort/hailortcli/run2/timer_live_track.hpp +++ 
b/hailort/hailortcli/run2/timer_live_track.hpp @@ -7,18 +7,19 @@ * @brief Timer live track **/ -#include "live_printer.hpp" +#include "live_stats.hpp" #ifndef _HAILO_HAILORTCLI_RUN2_TIMER_LIVE_TRACK_HPP_ #define _HAILO_HAILORTCLI_RUN2_TIMER_LIVE_TRACK_HPP_ -class TimerLiveTrack : public LivePrinter::Track +class TimerLiveTrack : public LiveStats::Track { public: TimerLiveTrack(std::chrono::milliseconds duration); virtual ~TimerLiveTrack() = default; - virtual hailo_status start() override; - virtual uint32_t get_text(std::stringstream &ss) override; + virtual hailo_status start_impl() override; + virtual uint32_t push_text_impl(std::stringstream &ss) override; + virtual void push_json_impl(nlohmann::ordered_json &json) override; private: std::chrono::milliseconds m_duration; diff --git a/hailort/hailortcli/run_command.hpp b/hailort/hailortcli/run_command.hpp index e00199e..502911d 100644 --- a/hailort/hailortcli/run_command.hpp +++ b/hailort/hailortcli/run_command.hpp @@ -133,7 +133,7 @@ public: }; desc_function_ = []() { - return "\t\tInput file path/paths. On single input network, give the full path of the data file.\n\ + return "\t\tInput file (.bin) path/paths. On single input network, give the full path of the data file.\n\ \t\tOn multiple inputs network, the format is input_name1=path1 input_name2=path2, where\n\ \t\tinput_name1 is the name of the input stream. 
If not given, random data will be used"; }; diff --git a/hailort/libhailort/CMakeLists.txt b/hailort/libhailort/CMakeLists.txt index 3993b61..504348b 100644 --- a/hailort/libhailort/CMakeLists.txt +++ b/hailort/libhailort/CMakeLists.txt @@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.0.0) # set(CMAKE_C_CLANG_TIDY "clang-tidy;-checks=*") set(HAILORT_MAJOR_VERSION 4) -set(HAILORT_MINOR_VERSION 13) +set(HAILORT_MINOR_VERSION 14) set(HAILORT_REVISION_VERSION 0) # Add the cmake folder so the modules there are found diff --git a/hailort/libhailort/bindings/gstreamer/CMakeLists.txt b/hailort/libhailort/bindings/gstreamer/CMakeLists.txt index e0c06c9..dde4203 100644 --- a/hailort/libhailort/bindings/gstreamer/CMakeLists.txt +++ b/hailort/libhailort/bindings/gstreamer/CMakeLists.txt @@ -8,7 +8,7 @@ if(NOT CMAKE_HOST_UNIX) message(FATAL_ERROR "Only unix hosts are supported, stopping build") endif() -find_package(HailoRT 4.13.0 EXACT REQUIRED) +find_package(HailoRT 4.14.0 EXACT REQUIRED) # GST_PLUGIN_DEFINE needs PACKAGE to be defined set(GST_HAILO_PACKAGE_NAME "hailo") @@ -36,6 +36,12 @@ set_property(TARGET gsthailo PROPERTY CXX_STANDARD 14) set_target_properties(gsthailo PROPERTIES PUBLIC_HEADER "gst-hailo/metadata/tensor_meta.hpp" + CXX_STANDARD 14 + CXX_STANDARD_REQUIRED YES + CXX_EXTENSIONS NO + C_VISIBILITY_PRESET hidden + CXX_VISIBILITY_PRESET hidden + # VISIBILITY_INLINES_HIDDEN YES ) target_compile_options(gsthailo PRIVATE diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/common.hpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/common.hpp index 73d126e..ee8d5a4 100644 --- a/hailort/libhailort/bindings/gstreamer/gst-hailo/common.hpp +++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/common.hpp @@ -48,7 +48,7 @@ using namespace hailort; #define DEFAULT_VDEVICE_KEY (0) #define MIN_VALID_VDEVICE_KEY (1) -#define HAILO_SUPPORTED_FORMATS "{ RGB, RGBA, YUY2, NV12, NV21, I420 }" +#define HAILO_SUPPORTED_FORMATS "{ RGB, RGBA, YUY2, NV12, NV21, I420, GRAY8 
}" #define HAILO_VIDEO_CAPS GST_VIDEO_CAPS_MAKE(HAILO_SUPPORTED_FORMATS) #define HAILO_DEFAULT_SCHEDULER_TIMEOUT_MS (0) diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailonet.cpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailonet.cpp index 7b4f755..77ce6bd 100644 --- a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailonet.cpp +++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailonet.cpp @@ -102,6 +102,7 @@ enum PROP_SCHEDULING_ALGORITHM, PROP_SCHEDULER_TIMEOUT_MS, PROP_SCHEDULER_THRESHOLD, + PROP_SCHEDULER_PRIORITY, PROP_MULTI_PROCESS_SERVICE, PROP_INPUT_QUANTIZED, PROP_OUTPUT_QUANTIZED, @@ -187,6 +188,10 @@ static void gst_hailonet_class_init(GstHailoNetClass *klass) g_object_class_install_property(gobject_class, PROP_SCHEDULER_THRESHOLD, g_param_spec_uint("scheduler-threshold", "Frames threshold for scheduler", "The minimum number of send requests required before the hailonet is considered ready to get run time from the scheduler.", HAILO_DEFAULT_SCHEDULER_THRESHOLD, std::numeric_limits::max(), HAILO_DEFAULT_SCHEDULER_THRESHOLD, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + g_object_class_install_property(gobject_class, PROP_SCHEDULER_PRIORITY, + g_param_spec_uint("scheduler-priority", "Priority index for scheduler", "When the scheduler will choose the next hailonet to run, higher priority will be prioritized in the selection. " + "Bigger number represent higher priority", + HAILO_SCHEDULER_PRIORITY_MIN, HAILO_SCHEDULER_PRIORITY_MAX, HAILO_SCHEDULER_PRIORITY_NORMAL, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); g_object_class_install_property(gobject_class, PROP_MULTI_PROCESS_SERVICE, g_param_spec_boolean("multi-process-service", "Should run over HailoRT service", "Controls wether to run HailoRT over its service. " "To use this property, the service should be active and scheduling-algorithm should be set. 
Defaults to false.", @@ -474,7 +479,7 @@ void HailoNetImpl::set_property(GObject *object, guint property_id, const GValue break; case PROP_SCHEDULER_TIMEOUT_MS: if (m_was_configured) { - g_warning("The network was already configured so changing the scheduling algorithm will not take place!"); + g_warning("The network was already configured so changing the scheduling timeout will not take place!"); break; } if (m_props.m_is_active.was_changed()) { @@ -485,7 +490,7 @@ void HailoNetImpl::set_property(GObject *object, guint property_id, const GValue break; case PROP_SCHEDULER_THRESHOLD: if (m_was_configured) { - g_warning("The network was already configured so changing the scheduling algorithm will not take place!"); + g_warning("The network was already configured so changing the scheduling threshold will not take place!"); break; } if (m_props.m_is_active.was_changed()) { @@ -494,6 +499,17 @@ void HailoNetImpl::set_property(GObject *object, guint property_id, const GValue } m_props.m_scheduler_threshold = g_value_get_uint(value); break; + case PROP_SCHEDULER_PRIORITY: + if (m_was_configured) { + g_warning("The network was already configured so changing the scheduling priority will not take place!"); + break; + } + if (m_props.m_is_active.was_changed()) { + g_error("scheduler usage (scheduler-priority) in combination with 'is-active' is not supported."); + break; + } + m_props.m_scheduler_priority = static_cast(g_value_get_uint(value)); + break; case PROP_MULTI_PROCESS_SERVICE: if (m_was_configured) { g_warning("The network was already configured so changing the multi-process-service property will not take place!"); @@ -596,6 +612,9 @@ void HailoNetImpl::get_property(GObject *object, guint property_id, GValue *valu case PROP_SCHEDULER_THRESHOLD: g_value_set_uint(value, m_props.m_scheduler_threshold.get()); break; + case PROP_SCHEDULER_PRIORITY: + g_value_set_uint(value, m_props.m_scheduler_priority.get()); + break; case PROP_MULTI_PROCESS_SERVICE: 
g_value_set_boolean(value, m_props.m_multi_process_service.get()); break; @@ -696,6 +715,10 @@ hailo_status HailoNetImpl::configure_network_group() status = m_net_group_handle->set_scheduler_threshold(m_props.m_network_name.get(), m_props.m_scheduler_threshold.get()); GST_CHECK_SUCCESS(status, m_element, RESOURCE, "Setting scheduler threshold failed, status = %d", status); } + if (m_props.m_scheduler_priority.was_changed()) { + status = m_net_group_handle->set_scheduler_priority(m_props.m_network_name.get(), m_props.m_scheduler_priority.get()); + GST_CHECK_SUCCESS(status, m_element, RESOURCE, "Setting scheduler priority failed, status = %d", status); + } auto vstreams = m_net_group_handle->create_vstreams(m_props.m_network_name.get(), m_props.m_scheduling_algorithm.get(), m_output_formats, static_cast(m_props.m_input_quantized.get()), static_cast(m_props.m_output_quantized.get()), m_props.m_input_format_type.get(), m_props.m_output_format_type.get()); diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailonet.hpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailonet.hpp index 0c3e6f8..2840eb8 100644 --- a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailonet.hpp +++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailonet.hpp @@ -53,7 +53,7 @@ struct HailoNetProperties final public: HailoNetProperties() : m_device_id(nullptr), m_hef_path(nullptr), m_network_name(nullptr), m_batch_size(HAILO_DEFAULT_BATCH_SIZE), m_is_active(false), m_device_count(0), m_vdevice_key(DEFAULT_VDEVICE_KEY), m_scheduling_algorithm(HAILO_SCHEDULING_ALGORITHM_ROUND_ROBIN), - m_scheduler_timeout_ms(HAILO_DEFAULT_SCHEDULER_TIMEOUT_MS), m_scheduler_threshold(HAILO_DEFAULT_SCHEDULER_THRESHOLD), + m_scheduler_timeout_ms(HAILO_DEFAULT_SCHEDULER_TIMEOUT_MS), m_scheduler_threshold(HAILO_DEFAULT_SCHEDULER_THRESHOLD), m_scheduler_priority(HAILO_SCHEDULER_PRIORITY_NORMAL), m_multi_process_service(HAILO_DEFAULT_MULTI_PROCESS_SERVICE), m_input_quantized(true), 
m_output_quantized(true), m_input_format_type(HAILO_FORMAT_TYPE_AUTO), m_output_format_type(HAILO_FORMAT_TYPE_AUTO) @@ -69,6 +69,7 @@ public: HailoElemProperty m_scheduling_algorithm; HailoElemProperty m_scheduler_timeout_ms; HailoElemProperty m_scheduler_threshold; + HailoElemProperty m_scheduler_priority; HailoElemProperty m_multi_process_service; HailoElemProperty m_input_quantized; HailoElemProperty m_output_quantized; diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailorecv.cpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailorecv.cpp index e7955da..322545a 100644 --- a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailorecv.cpp +++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailorecv.cpp @@ -234,9 +234,11 @@ hailo_status HailoRecvImpl::read_from_vstreams(bool should_print_latency) std::chrono::duration latency = std::chrono::system_clock::now() - start_time; GST_DEBUG("%s latency: %f milliseconds", output_info.vstream().name().c_str(), latency.count()); } - GST_CHECK_SUCCESS(status, m_element, STREAM, "Reading from vstream failed, status = %d", status); - gst_buffer_unmap(*buffer, &buffer_info); + if (HAILO_STREAM_ABORTED_BY_USER == status) { + return status; + } + GST_CHECK_SUCCESS(status, m_element, STREAM, "Reading from vstream failed, status = %d", status); } if (should_print_latency) { diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailosend.cpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailosend.cpp index 184886c..c04927b 100644 --- a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailosend.cpp +++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailosend.cpp @@ -30,6 +30,7 @@ GST_DEBUG_CATEGORY_STATIC(gst_hailosend_debug_category); #define GST_CAT_DEFAULT gst_hailosend_debug_category #define RGB_FEATURES_SIZE (3) #define RGBA_FEATURES_SIZE (4) +#define GRAY8_FEATURES_SIZE (1) #define YUY2_FEATURES_SIZE (2) #define NV12_FEATURES_SIZE (3) #define NV21_FEATURES_SIZE (3) @@ -65,7 
+66,7 @@ static void gst_hailosend_class_init(GstHailoSendClass *klass) gst_pad_template_new("sink", GST_PAD_SINK, GST_PAD_ALWAYS, gst_caps_from_string(HAILO_VIDEO_CAPS))); gst_element_class_set_static_metadata(GST_ELEMENT_CLASS(klass), - "hailosend element", "Hailo/Filter/Video", "Send RGB/RGBA/YUY2/NV12/NV21/I420 video to HailoRT", PLUGIN_AUTHOR); + "hailosend element", "Hailo/Filter/Video", "Send RGB/RGBA/GRAY8/YUY2/NV12/NV21/I420 video to HailoRT", PLUGIN_AUTHOR); element_class->change_state = GST_DEBUG_FUNCPTR(gst_hailosend_change_state); @@ -212,15 +213,28 @@ GstCaps *HailoSendImpl::get_caps(GstBaseTransform */*trans*/, GstPadDirection /* format = "RGBA"; break; } + else if (m_input_vstream_infos[0].shape.features == GRAY8_FEATURES_SIZE) + { + format = "GRAY8"; + break; + } /* Fallthrough */ case HAILO_FORMAT_ORDER_NHCW: case HAILO_FORMAT_ORDER_FCR: case HAILO_FORMAT_ORDER_F8CR: - format = "RGB"; - GST_CHECK(RGB_FEATURES_SIZE == m_input_vstream_infos[0].shape.features, NULL, m_element, STREAM, - "Features of input vstream %s is not %d for RGB format! (features=%d)", m_input_vstream_infos[0].name, RGB_FEATURES_SIZE, - m_input_vstream_infos[0].shape.features); - break; + if (m_input_vstream_infos[0].shape.features == GRAY8_FEATURES_SIZE) + { + format = "GRAY8"; + break; + } + else + { + format = "RGB"; + GST_CHECK(RGB_FEATURES_SIZE == m_input_vstream_infos[0].shape.features, NULL, m_element, STREAM, + "Features of input vstream %s is not %d for RGB format! 
(features=%d)", m_input_vstream_infos[0].name, RGB_FEATURES_SIZE, + m_input_vstream_infos[0].shape.features); + break; + } case HAILO_FORMAT_ORDER_YUY2: format = "YUY2"; GST_CHECK(YUY2_FEATURES_SIZE == m_input_vstream_infos[0].shape.features, NULL, m_element, STREAM, diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/network_group_handle.cpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/network_group_handle.cpp index fb0aecd..83f075a 100644 --- a/hailort/libhailort/bindings/gstreamer/gst-hailo/network_group_handle.cpp +++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/network_group_handle.cpp @@ -180,6 +180,11 @@ hailo_status NetworkGroupHandle::set_scheduler_threshold(const char *network_nam return m_cng->set_scheduler_threshold(threshold, network_name); } +hailo_status NetworkGroupHandle::set_scheduler_priority(const char *network_name, uint8_t priority) +{ + return m_cng->set_scheduler_priority(priority, network_name); +} + Expected, std::vector>> NetworkGroupHandle::create_vstreams(const char *network_name, hailo_scheduling_algorithm_t scheduling_algorithm, const std::vector &output_formats, bool input_quantized, bool output_quantized, hailo_format_type_t input_format_type, hailo_format_type_t output_format_type) @@ -294,10 +299,10 @@ Expected> NetworkGroupConfigManager::con std::shared_ptr found_cng = get_configured_network_group(device_id, hef->hash(), network_group_name, batch_size); if (nullptr != found_cng) { - // If cng was already configured auto infos = found_cng->get_network_infos(); GST_CHECK_EXPECTED(infos, element, RESOURCE, "Failed getting network infos"); if ((infos.release().size() > 1) || (scheduling_algorithm == HAILO_SCHEDULING_ALGORITHM_NONE)) { + // If cng was already configured // But hailonet is not running all networks in the cng (or if not using scheduler) - // Do not use multiplexer! 
return found_cng; diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/network_group_handle.hpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/network_group_handle.hpp index 665d0ac..0a4fabf 100644 --- a/hailort/libhailort/bindings/gstreamer/gst-hailo/network_group_handle.hpp +++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/network_group_handle.hpp @@ -90,7 +90,7 @@ public: hailo_status set_scheduler_timeout(const char *network_name, uint32_t timeout_ms); hailo_status set_scheduler_threshold(const char *network_name, uint32_t threshold); - + hailo_status set_scheduler_priority(const char *network_name, uint8_t priority); std::shared_ptr hef() { diff --git a/hailort/libhailort/bindings/python/CMakeLists.txt b/hailort/libhailort/bindings/python/CMakeLists.txt index febd4f0..ffdcfc9 100644 --- a/hailort/libhailort/bindings/python/CMakeLists.txt +++ b/hailort/libhailort/bindings/python/CMakeLists.txt @@ -1 +1,4 @@ +cmake_minimum_required(VERSION 3.11.0) + +include(externals/pybind11.cmake) add_subdirectory(src) diff --git a/hailort/libhailort/bindings/python/externals/pybind11.cmake b/hailort/libhailort/bindings/python/externals/pybind11.cmake new file mode 100644 index 0000000..db0b705 --- /dev/null +++ b/hailort/libhailort/bindings/python/externals/pybind11.cmake @@ -0,0 +1,35 @@ +cmake_minimum_required(VERSION 3.11.0) + +include(FetchContent) + +if(NOT PYTHON_EXECUTABLE AND PYBIND11_PYTHON_VERSION) + # venv version is prioritized (instead of PYBIND11_PYTHON_VERSION) if PYTHON_EXECUTABLE is not set. 
+ # See https://pybind11.readthedocs.io/en/stable/changelog.html#v2-6-0-oct-21-2020 + if((${CMAKE_VERSION} VERSION_LESS "3.22.0") AND (NOT WIN32)) + find_package(PythonInterp ${PYBIND11_PYTHON_VERSION} REQUIRED) + set(PYTHON_EXECUTABLE ${Python_EXECUTABLE}) + else() + find_package(Python3 ${PYBIND11_PYTHON_VERSION} REQUIRED EXACT COMPONENTS Interpreter Development) + set(PYTHON_EXECUTABLE ${Python3_EXECUTABLE}) + endif() +endif() + +FetchContent_Declare( + pybind11 + GIT_REPOSITORY https://github.com/pybind/pybind11.git + GIT_TAG 80dc998efced8ceb2be59756668a7e90e8bef917 # Version 2.10.1 + #GIT_SHALLOW TRUE + SOURCE_DIR "${CMAKE_CURRENT_LIST_DIR}/pybind11" + BINARY_DIR "${CMAKE_CURRENT_LIST_DIR}/pybind11" +) + +if(NOT HAILO_OFFLINE_COMPILATION) + # https://stackoverflow.com/questions/65527126/disable-install-for-fetchcontent + FetchContent_GetProperties(pybind11) + if(NOT pybind11_POPULATED) + FetchContent_Populate(pybind11) + add_subdirectory(${pybind11_SOURCE_DIR} ${pybind11_BINARY_DIR} EXCLUDE_FROM_ALL) + endif() +else() + add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/pybind11 EXCLUDE_FROM_ALL) +endif() \ No newline at end of file diff --git a/hailort/libhailort/bindings/python/platform/hailo_platform/__init__.py b/hailort/libhailort/bindings/python/platform/hailo_platform/__init__.py index 18d66a1..ed8ac13 100644 --- a/hailort/libhailort/bindings/python/platform/hailo_platform/__init__.py +++ b/hailort/libhailort/bindings/python/platform/hailo_platform/__init__.py @@ -26,7 +26,7 @@ from hailo_platform.pyhailort.pyhailort import (HEF, ConfigureParams, InputVStreams, OutputVStreams, InferVStreams, HailoStreamDirection, HailoFormatFlags, HailoCpuId, Device, VDevice, DvmTypes, PowerMeasurementTypes, SamplingPeriod, AveragingFactor, MeasurementBufferIndex, - HailoRTException, YOLOv5PostProcessOp, HailoSchedulingAlgorithm) + HailoRTException, HailoSchedulingAlgorithm, HailoRTStreamAbortedByUser) def _verify_pyhailort_lib_exists(): python_version = "".join(str(i) for i in 
sys.version_info[:2]) @@ -62,4 +62,4 @@ __all__ = ['EthernetDevice', 'DvmTypes', 'PowerMeasurementTypes', 'MipiIspImageInOrder', 'MipiIspImageOutDataType', 'join_drivers_path', 'IspLightFrequency', 'HailoPowerMode', 'Endianness', 'HailoStreamInterface', 'InputVStreamParams', 'OutputVStreamParams', 'InputVStreams', 'OutputVStreams', 'InferVStreams', 'HailoStreamDirection', 'HailoFormatFlags', 'HailoCpuId', - 'Device', 'VDevice', 'HailoRTException', 'YOLOv5PostProcessOp', 'HailoSchedulingAlgorithm'] + 'Device', 'VDevice', 'HailoRTException', 'HailoSchedulingAlgorithm', 'HailoRTStreamAbortedByUser'] diff --git a/hailort/libhailort/bindings/python/platform/hailo_platform/pyhailort/control_object.py b/hailort/libhailort/bindings/python/platform/hailo_platform/pyhailort/control_object.py index a1f5095..4b472bb 100644 --- a/hailort/libhailort/bindings/python/platform/hailo_platform/pyhailort/control_object.py +++ b/hailort/libhailort/bindings/python/platform/hailo_platform/pyhailort/control_object.py @@ -2,6 +2,7 @@ """Control operations for the Hailo hardware device.""" +from hailo_platform.common.logger.logger import default_logger from hailo_platform.pyhailort.pyhailort import (Control, InternalPcieDevice, ExceptionWrapper, BoardInformation, # noqa F401 CoreInformation, DeviceArchitectureTypes, ExtendedDeviceInformation, # noqa F401 HealthInformation, SamplingPeriod, AveragingFactor, DvmTypes, # noqa F401 @@ -38,8 +39,7 @@ class UdpHcpControl(HcpControl): """ # In the C API we define the total amount of attempts, instead of the amount of retries. - # TODO: HRT-9987 - Add this deprecation warning - # default_logger().warning("UdpHcpControl is deprecated! Please Use Control object") + default_logger().warning("UdpHcpControl is deprecated! 
Please Use Control object") max_number_of_attempts = retries + 1 response_timeout_milliseconds = int(response_timeout_seconds * 1000) if device is None: @@ -57,8 +57,8 @@ class PcieHcpControl(HcpControl): def __init__(self, device=None, device_info=None): """Initializes a new HailoPcieController object.""" - # TODO: HRT-9987 - Add this deprecation warning - # default_logger().warning("PcieHcpControl is deprecated! Please Use Control object") + + default_logger().warning("PcieHcpControl is deprecated! Please Use Control object") if device_info is None: device_info = InternalPcieDevice.scan_devices()[0] diff --git a/hailort/libhailort/bindings/python/platform/hailo_platform/pyhailort/hw_object.py b/hailort/libhailort/bindings/python/platform/hailo_platform/pyhailort/hw_object.py index 32483c5..bf078b7 100644 --- a/hailort/libhailort/bindings/python/platform/hailo_platform/pyhailort/hw_object.py +++ b/hailort/libhailort/bindings/python/platform/hailo_platform/pyhailort/hw_object.py @@ -27,8 +27,7 @@ class HailoHWObjectException(Exception): class HailoHWObject(object): - # TODO: HRT-9987 - Add (deprecated) to this doc - """Abstract Hailo hardware device representation""" + """Abstract Hailo hardware device representation (deprecated)""" NAME = InferenceTargets.UNINITIALIZED IS_HARDWARE = True @@ -44,8 +43,7 @@ class HailoHWObject(object): self._is_device_used = False self._hef_loaded = False - # TODO: HRT-9987 - Add this deprecation warning - # self._logger.warning("HailoHWObject is deprecated! Please use VDevice/Device object.") + self._logger.warning("HailoHWObject is deprecated! Please use VDevice/Device object.") # TODO: HRT-6310 Remove this. def __eq__(self, other): @@ -53,17 +51,15 @@ class HailoHWObject(object): @property def name(self): - """str: The name of this target. 
Valid values are defined by :class:`~hailo_platform.pyhailort.hw_object.InferenceTargets`""" - # TODO: HRT-9987 - Add this deprecation warning - # self._logger.warning("HailoHWObject name property is deprecated! Please use VDevice/Device object with device_id.") + """str: The name of this target. Valid values are defined by :class:`~hailo_platform.pyhailort.hw_object.InferenceTargets` (deprecated)""" + self._logger.warning("HailoHWObject name property is deprecated! Please use VDevice/Device object with device_id.") return type(self).NAME @property def is_hardware(self): - """bool: Indicates this target runs on a physical hardware device.""" + """bool: Indicates this target runs on a physical hardware device. (deprecated)""" # TODO: SDK should implement in Target - # TODO: HRT-9987 - Add this deprecation warning - # self._logger.warning("HailoHWObject is_hardware property is deprecated! Please use VDevice/Device object, or derive from it.") + self._logger.warning("HailoHWObject is_hardware property is deprecated! Please use VDevice/Device object, or derive from it.") return type(self).IS_HARDWARE @property @@ -76,46 +72,42 @@ class HailoHWObject(object): @property def sorted_output_layer_names(self): - """Getter for the property sorted_output_names. + """Getter for the property sorted_output_names (deprecated). Returns: list of str: Sorted list of the output layer names. """ - # TODO: HRT-9987 - Add this deprecation warning - # self._logger.warning("HailoHWObject sorted_output_layer_names property is deprecated! Please use ConfiguredNetwork get_sorted_output_names.") + self._logger.warning("HailoHWObject sorted_output_layer_names property is deprecated! 
Please use ConfiguredNetwork get_sorted_output_names.") if len(self._loaded_network_groups) != 1: raise HailoHWObjectException("Access to sorted_output_layer_names is only allowed when there is a single loaded network group") return self._loaded_network_groups[0].get_sorted_output_names() @contextmanager def use_device(self, *args, **kwargs): - # TODO: HRT-9987 - Add this deprecation warning and (deprecated) to this docs - # self._logger.warning("HailoHWObject use_device context manager is deprecated! Please use VDevice/Device object.") - """A context manager that wraps the usage of the device.""" + """A context manager that wraps the usage of the device. (deprecated)""" + self._logger.warning("HailoHWObject use_device context manager is deprecated! Please use VDevice/Device object.") self._is_device_used = True yield self._is_device_used = False def get_output_device_layer_to_original_layer_map(self): - """Get a mapping between the device outputs to the layers' names they represent. + """Get a mapping between the device outputs to the layers' names they represent (deprecated). Returns: dict: Keys are device output names and values are lists of layers' names. """ - # TODO: HRT-9987 - Add this deprecation warning and (deprecated) to this docs - # self._logger.warning("HailoHWObject get_output_device_layer_to_original_layer_map function is deprecated!") + self._logger.warning("HailoHWObject get_output_device_layer_to_original_layer_map function is deprecated!") if len(self._loaded_network_groups) != 1: raise HailoHWObjectException("Access to layer names is only allowed when there is a single loaded network group") return {stream_info.name : self._loaded_network_groups[0].get_vstream_names_from_stream_name(stream_info.name) for stream_info in self.get_output_stream_infos()} def get_original_layer_to_device_layer_map(self): - """Get a mapping between the layer names and the device outputs that contain them. 
+ """Get a mapping between the layer names and the device outputs that contain them (deprecated). Returns: dict: Keys are the names of the layers and values are device outputs names. """ - # TODO: HRT-9987 - Add this deprecation warning and (deprecated) to this docs - # self._logger.warning("HailoHWObject get_original_layer_to_device_layer_map function is deprecated!") + self._logger.warning("HailoHWObject get_original_layer_to_device_layer_map function is deprecated!") if len(self._loaded_network_groups) != 1: raise HailoHWObjectException("Access to layer names is only allowed when there is a single loaded network group") return {vstream_info.name : self._loaded_network_groups[0].get_stream_names_from_vstream_name(vstream_info.name) @@ -123,69 +115,61 @@ class HailoHWObject(object): @property def device_input_layers(self): - """Get a list of the names of the device's inputs.""" - # TODO: HRT-9987 - Add this deprecation warning and (deprecated) to this docs - # self._logger.warning("HailoHWObject device_input_layers function is deprecated! Please use ConfiguredNetwork object.") + """Get a list of the names of the device's inputs. (deprecated)""" + self._logger.warning("HailoHWObject device_input_layers function is deprecated! Please use ConfiguredNetwork object.") return [layer.name for layer in self.get_input_stream_infos()] @property def device_output_layers(self): - """Get a list of the names of the device's outputs.""" - # TODO: HRT-9987 - Add this deprecation warning and (deprecated) to this docs - # self._logger.warning("HailoHWObject device_output_layers function is deprecated! Please use ConfiguredNetwork object.") + """Get a list of the names of the device's outputs. (deprecated)""" + self._logger.warning("HailoHWObject device_output_layers function is deprecated! 
Please use ConfiguredNetwork object.") return [layer.name for layer in self.get_output_stream_infos()] def hef_loaded(self): - """Return True if this object has loaded the model HEF to the hardware device.""" + """Return True if this object has loaded the model HEF to the hardware device. (deprecated)""" # TODO: SDK should implement in Target - # TODO: HRT-9987 - Add this deprecation warning and (deprecated) to this docs - # self._logger.warning("HailoHWObject hef_loaded function is deprecated! Please use VDevice/Device object, or derive from it.") + self._logger.warning("HailoHWObject hef_loaded function is deprecated! Please use VDevice/Device object, or derive from it.") return self._hef_loaded def outputs_count(self): """Return the amount of output tensors that are returned from the hardware device for every - input image. + input image (deprecated). """ - # TODO: HRT-9987 - Add this deprecation warning and (deprecated) to this docs - # self._logger.warning("HailoHWObject outputs_count function is deprecated! Please use ConfiguredNetwork object.") + self._logger.warning("HailoHWObject outputs_count function is deprecated! Please use ConfiguredNetwork object.") return len(self.get_output_vstream_infos()) def _clear_shapes(self): # TODO: SDK should implement in Target - # TODO: HRT-9987 - Add this deprecation warning and (deprecated) to this docs - # self._logger.warning("HailoHWObject _clear_shapes function is deprecated! Please use ConfiguredNetwork object.") + self._logger.warning("HailoHWObject _clear_shapes function is deprecated! Please use ConfiguredNetwork object.") self._hw_consts = None @property def model_name(self): - """Get the name of the current model. + """Get the name of the current model (deprecated). Returns: str: Model name. """ - # TODO: HRT-9987 - Add this deprecation warning and (deprecated) to this docs - # self._logger.warning("HailoHWObject model_name property is deprecated! 
Please use ConfiguredNetwork object.") + self._logger.warning("HailoHWObject model_name property is deprecated! Please use ConfiguredNetwork object.") if len(self._loaded_network_groups) == 1: return self._loaded_network_groups[0].name raise HailoHWObjectException( "This function is only supported when there is exactly 1 loaded network group. one should use HEF.get_network_group_names() / ConfiguredNetwork.name / ActivatedNetwork.name") def get_output_shapes(self): - """Get the model output shapes, as returned to the user (without any hardware padding). + """Get the model output shapes, as returned to the user (without any hardware padding) (deprecated). Returns: Tuple of output shapes, sorted by the output names. """ - # TODO: HRT-9987 - Add this deprecation warning and (deprecated) to this docs - # self._logger.warning("HailoHWObject get_output_shapes function is deprecated! Please use ConfiguredNetwork object.") + self._logger.warning("HailoHWObject get_output_shapes function is deprecated! Please use ConfiguredNetwork object.") if len(self._loaded_network_groups) != 1: raise HailoHWObjectException("Calling get_output_shapes is only allowed when there is a single loaded network group") return self._loaded_network_groups[0].get_output_shapes() class HailoChipObject(HailoHWObject): - # TODO: HRT-9987 - Add (deprecated) to this docs - """Hailo hardware device representation""" + """Hailo hardware device representation (deprecated)""" def __init__(self): """Create the Hailo Chip hardware object.""" @@ -208,17 +192,16 @@ class HailoChipObject(HailoHWObject): return self._control_object def get_all_input_layers_dtype(self): - """Get the model inputs dtype. + """Get the model inputs dtype (deprecated). Returns: dict of :obj:'numpy.dtype': where the key is model input_layer name, and the value is dtype as the device expect to get for this input. 
""" - # TODO: HRT-9987 - Add this deprecation warning and (deprecated) to this docs - # self._logger.warning("HailoChipObject get_all_input_layers_dtype function is deprecated! Please use ConfiguredNetwork object.") + self._logger.warning("HailoChipObject get_all_input_layers_dtype function is deprecated! Please use ConfiguredNetwork object.") return {stream.name: HailoRTTransformUtils.get_dtype(stream.data_bytes) for stream in self.get_input_stream_infos()} def get_input_vstream_infos(self, network_name=None): - """Get input vstreams information of a specific network group. + """Get input vstreams information of a specific network group (deprecated). Args: network_name (str, optional): The name of the network to access. In case not given, all the networks in the network group will be addressed. @@ -227,14 +210,13 @@ class HailoChipObject(HailoHWObject): If there is exactly one configured network group, returns a list of :obj:`hailo_platform.pyhailort._pyhailort.VStreamInfo`: with all the information objects of all input vstreams """ - # TODO: HRT-9987 - Add this deprecation warning and (deprecated) to this docs - # self._logger.warning("HailoChipObject get_input_vstream_infos function is deprecated! Please use ConfiguredNetwork object.") + self._logger.warning("HailoChipObject get_input_vstream_infos function is deprecated! Please use ConfiguredNetwork object.") if len(self._loaded_network_groups) != 1: raise HailoHWObjectException("Access to network vstream info is only allowed when there is a single loaded network group") return self._loaded_network_groups[0].get_input_vstream_infos(network_name=network_name) def get_output_vstream_infos(self, network_name=None): - """Get output vstreams information of a specific network group. + """Get output vstreams information of a specific network group (deprecated). Args: network_name (str, optional): The name of the network to access. In case not given, all the networks in the network group will be addressed. 
@@ -243,14 +225,13 @@ class HailoChipObject(HailoHWObject): If there is exactly one configured network group, returns a list of :obj:`hailo_platform.pyhailort._pyhailort.VStreamInfo`: with all the information objects of all output vstreams """ - # TODO: HRT-9987 - Add this deprecation warning and (deprecated) to this docs - # self._logger.warning("HailoChipObject get_output_vstream_infos function is deprecated! Please use ConfiguredNetwork object.") + self._logger.warning("HailoChipObject get_output_vstream_infos function is deprecated! Please use ConfiguredNetwork object.") if len(self._loaded_network_groups) != 1: raise HailoHWObjectException("Access to network vstream info is only allowed when there is a single loaded network group") return self._loaded_network_groups[0].get_output_vstream_infos(network_name=network_name) def get_all_vstream_infos(self, network_name=None): - """Get input and output vstreams information. + """Get input and output vstreams information (deprecated). Args: network_name (str, optional): The name of the network to access. In case not given, all the networks in the network group will be addressed. @@ -259,14 +240,13 @@ class HailoChipObject(HailoHWObject): If there is exactly one configured network group, returns a list of :obj:`hailo_platform.pyhailort._pyhailort.VStreamInfo`: with all the information objects of all input and output vstreams """ - # TODO: HRT-9987 - Add this deprecation warning and (deprecated) to this docs - # self._logger.warning("HailoChipObject get_all_vstream_infos function is deprecated! Please use ConfiguredNetwork object.") + self._logger.warning("HailoChipObject get_all_vstream_infos function is deprecated! 
Please use ConfiguredNetwork object.") if len(self._loaded_network_groups) != 1: raise HailoHWObjectException("Access to network vstream info is only allowed when there is a single loaded network group") return self._loaded_network_groups[0].get_all_vstream_infos(network_name=network_name) def get_input_stream_infos(self, network_name=None): - """Get the input low-level streams information of a specific network group. + """Get the input low-level streams information of a specific network group (deprecated). Args: network_name (str, optional): The name of the network to access. In case not given, all the networks in the network group will be addressed. @@ -276,14 +256,13 @@ class HailoChipObject(HailoHWObject): :obj:`hailo_platform.pyhailort._pyhailort.VStreamInfo`: with information objects of all input low-level streams. """ - # TODO: HRT-9987 - Add this deprecation warning and (deprecated) to this docs - # self._logger.warning("HailoChipObject get_input_stream_infos function is deprecated! Please use ConfiguredNetwork object.") + self._logger.warning("HailoChipObject get_input_stream_infos function is deprecated! Please use ConfiguredNetwork object.") if len(self._loaded_network_groups) != 1: raise HailoHWObjectException("Access to network stream info is only allowed when there is a single loaded network group") return self._loaded_network_groups[0].get_input_stream_infos(network_name=network_name) def get_output_stream_infos(self, network_name=None): - """Get the output low-level streams information of a specific network group. + """Get the output low-level streams information of a specific network group (deprecated). Args: network_name (str, optional): The name of the network to access. In case not given, all the networks in the network group will be addressed. @@ -293,14 +272,13 @@ class HailoChipObject(HailoHWObject): :obj:`hailo_platform.pyhailort._pyhailort.VStreamInfo`: with information objects of all output low-level streams. 
""" - # TODO: HRT-9987 - Add this deprecation warning and (deprecated) to this docs - # self._logger.warning("HailoChipObject get_output_stream_infos function is deprecated! Please use ConfiguredNetwork object.") + self._logger.warning("HailoChipObject get_output_stream_infos function is deprecated! Please use ConfiguredNetwork object.") if len(self._loaded_network_groups) != 1: raise HailoHWObjectException("Access to network stream info is only allowed when there is a single loaded network group") return self._loaded_network_groups[0].get_output_stream_infos(network_name=network_name) def get_all_stream_infos(self, network_name=None): - """Get input and output streams information of a specific network group. + """Get input and output streams information of a specific network group (deprecated). Args: network_name (str, optional): The name of the network to access. In case not given, all the networks in the network group will be addressed. @@ -309,8 +287,7 @@ class HailoChipObject(HailoHWObject): If there is exactly one configured network group, returns a list of :obj:`hailo_platform.pyhailort._pyhailort.StreamInfo`: with all the information objects of all input and output streams """ - # TODO: HRT-9987 - Add this deprecation warning and (deprecated) to this docs - # self._logger.warning("HailoChipObject get_all_stream_infos function is deprecated! Please use ConfiguredNetwork object.") + self._logger.warning("HailoChipObject get_all_stream_infos function is deprecated! 
Please use ConfiguredNetwork object.") if len(self._loaded_network_groups) != 1: raise HailoHWObjectException("Access to network stream info is only allowed when there is a single loaded network group") return self._loaded_network_groups[0].get_all_stream_infos(network_name=network_name) @@ -339,12 +316,12 @@ class HailoChipObject(HailoHWObject): raise HailoRTException("Device can only be configured from the process it was created in.") configured_apps = self.control.configure(hef, configure_params_by_name) self._hef_loaded = True - configured_networks = [ConfiguredNetwork(configured_app, self, hef) for configured_app in configured_apps] + configured_networks = [ConfiguredNetwork(configured_app) for configured_app in configured_apps] self._loaded_network_groups.extend(configured_networks) return configured_networks def get_input_shape(self, name=None): - """Get the input shape (not padded) of a network. + """Get the input shape (not padded) of a network (deprecated). Args: name (str, optional): The name of the desired input. If a name is not provided, return @@ -353,8 +330,7 @@ class HailoChipObject(HailoHWObject): Returns: Tuple of integers representing the input_shape. """ - # TODO: HRT-9987 - Add this deprecation warning and (deprecated) to this docs - # self._logger.warning("HailoChipObject get_input_shape function is deprecated! Please use ConfiguredNetwork object.") + self._logger.warning("HailoChipObject get_input_shape function is deprecated! Please use ConfiguredNetwork object.") if name is None: name = self.get_input_vstream_infos()[0].name @@ -366,7 +342,7 @@ class HailoChipObject(HailoHWObject): [input_vstream.name for input_vstream in self.get_input_vstream_infos()])) def get_index_from_name(self, name): - """Get the index in the output list from the name. + """Get the index in the output list from the name (deprecated). Args: name (str): The name of the output. 
@@ -374,8 +350,7 @@ class HailoChipObject(HailoHWObject): Returns: int: The index of the layer name in the output list. """ - # TODO: HRT-9987 - Add this deprecation warning and (deprecated) to this docs - # self._logger.warning("HailoChipObject get_index_from_name function is deprecated! Please use ConfiguredNetwork object.") + self._logger.warning("HailoChipObject get_index_from_name function is deprecated! Please use ConfiguredNetwork object.") try: return self.sorted_output_layer_names.index(name) except ValueError: @@ -398,8 +373,7 @@ class HailoChipObject(HailoHWObject): class EthernetDevice(HailoChipObject): - # TODO: HRT-9987 - Add (deprecated) to this docs - """Represents any Hailo hardware device that supports UDP control and dataflow""" + """Represents any Hailo hardware device that supports UDP control and dataflow (deprecated)""" NAME = InferenceTargets.UDP_CONTROLLER @@ -417,6 +391,8 @@ class EthernetDevice(HailoChipObject): super(EthernetDevice, self).__init__() + self._logger.warning("EthernetDevice is deprecated! Please use VDevice/Device object.") + gc.collect() self._remote_ip = remote_ip @@ -442,8 +418,7 @@ class EthernetDevice(HailoChipObject): Returns: list of str: IPs of scanned devices. """ - # TODO: HRT-9987 - Add this deprecation warning and (deprecated) to this docs - # default_logger().warning("EthernetDevice scan_devices method is deprecated! Please use scan() of Device object.") + default_logger().warning("EthernetDevice scan_devices method is deprecated! Please use scan() of Device object.") udp_scanner = HailoUdpScan() return udp_scanner.scan_devices(interface_name, timeout_seconds=timeout_seconds) @@ -463,15 +438,13 @@ class EthernetDevice(HailoChipObject): @property def remote_ip(self): - """Return the IP of the remote device.""" - # TODO: HRT-9987 - Add this deprecation warning and (deprecated) to this docs - # self._logger.warning("EthernetDevice remote_ip method is deprecated! 
Please use VDevice/Device object.") + """Return the IP of the remote device (deprecated).""" + self._logger.warning("EthernetDevice remote_ip method is deprecated! Please use VDevice/Device object.") return self._remote_ip class PcieDevice(HailoChipObject): - # TODO: HRT-9987 - Add (deprecated) to this docs - """Hailo PCIe production device representation""" + """Hailo PCIe production device representation (deprecated)""" NAME = InferenceTargets.PCIE_CONTROLLER @@ -486,8 +459,7 @@ class PcieDevice(HailoChipObject): :func:`PcieDevice.scan_devices` to get list of all available devices. """ super(PcieDevice, self).__init__() - # TODO: HRT-9987 - Add this deprecation warning - # self._logger.warning("PcieDevice is deprecated! Please use VDevice/Device object.") + self._logger.warning("PcieDevice is deprecated! Please use VDevice/Device object.") gc.collect() # PcieDevice __del__ function tries to release self._device. @@ -506,13 +478,12 @@ class PcieDevice(HailoChipObject): @staticmethod def scan_devices(): - """Scans for all pcie devices on the system. + """Scans for all pcie devices on the system (deprecated). Returns: list of :obj:`hailo_platform.pyhailort.pyhailort.PcieDeviceInfo` """ - # TODO: HRT-9987 - Add this deprecation warning and (deprecated) to this docs - # default_logger().warning("PcieDevice scan_devices method is deprecated! Please use Device object.") + default_logger().warning("PcieDevice scan_devices method is deprecated! 
Please use Device object.") return InternalPcieDevice.scan_devices() def _open_device(self, device_info): diff --git a/hailort/libhailort/bindings/python/platform/hailo_platform/pyhailort/pyhailort.py b/hailort/libhailort/bindings/python/platform/hailo_platform/pyhailort/pyhailort.py index 6b85acf..6f73059 100644 --- a/hailort/libhailort/bindings/python/platform/hailo_platform/pyhailort/pyhailort.py +++ b/hailort/libhailort/bindings/python/platform/hailo_platform/pyhailort/pyhailort.py @@ -29,8 +29,7 @@ from hailo_platform.pyhailort._pyhailort import (TemperatureInfo, # noqa F401 MipiClockSelection, MipiIspImageInOrder, MipiIspImageOutDataType, IspLightFrequency, BootSource, HailoSocketDefs, Endianness, - MipiInputStreamParams, SensorConfigTypes, - SensorConfigOpCode) + MipiInputStreamParams, SensorConfigTypes) BBOX_PARAMS = _pyhailort.HailoRTDefaults.BBOX_PARAMS() HAILO_DEFAULT_ETH_CONTROL_PORT = _pyhailort.HailoRTDefaults.HAILO_DEFAULT_ETH_CONTROL_PORT() @@ -75,6 +74,9 @@ class HailoRTTimeout(HailoRTException): class HailoRTStreamAborted(HailoRTException): pass +class HailoRTStreamAbortedByUser(HailoRTException): + pass + class HailoRTInvalidOperationException(HailoRTException): pass @@ -127,6 +129,8 @@ class ExceptionWrapper(object): raise HailoRTTimeout("Received a timeout - hailort has failed because a timeout had occurred") from libhailort_exception if string_error_code == "HAILO_STREAM_ABORTED_BY_HW": raise HailoRTStreamAborted("Stream aborted due to an external event") from libhailort_exception + if string_error_code == "HAILO_STREAM_ABORTED_BY_USER": + raise HailoRTStreamAbortedByUser("Stream was aborted by user") from libhailort_exception if string_error_code == "HAILO_INVALID_OPERATION": raise HailoRTInvalidOperationException("Invalid operation. 
See hailort.log for more information") from libhailort_exception @@ -170,23 +174,26 @@ class HailoUdpScan(object): return device_ip_addresses -class TrafficControl(object): +class NetworkRateLimiter(object): def __init__(self, ip, port, rate_bytes_per_sec): if sys.platform != 'linux': - raise HailoRTInvalidOperationException('TrafficControl is supported only on UNIX os') - with ExceptionWrapper(): - self._tc_util = _pyhailort.TrafficControlUtil(ip, port, int(rate_bytes_per_sec)) - + raise HailoRTInvalidOperationException('NetworkRateLimiter is supported only on UNIX os') + self._ip = ip + self._port = port + self._rate_bytes_per_sec = rate_bytes_per_sec + def set_rate_limit(self): - self._tc_util.set_rate_limit() + with ExceptionWrapper(): + return _pyhailort.NetworkRateLimiter.set_rate_limit(self._ip, self._port, self._rate_bytes_per_sec) def reset_rate_limit(self): - self._tc_util.reset_rate_limit() + with ExceptionWrapper(): + return _pyhailort.NetworkRateLimiter.reset_rate_limit(self._ip, self._port) def get_interface_name(ip): "get the interface corresponding to the given ip" with ExceptionWrapper(): - return _pyhailort.TrafficControlUtil.get_interface_name(ip) + return _pyhailort.NetworkRateLimiter.get_interface_name(ip) class ConfigureParams(object): @@ -524,15 +531,13 @@ class HEF(object): class ConfiguredNetwork(object): """Represents a network group loaded to the device.""" - def __init__(self, configured_network, target, hef): + def __init__(self, configured_network): self._configured_network = configured_network self._input_vstreams_holders = [] self._output_vstreams_holders = [] - self._target = target - self._hef = hef def get_networks_names(self): - return self._hef.get_networks_names(self.name) + return self._configured_network.get_networks_names() def activate(self, network_group_params=None): """Activate this network group in order to infer data through it. 
@@ -544,14 +549,18 @@ class ConfiguredNetwork(object): Returns: :class:`ActivatedNetworkContextManager`: Context manager that returns the activated network group. + + Note: + Usage of `activate` when scheduler enabled is deprecated. On this case, this function will return None and print deprecation warning. """ - # TODO: HRT-9988 - Add deprecation warning when changing to service by default - network_group_params = network_group_params or self.create_params() + if self._configured_network.is_scheduled(): + default_logger().warning("Calls to `activate()` when working with scheduler are deprecated! On future versions this call will raise an error.") + return EmptyContextManager() + network_group_params = network_group_params or self.create_params() with ExceptionWrapper(): return ActivatedNetworkContextManager(self, - self._configured_network.activate(network_group_params), - self._target, self._hef) + self._configured_network.activate(network_group_params)) def wait_for_activation(self, timeout_ms=None): """Block until activated, or until ``timeout_ms`` is passed. @@ -590,7 +599,7 @@ class ConfiguredNetwork(object): return tuple(results) def get_sorted_output_names(self): - return self._hef.get_sorted_output_names(self.name) + return self._configured_network.get_sorted_output_names() def get_input_vstream_infos(self, network_name=None): """Get input vstreams information. @@ -602,8 +611,8 @@ class ConfiguredNetwork(object): list of :obj:`hailo_platform.pyhailort._pyhailort.VStreamInfo`: with all the information objects of all input vstreams """ - name = network_name if network_name is not None else self.name - return self._hef.get_input_vstream_infos(name) + name = network_name if network_name is not None else "" + return self._configured_network.get_input_vstream_infos(name) def get_output_vstream_infos(self, network_name=None): """Get output vstreams information. 
@@ -615,8 +624,8 @@ class ConfiguredNetwork(object): list of :obj:`hailo_platform.pyhailort._pyhailort.VStreamInfo`: with all the information objects of all output vstreams """ - name = network_name if network_name is not None else self.name - return self._hef.get_output_vstream_infos(name) + name = network_name if network_name is not None else "" + return self._configured_network.get_output_vstream_infos(name) def get_all_vstream_infos(self, network_name=None): """Get input and output vstreams information. @@ -628,8 +637,8 @@ class ConfiguredNetwork(object): list of :obj:`hailo_platform.pyhailort._pyhailort.VStreamInfo`: with all the information objects of all input and output vstreams """ - name = network_name if network_name is not None else self.name - return self._hef.get_all_vstream_infos(name) + name = network_name if network_name is not None else "" + return self._configured_network.get_all_vstream_infos(name) def get_input_stream_infos(self, network_name=None): """Get the input low-level streams information of a specific network group. @@ -642,8 +651,8 @@ class ConfiguredNetwork(object): of all input low-level streams. """ - name = network_name if network_name is not None else self.name - return self._hef.get_input_stream_infos(name) + name = network_name if network_name is not None else "" + return self._configured_network.get_input_stream_infos(name) def get_output_stream_infos(self, network_name=None): """Get the output low-level streams information of a specific network group. @@ -656,8 +665,8 @@ class ConfiguredNetwork(object): of all output low-level streams. """ - name = network_name if network_name is not None else self.name - return self._hef.get_output_stream_infos(name) + name = network_name if network_name is not None else "" + return self._configured_network.get_output_stream_infos(name) def get_all_stream_infos(self, network_name=None): """Get input and output streams information of a specific network group. 
@@ -669,8 +678,8 @@ class ConfiguredNetwork(object): list of :obj:`hailo_platform.pyhailort._pyhailort.StreamInfo`: with all the information objects of all input and output streams """ - name = network_name if network_name is not None else self.name - return self._hef.get_all_stream_infos(name) + name = network_name if network_name is not None else "" + return self._configured_network.get_all_stream_infos(name) def get_udp_rates_dict(self, fps, max_supported_rate_bytes): with ExceptionWrapper(): @@ -720,7 +729,7 @@ class ConfiguredNetwork(object): list of str: All the underlying streams names for the provided vstream name. """ with ExceptionWrapper(): - return self._hef.get_stream_names_from_vstream_name(vstream_name, self.name) + return self._configured_network.get_stream_names_from_vstream_name(vstream_name) def get_vstream_names_from_stream_name(self, stream_name): """Get vstream names list from their underlying stream name for a specific network group. @@ -732,7 +741,7 @@ class ConfiguredNetwork(object): list of str: All the matching vstream names for the provided stream name. 
""" with ExceptionWrapper(): - return self._hef.get_vstream_names_from_stream_name(stream_name, self.name) + return self._configured_network.get_vstream_names_from_stream_name(stream_name) def set_scheduler_timeout(self, timeout_ms, network_name=None): """Sets the maximum time period that may pass before getting run time from the scheduler, @@ -767,19 +776,29 @@ class ConfiguredNetwork(object): return self._configured_network.set_scheduler_priority(priority) +class EmptyContextManager(object): + """An empty context manager that returns instead of activated network group when scheduler is enabled`.""" + + def __init__(self): + pass + + def __enter__(self): + pass + + def __exit__(self, *args): + pass + + class ActivatedNetworkContextManager(object): """A context manager that returns the activated network group upon enter.""" - def __init__(self, configured_network, activated_network, target, hef): + def __init__(self, configured_network, activated_network): self._configured_network = configured_network self._activated_network = activated_network - self._target = target - self._hef = hef def __enter__(self): with ExceptionWrapper(): - activated_network_group = ActivatedNetwork(self._configured_network, self._activated_network.__enter__(), self._target, - self._hef) + activated_network_group = ActivatedNetwork(self._configured_network, self._activated_network.__enter__()) return activated_network_group def __exit__(self, *args): @@ -789,16 +808,10 @@ class ActivatedNetworkContextManager(object): class ActivatedNetwork(object): """The network group that is currently activated for inference.""" - def __init__(self, configured_network, activated_network, target, hef): + def __init__(self, configured_network, activated_network): self._configured_network = configured_network self._activated_network = activated_network - self._target = target - self._hef = hef self._last_number_of_invalid_frames_read = 0 - - @property - def target(self): - return self._target @property def 
name(self): @@ -826,7 +839,7 @@ class ActivatedNetwork(object): raise HailoRTException("There are {} invalid frames.".format(number_of_invalid_frames)) def get_sorted_output_names(self): - return self._hef.get_sorted_output_names(self.name) + return self._configured_network.get_sorted_output_names() def _get_intermediate_buffer(self, src_context_index, src_stream_index): with ExceptionWrapper(): @@ -859,7 +872,6 @@ class InferVStreams(object): ``[class_count, BBOX_PARAMS, detections_count]`` padded with empty bboxes. """ - self._logger = default_logger() self._configured_net_group = configured_net_group self._net_group_name = configured_net_group.name self._input_vstreams_params = input_vstreams_params @@ -895,8 +907,9 @@ class InferVStreams(object): network_name = self._input_name_to_network_name[input_name] if (network_name not in already_seen_networks) : already_seen_networks.add(network_name) + output_vstream_infos = self._configured_net_group.get_output_vstream_infos() for output_name in self._network_name_to_outputs[network_name]: - output_buffers_info[output_name] = OutputLayerUtils(self._configured_net_group._hef, output_name, self._infer_pipeline, + output_buffers_info[output_name] = OutputLayerUtils(output_vstream_infos, output_name, self._infer_pipeline, self._net_group_name) output_tensor_info = output_buffers_info[output_name].output_tensor_info shape, dtype = output_tensor_info @@ -920,7 +933,7 @@ class InferVStreams(object): are output data tensors as :obj:`numpy.ndarray` (or list of :obj:`numpy.ndarray` in case of nms output and tf_nms_format=False). 
""" - time_before_infer_calcs = time.time() + time_before_infer_calcs = time.perf_counter() if not isinstance(input_data, dict): input_stream_infos = self._configured_net_group.get_input_stream_infos() if len(input_stream_infos) != 1: @@ -938,9 +951,9 @@ class InferVStreams(object): self._make_c_contiguous_if_needed(input_layer_name, input_data) with ExceptionWrapper(): - time_before_infer = time.time() + time_before_infer = time.perf_counter() self._infer_pipeline.infer(input_data, output_buffers, batch_size) - self._hw_time = time.time() - time_before_infer + self._hw_time = time.perf_counter() - time_before_infer for name, result_array in output_buffers.items(): is_nms = output_buffers_info[name].is_nms @@ -957,7 +970,7 @@ class InferVStreams(object): else: output_buffers[name] = HailoRTTransformUtils.output_raw_buffer_to_nms_format(result_array, nms_shape.number_of_classes) - self._total_time = time.time() - time_before_infer_calcs + self._total_time = time.perf_counter() - time_before_infer_calcs return output_buffers def get_hw_time(self): @@ -982,7 +995,7 @@ class InferVStreams(object): input_expected_dtype = self._infer_pipeline.get_host_dtype(input_layer_name) if input_dtype != input_expected_dtype: - self._logger.warning("Given input data dtype ({}) is different than inferred dtype ({}). " + default_logger().warning("Given input data dtype ({}) is different than inferred dtype ({}). 
" "conversion for every frame will reduce performance".format(input_dtype, input_expected_dtype)) input_data[input_layer_name] = input_data[input_layer_name].astype(input_expected_dtype) @@ -1015,7 +1028,7 @@ class InferVStreams(object): def _make_c_contiguous_if_needed(self, input_layer_name, input_data): if not input_data[input_layer_name].flags.c_contiguous: - self._logger.warning("Converting {} numpy array to be C_CONTIGUOUS".format( + default_logger().warning("Converting {} numpy array to be C_CONTIGUOUS".format( input_layer_name)) input_data[input_layer_name] = numpy.asarray(input_data[input_layer_name], order='C') @@ -1139,10 +1152,9 @@ class HailoRTTransformUtils(object): return FormatType.FLOAT32 raise HailoRTException("unsupported data type {}".format(dtype)) +# TODO: HRT-10427 - Remove class InternalEthernetDevice(object): def __init__(self, address, port, response_timeout_seconds=10, max_number_of_attempts=3): - # TODO: HRT-9987 - Add this deprecation warning - # default_logger().warning("InternalEthernetDevice is deprecated! 
Please use VDevice object.") self.device = None self._address = address self._port = port @@ -1204,7 +1216,7 @@ class PcieDeviceInfo(_pyhailort.PcieDeviceInfo): except HailoRTException: raise ArgumentTypeError('Invalid device info string, format is []::.') - +# TODO: HRT-10427 - Remove class InternalPcieDevice(object): def __init__(self, device_info=None): self.device = None @@ -1224,6 +1236,7 @@ class InternalPcieDevice(object): self.device.release() self.device = None + # TODO: HRT-10427 - Move to a static method in pyhailort_internal when InternalPcieDevice removed @staticmethod def scan_devices(): with ExceptionWrapper(): @@ -1242,7 +1255,7 @@ class InternalPcieDevice(object): with ExceptionWrapper(): return self.device.direct_read_memory(address, size) - +# TODO: HRT-10427 - Remove when removing InternalPcieDevice class PcieDebugLog(object): def __init__(self, pci_device): self._pcie_device = pci_device @@ -1300,7 +1313,7 @@ class HailoFormatFlags(_pyhailort.FormatFlags): SUPPORTED_PROTOCOL_VERSION = 2 SUPPORTED_FW_MAJOR = 4 -SUPPORTED_FW_MINOR = 13 +SUPPORTED_FW_MINOR = 14 SUPPORTED_FW_REVISION = 0 MEGA_MULTIPLIER = 1000.0 * 1000.0 @@ -1622,7 +1635,6 @@ class Control: def __init__(self, device: '_pyhailort.Device'): self.__device = device - self._logger = default_logger() # TODO: should remove? if sys.platform != "win32": @@ -2269,7 +2281,6 @@ class Device: """ gc.collect() - self._logger = default_logger() # Device __del__ function tries to release self._device. # to avoid AttributeError if the __init__ func fails, we set it to None first. # https://stackoverflow.com/questions/6409644/is-del-called-on-an-object-that-doesnt-complete-init @@ -2323,12 +2334,16 @@ class Device: Args: hef (:class:`~hailo_platform.pyhailort.pyhailort.HEF`): HEF to configure the vdevice from configure_params_by_name (dict, optional): Maps between each net_group_name to configure_params. If not provided, default params will be applied + + Note: + This function is deprecated. 
Support will be removed in future versions. """ + default_logger().warning("Usage of Device.configure is deprecated! One should use VDevice for inference") if self._creation_pid != os.getpid(): raise HailoRTException("Device can only be configured from the process it was created in.") with ExceptionWrapper(): - configured_apps = self._device.configure(hef._hef, configure_params_by_name) - configured_networks = [ConfiguredNetwork(configured_app, self, hef) for configured_app in configured_apps] + configured_ngs_handles = self._device.configure(hef._hef, configure_params_by_name) + configured_networks = [ConfiguredNetwork(configured_ng_handle) for configured_ng_handle in configured_ngs_handles] self._loaded_network_groups.extend(configured_networks) return configured_networks @@ -2385,7 +2400,6 @@ class VDevice(object): list of all available devices. Excludes 'params'. Cannot be used together with device_id. """ gc.collect() - self._logger = default_logger() # VDevice __del__ function tries to release self._vdevice. # to avoid AttributeError if the __init__ func fails, we set it to None first. 
@@ -2461,8 +2475,8 @@ class VDevice(object): if self._creation_pid != os.getpid(): raise HailoRTException("VDevice can only be configured from the process it was created in.") with ExceptionWrapper(): - configured_apps = self._vdevice.configure(hef._hef, configure_params_by_name) - configured_networks = [ConfiguredNetwork(configured_app, self, hef) for configured_app in configured_apps] + configured_ngs_handles = self._vdevice.configure(hef._hef, configure_params_by_name) + configured_networks = [ConfiguredNetwork(configured_ng_handle) for configured_ng_handle in configured_ngs_handles] self._loaded_network_groups.extend(configured_networks) return configured_networks @@ -2539,9 +2553,9 @@ class InputVStreamParams(object): timeout_ms = DEFAULT_VSTREAM_TIMEOUT_MS if queue_size is None: queue_size = DEFAULT_VSTREAM_QUEUE_SIZE - name = network_name if network_name is not None else configured_network.name + name = network_name if network_name is not None else "" with ExceptionWrapper(): - return configured_network._hef._hef.get_input_vstreams_params(name, quantized, + return configured_network._configured_network.make_input_vstream_params(name, quantized, format_type, timeout_ms, queue_size) @staticmethod @@ -2613,9 +2627,9 @@ class OutputVStreamParams(object): timeout_ms = DEFAULT_VSTREAM_TIMEOUT_MS if queue_size is None: queue_size = DEFAULT_VSTREAM_QUEUE_SIZE - name = network_name if network_name is not None else configured_network.name + name = network_name if network_name is not None else "" with ExceptionWrapper(): - return configured_network._hef._hef.get_output_vstreams_params(name, quantized, + return configured_network._configured_network.make_output_vstream_params(name, quantized, format_type, timeout_ms, queue_size) @staticmethod @@ -2820,8 +2834,8 @@ class InputVStreams(object): class OutputLayerUtils(object): - def __init__(self, hef, vstream_name, pipeline, net_group_name=""): - self._hef = hef + def __init__(self, output_vstream_infos, vstream_name, 
pipeline, net_group_name=""): + self._output_vstream_infos = output_vstream_infos self._vstream_info = self._get_vstream_info(net_group_name, vstream_name) if isinstance(pipeline, (_pyhailort.InferVStreams)): @@ -2866,8 +2880,7 @@ class OutputLayerUtils(object): return self._quantized_empty_bbox def _get_vstream_info(self, net_group_name, vstream_name): - output_vstream_infos = self._hef.get_output_vstream_infos(net_group_name) - for info in output_vstream_infos: + for info in self._output_vstream_infos: if info.name == vstream_name: return info raise HailoRTException("No vstream matches the given name {}".format(vstream_name)) @@ -2885,7 +2898,8 @@ class OutputVStream(object): def __init__(self, configured_network, recv_object, name, tf_nms_format=False, net_group_name=""): self._recv_object = recv_object - self._output_layer_utils = OutputLayerUtils(configured_network._hef, name, self._recv_object, net_group_name) + output_vstream_infos = configured_network.get_output_vstream_infos() + self._output_layer_utils = OutputLayerUtils(output_vstream_infos, name, self._recv_object, net_group_name) self._output_dtype = self._output_layer_utils.output_dtype self._vstream_info = self._output_layer_utils._vstream_info self._output_tensor_info = self._output_layer_utils.output_tensor_info @@ -3030,15 +3044,3 @@ class OutputVStreams(object): def _after_fork_in_child(self): for vstream in self._vstreams.values(): vstream._after_fork_in_child() - - -class YOLOv5PostProcessOp(object): - - def __init__(self, anchors, shapes, formats, quant_infos, image_height, image_width, confidence_threshold, iou_threshold, num_of_classes, - max_boxes, cross_classes=True): - - self._op = _pyhailort.YOLOv5PostProcessOp.create(anchors, shapes, formats, quant_infos, image_height, image_width, confidence_threshold, - iou_threshold, num_of_classes, max_boxes, cross_classes) - - def execute(self, net_flow_tensors): - return self._op.execute(net_flow_tensors) \ No newline at end of file diff --git 
a/hailort/libhailort/bindings/python/platform/hailo_platform/tools/udp_rate_limiter.py b/hailort/libhailort/bindings/python/platform/hailo_platform/tools/udp_rate_limiter.py index b597e32..c9d4035 100644 --- a/hailort/libhailort/bindings/python/platform/hailo_platform/tools/udp_rate_limiter.py +++ b/hailort/libhailort/bindings/python/platform/hailo_platform/tools/udp_rate_limiter.py @@ -6,10 +6,9 @@ from __future__ import division from builtins import object -from hailo_platform.pyhailort.pyhailort import ConfiguredNetwork, HEF, TrafficControl, INPUT_DATAFLOW_BASE_PORT +from hailo_platform.pyhailort.pyhailort import HEF, NetworkRateLimiter, INPUT_DATAFLOW_BASE_PORT DEFAULT_MAX_KBPS = 850e3 -DEFAULT_MAX_KBPS_PAPRIKA_B0 = 160e3 BYTES_IN_Kbits = 125.0 @@ -28,15 +27,9 @@ class BadTCCallError(Exception): pass -def get_max_supported_kbps(hw_arch="hailo8"): - # TODO: What should be here? - if hw_arch == "paprika_b0": - return DEFAULT_MAX_KBPS_PAPRIKA_B0 - return DEFAULT_MAX_KBPS - class RateLimiterWrapper(object): """UDPRateLimiter wrapper enabling ``with`` statements.""" - def __init__(self, network_group, fps=1, fps_factor=1.0, remote_ip=None, hw_arch=None): + def __init__(self, configured_network_group, fps=1, fps_factor=1.0, remote_ip=None): """RateLimiterWrapper constructor. Args: @@ -44,32 +37,24 @@ class RateLimiterWrapper(object): target network_group. fps (int): Frame rate. fps_factor (float): Safety factor by which to multiply the calculated UDP rate. + remote_ip (str): Device IP address. """ - if not isinstance(network_group, ConfiguredNetwork): - return RateLimiterException("The API was changed. RateLimiterWrapper accept ConfiguredNetwork instead of ActivatedNetwork") - self._network_group = network_group - if remote_ip is not None: - self._remote_ip = remote_ip - else: - # this line should be removed. 
this parameter will be removed from the object - self._remote_ip = network_group._target.device_id + self._network_group = configured_network_group + if remote_ip is None: + raise RateLimiterException("In order to use RateLimiterWrapper, one should pass 'remote_ip'") + self._remote_ip = remote_ip self._fps = fps self._fps_factor = fps_factor - if hw_arch is not None: - self._hw_arch = hw_arch - else: - # this line should be removed. this parameter will be removed from the object - self._hw_arch = network_group._target._hw_arch if hasattr(network_group._target, '_hw_arch') else None self._rates_dict = {} self._tc_dict = {} def __enter__(self): - max_supported_kbps_rate = get_max_supported_kbps(self._hw_arch) + max_supported_kbps_rate = DEFAULT_MAX_KBPS self._rates_dict = self._network_group.get_udp_rates_dict((self._fps * self._fps_factor), (max_supported_kbps_rate * BYTES_IN_Kbits)) for port, rate in self._rates_dict.items(): - self._tc_dict[port] = TrafficControl(self._remote_ip, port, rate) + self._tc_dict[port] = NetworkRateLimiter(self._remote_ip, port, rate) self._tc_dict[port].reset_rate_limit() self._tc_dict[port].set_rate_limit() @@ -82,7 +67,7 @@ class RateLimiterWrapper(object): class UDPRateLimiter(object): """Enables limiting or removing limits on UDP communication rate to a board.""" def __init__(self, remote_ip, port, rate_kbits_per_sec = 0): - self._tc = TrafficControl(remote_ip, port, rate_kbits_per_sec * BYTES_IN_Kbits) + self._tc = NetworkRateLimiter(remote_ip, port, rate_kbits_per_sec * BYTES_IN_Kbits) def set_rate_limit(self): return self._tc.set_rate_limit() diff --git a/hailort/libhailort/bindings/python/platform/hailo_tutorials/notebooks/HRT_0_Inference_Tutorial.ipynb b/hailort/libhailort/bindings/python/platform/hailo_tutorials/notebooks/HRT_0_Inference_Tutorial.ipynb index 8a7785c..d376d08 100644 --- a/hailort/libhailort/bindings/python/platform/hailo_tutorials/notebooks/HRT_0_Inference_Tutorial.ipynb +++ 
b/hailort/libhailort/bindings/python/platform/hailo_tutorials/notebooks/HRT_0_Inference_Tutorial.ipynb @@ -126,8 +126,8 @@ "outputs": [], "source": [ "def send(configured_network, num_frames):\n", - " vstreams_params = InputVStreamParams.make(configured_network)\n", " configured_network.wait_for_activation(1000)\n", + " vstreams_params = InputVStreamParams.make(configured_network)\n", " with InputVStreams(configured_network, vstreams_params) as vstreams:\n", " vstream_to_buffer = {vstream: np.ndarray([1] + list(vstream.shape), dtype=vstream.dtype) for vstream in vstreams}\n", " for _ in range(num_frames):\n", diff --git a/hailort/libhailort/bindings/python/platform/hailo_tutorials/notebooks/HRT_1_Power_Measurement_Tutorial.ipynb b/hailort/libhailort/bindings/python/platform/hailo_tutorials/notebooks/HRT_1_Power_Measurement_Tutorial.ipynb index b361e0d..d4edead 100644 --- a/hailort/libhailort/bindings/python/platform/hailo_tutorials/notebooks/HRT_1_Power_Measurement_Tutorial.ipynb +++ b/hailort/libhailort/bindings/python/platform/hailo_tutorials/notebooks/HRT_1_Power_Measurement_Tutorial.ipynb @@ -25,8 +25,8 @@ "source": [ "## Single power measurement" ], - "cell_type": "markdown", - "metadata": {} + "cell_type": "markdown", + "metadata": {} }, { "cell_type": "code", @@ -37,7 +37,7 @@ "%matplotlib inline\n", "import time\n", "\n", - "from hailo_platform import PcieDevice, DvmTypes, PowerMeasurementTypes, SamplingPeriod, AveragingFactor, MeasurementBufferIndex # noqa F401\n" + "from hailo_platform import Device, DvmTypes, PowerMeasurementTypes, SamplingPeriod, AveragingFactor, MeasurementBufferIndex # noqa F401\n" ] }, { @@ -53,7 +53,7 @@ "metadata": {}, "outputs": [], "source": [ - "target = PcieDevice()" + "target = Device()" ] }, { diff --git a/hailort/libhailort/bindings/python/platform/hailo_tutorials/notebooks/HRT_2_Inference_Tutorial_Multi_Process_Service.ipynb 
b/hailort/libhailort/bindings/python/platform/hailo_tutorials/notebooks/HRT_2_Inference_Tutorial_Multi_Process_Service.ipynb new file mode 100644 index 0000000..05fc1bc --- /dev/null +++ b/hailort/libhailort/bindings/python/platform/hailo_tutorials/notebooks/HRT_2_Inference_Tutorial_Multi_Process_Service.ipynb @@ -0,0 +1,130 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "# Python inference tutorial - Multi Process Service and Model Scheduler\n", + "\n", + "This tutorial will walk you through the inference process using The Model Scheduler.\n", + "\n", + "**Requirements:**\n", + "\n", + "* Run HailoRT Multi-Process Service before running inference. See installation steps in [Multi-Process Service](../../inference/inference.rst)\n", + "* Run the notebook inside the Python virtual environment: ```source hailo_virtualenv/bin/activate```\n", + "\n", + "It is recommended to use the command ``hailo tutorial`` (when inside the virtualenv) to open a Jupyter server that contains the tutorials." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Running Inference using HailoRT\n", + "\n", + "In this example we will use the Model Scheduler to run inference on multiple models.\n", + "Each model is represented by an HEF which is built using the Hailo Dataflow Compiler.\n", + "An HEF is Hailo's binary format for neural networks. The HEF files contain:\n", + "\n", + "* Target HW configuration\n", + "* Weights\n", + "* Metadata for HailoRT (e.g. input/output scaling)\n", + "\n", + "The Model Scheduler is an HailoRT component that comes to enhance and simplify the usage\n", + "of the same Hailo device by multiple networks. 
The responsibility for activating/deactivating the network\n", + "groups is now under HailoRT, and done **automatically** without user application intervention.\n", + "In order to use the Model Scheduler, create the VDevice with scheduler enabled, configure all models to the device, and start inference on all models:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "from multiprocessing import Process\n", + "from hailo_platform import (HEF, VDevice, HailoStreamInterface, InferVStreams, ConfigureParams,\n", + " InputVStreamParams, OutputVStreamParams, InputVStreams, OutputVStreams, FormatType, HailoSchedulingAlgorithm)\n", + "\n", + "\n", + "# Define the function to run inference on the model\n", + "def infer(network_group, input_vstreams_params, output_vstreams_params, input_data):\n", + " rep_count = 100\n", + " with InferVStreams(network_group, input_vstreams_params, output_vstreams_params) as infer_pipeline:\n", + " for i in range(rep_count):\n", + " infer_results = infer_pipeline.infer(input_data)\n", + "\n", + "\n", + "# Loading compiled HEFs:\n", + "first_hef_path = '../hefs/resnet_v1_18.hef'\n", + "second_hef_path = '../hefs/shortcut_net.hef'\n", + "first_hef = HEF(first_hef_path)\n", + "second_hef = HEF(second_hef_path)\n", + "hefs = [first_hef, second_hef]\n", + "\n", + "# Creating the VDevice target with scheduler enabled\n", + "params = VDevice.create_params()\n", + "params.scheduling_algorithm = HailoSchedulingAlgorithm.ROUND_ROBIN\n", + "with VDevice(params) as target:\n", + " infer_processes = []\n", + "\n", + " # Configure network groups\n", + " for hef in hefs:\n", + " configure_params = ConfigureParams.create_from_hef(hef=hef, interface=HailoStreamInterface.PCIe)\n", + " network_groups = target.configure(hef, configure_params)\n", + " network_group = network_groups[0]\n", + "\n", + " # Create input and output virtual streams params\n", + " # Quantized 
argument signifies whether or not the incoming data is already quantized.\n", + " # Data is quantized by HailoRT if and only if quantized == False.\n", + " input_vstreams_params = InputVStreamParams.make(network_group, quantized=False, format_type=FormatType.FLOAT32)\n", + " output_vstreams_params = OutputVStreamParams.make(network_group, quantized=True, format_type=FormatType.UINT8)\n", + "\n", + " # Define dataset params\n", + " input_vstream_info = hef.get_input_vstream_infos()[0]\n", + " image_height, image_width, channels = input_vstream_info.shape\n", + " num_of_frames = 10\n", + " low, high = 2, 20\n", + "\n", + " # Generate random dataset\n", + " dataset = np.random.randint(low, high, (num_of_frames, image_height, image_width, channels)).astype(np.float32)\n", + " input_data = {input_vstream_info.name: dataset}\n", + "\n", + " # Create infer process\n", + " infer_process = Process(target=infer, args=(network_group, input_vstreams_params, output_vstreams_params, input_data))\n", + " infer_processes.append(infer_process)\n", + "\n", + " print(f'Starting streaming on multiple models using scheduler')\n", + " for infer_process in infer_processes:\n", + " infer_process.start()\n", + " for infer_process in infer_processes:\n", + " infer_process.join()\n", + "\n", + " print('Done inference')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/hailort/libhailort/bindings/python/platform/requirements.txt b/hailort/libhailort/bindings/python/platform/requirements.txt index c11aa35..bce1a18 100644 --- a/hailort/libhailort/bindings/python/platform/requirements.txt +++ 
b/hailort/libhailort/bindings/python/platform/requirements.txt @@ -1,8 +1,8 @@ appdirs==1.4.4 argcomplete==2.0.0 contextlib2==0.6.0.post1 -distlib==0.3.4 -filelock==3.4.1 +distlib==0.3.6 +filelock==3.8.0 future==0.18.2 importlib-metadata==5.1.0 importlib-resources==5.1.2 @@ -11,4 +11,4 @@ netifaces==0.10.9 numpy==1.23.3 typing_extensions==4.1.1 verboselogs==1.7 -virtualenv==20.4.3 +virtualenv==20.17.0 diff --git a/hailort/libhailort/bindings/python/platform/setup.py b/hailort/libhailort/bindings/python/platform/setup.py index 445e867..425291b 100644 --- a/hailort/libhailort/bindings/python/platform/setup.py +++ b/hailort/libhailort/bindings/python/platform/setup.py @@ -69,6 +69,6 @@ if __name__ == "__main__": "linux_aarch64", ], url="https://hailo.ai/", - version="4.13.0", + version="4.14.0", zip_safe=False, ) diff --git a/hailort/libhailort/bindings/python/src/CMakeLists.txt b/hailort/libhailort/bindings/python/src/CMakeLists.txt index bd032c4..0f170a8 100644 --- a/hailort/libhailort/bindings/python/src/CMakeLists.txt +++ b/hailort/libhailort/bindings/python/src/CMakeLists.txt @@ -1,12 +1,23 @@ cmake_minimum_required(VERSION 3.0.0) -option(HAILO_BUILD_PYHAILORT_INTERNAL OFF) +include(ExternalProject) + +FUNCTION(exclude_archive_libs_symbols target) # should be same as in common_compiler_options.cmake + if(WIN32) + # TODO: check if there are required actions for Windows + elseif(UNIX) + get_property(TEMP_LINK_FLAGS TARGET ${target} PROPERTY LINK_FLAGS) + set(TEMP_LINK_FLAGS "${TEMP_LINK_FLAGS} -Wl,--exclude-libs=ALL") + set_property(TARGET ${target} PROPERTY LINK_FLAGS ${TEMP_LINK_FLAGS}) + else() + message(FATAL_ERROR "Unexpeced host, stopping build") + endif() +ENDFUNCTION() if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux") if(NOT DEFINED PYBIND11_PYTHON_VERSION) message(FATAL_ERROR "PYBIND11_PYTHON_VERSION is not defined. To build _pyhailort, pass python version") endif() - string(REPLACE "." 
"" dpython ${PYBIND11_PYTHON_VERSION}) # E.g "3.5" -> "35" if(${dpython} LESS "38") set(m_flag "m") @@ -16,6 +27,8 @@ if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux") set(PYTHON_MODULE_EXTENSION ".cpython-${dpython}${m_flag}-${CMAKE_SYSTEM_PROCESSOR}-linux-gnu.so") endif() +option(HAILO_BUILD_PYHAILORT_INTERNAL OFF) + set(PYHAILORT_DIR ${CMAKE_CURRENT_LIST_DIR}) pybind11_add_module(_pyhailort @@ -24,29 +37,27 @@ pybind11_add_module(_pyhailort hef_api.cpp vstream_api.cpp quantization_api.cpp - ${HAILORT_OPS_CPP_SOURCES} - ${HAILORT_COMMON_CPP_SOURCES} ) set_target_properties(_pyhailort PROPERTIES CXX_STANDARD 14 CXX_STANDARD_REQUIRED YES + CXX_EXTENSIONS NO + C_VISIBILITY_PRESET hidden + CXX_VISIBILITY_PRESET hidden + # VISIBILITY_INLINES_HIDDEN YES ) -target_include_directories(_pyhailort - PRIVATE - $ - $ - $ - $ -) +find_package(HailoRT 4.14.0 EXACT REQUIRED) -target_link_libraries(_pyhailort PRIVATE libhailort spdlog::spdlog) +target_link_libraries(_pyhailort PRIVATE HailoRT::libhailort) if(WIN32) - target_link_libraries(_pyhailort PRIVATE Ws2_32 Iphlpapi Shlwapi) -endif() -if(HAILO_BUILD_SERVICE) - target_link_libraries(_pyhailort PRIVATE grpc++_unsecure hailort_rpc_grpc_proto hef_proto) + target_link_libraries(_pyhailort PRIVATE Ws2_32) + target_compile_options(_pyhailort PRIVATE + /DWIN32_LEAN_AND_MEAN + /DNOMINMAX # NOMINMAX is required in order to play nice with std::min/std::max (otherwise Windows.h defines it's own) + /wd4201 /wd4251 + ) endif() target_compile_options(_pyhailort PRIVATE ${HAILORT_COMPILE_OPTIONS}) diff --git a/hailort/libhailort/bindings/python/src/bindings_common.hpp b/hailort/libhailort/bindings/python/src/bindings_common.hpp index 513175b..6d87df9 100644 --- a/hailort/libhailort/bindings/python/src/bindings_common.hpp +++ b/hailort/libhailort/bindings/python/src/bindings_common.hpp @@ -14,8 +14,6 @@ #include "hailo/hailort_common.hpp" #include "hailo/network_group.hpp" -#include "common/logger_macros.hpp" - #include "utils.hpp" #include diff 
--git a/hailort/libhailort/bindings/python/src/device_api.cpp b/hailort/libhailort/bindings/python/src/device_api.cpp index c6f4a5b..6315daf 100644 --- a/hailort/libhailort/bindings/python/src/device_api.cpp +++ b/hailort/libhailort/bindings/python/src/device_api.cpp @@ -9,6 +9,7 @@ **/ #include "device_api.hpp" +#include namespace hailort @@ -39,28 +40,7 @@ DeviceWrapper DeviceWrapper::create_pcie(hailo_pcie_device_info_t &device_info) DeviceWrapper DeviceWrapper::create_eth(const std::string &device_address, uint16_t port, uint32_t timeout_milliseconds, uint8_t max_number_of_attempts) { - hailo_eth_device_info_t device_info = {}; - - /* Validate address length */ - if (INET_ADDRSTRLEN < device_address.size()) { - EXIT_WITH_ERROR("device_address is too long") - } - - device_info.host_address.sin_family = AF_INET; - device_info.host_address.sin_port = HAILO_ETH_PORT_ANY; - auto status = Socket::pton(AF_INET, HAILO_ETH_ADDRESS_ANY, &(device_info.host_address.sin_addr)); - VALIDATE_STATUS(status); - - device_info.device_address.sin_family = AF_INET; - device_info.device_address.sin_port = port; - status = Socket::pton(AF_INET, device_address.c_str(), &(device_info.device_address.sin_addr)); - VALIDATE_STATUS(status); - - device_info.timeout_millis = timeout_milliseconds; - device_info.max_number_of_attempts = max_number_of_attempts; - device_info.max_payload_size = HAILO_DEFAULT_ETH_MAX_PAYLOAD_SIZE; - - auto device = Device::create_eth(device_info); + auto device = Device::create_eth(device_address, port, timeout_milliseconds, max_number_of_attempts); VALIDATE_EXPECTED(device); return DeviceWrapper(device.release()); @@ -125,7 +105,7 @@ bool DeviceWrapper::get_overcurrent_state() py::bytes DeviceWrapper::read_memory(uint32_t address, uint32_t length) { - std::unique_ptr response = make_unique_nothrow(length, '\x00'); + std::unique_ptr response = std::make_unique(length, '\x00'); VALIDATE_NOT_NULL(response, HAILO_OUT_OF_HOST_MEMORY); MemoryView 
data_view(const_cast(reinterpret_cast(response->data())), length); @@ -162,7 +142,7 @@ py::bytes DeviceWrapper::i2c_read(hailo_i2c_slave_config_t *slave_config, uint32 { VALIDATE_NOT_NULL(slave_config, HAILO_INVALID_ARGUMENT); - std::unique_ptr response = make_unique_nothrow(length, '\x00'); + std::unique_ptr response = std::make_unique(length, '\x00'); VALIDATE_NOT_NULL(response, HAILO_OUT_OF_HOST_MEMORY); MemoryView data_view(const_cast(reinterpret_cast(response->data())), length); @@ -229,7 +209,7 @@ py::bytes DeviceWrapper::read_user_config() auto config_buffer = device().read_user_config(); VALIDATE_EXPECTED(config_buffer); - std::unique_ptr response = make_unique_nothrow( + std::unique_ptr response = std::make_unique( const_cast(reinterpret_cast(config_buffer->data())), config_buffer->size()); VALIDATE_NOT_NULL(response, HAILO_OUT_OF_HOST_MEMORY); @@ -255,7 +235,7 @@ py::bytes DeviceWrapper::read_board_config() auto config_buffer = device().read_board_config(); VALIDATE_EXPECTED(config_buffer); - std::unique_ptr response = make_unique_nothrow( + std::unique_ptr response = std::make_unique( const_cast(reinterpret_cast(config_buffer->data())), config_buffer->size()); VALIDATE_NOT_NULL(response, HAILO_OUT_OF_HOST_MEMORY); @@ -307,7 +287,7 @@ py::bytes DeviceWrapper::sensor_get_sections_info() auto buffer = device().sensor_get_sections_info(); VALIDATE_EXPECTED(buffer); - std::unique_ptr response = make_unique_nothrow( + std::unique_ptr response = std::make_unique( const_cast(reinterpret_cast(buffer->data())), buffer->size()); VALIDATE_NOT_NULL(response, HAILO_OUT_OF_HOST_MEMORY); diff --git a/hailort/libhailort/bindings/python/src/device_api.hpp b/hailort/libhailort/bindings/python/src/device_api.hpp index b216cce..d357316 100644 --- a/hailort/libhailort/bindings/python/src/device_api.hpp +++ b/hailort/libhailort/bindings/python/src/device_api.hpp @@ -12,10 +12,9 @@ #define _DEVICE_API_HPP_ #include "hailo/hailort.h" +#include #include "hailo/device.hpp" 
-#include "common/socket.hpp" - #include "utils.hpp" #include "hef_api.hpp" diff --git a/hailort/libhailort/bindings/python/src/hef_api.cpp b/hailort/libhailort/bindings/python/src/hef_api.cpp index f255768..5644f70 100644 --- a/hailort/libhailort/bindings/python/src/hef_api.cpp +++ b/hailort/libhailort/bindings/python/src/hef_api.cpp @@ -10,6 +10,7 @@ **/ #include "hef_api.hpp" +#include namespace hailort @@ -20,7 +21,7 @@ HefWrapper::HefWrapper(const std::string &hef_path) auto hef_expected = Hef::create(hef_path); VALIDATE_EXPECTED(hef_expected); - hef = make_unique_nothrow(hef_expected.release()); + hef = std::make_unique(hef_expected.release()); if (nullptr == hef) { THROW_STATUS_ERROR(HAILO_OUT_OF_HOST_MEMORY); } @@ -31,7 +32,7 @@ HefWrapper::HefWrapper(const MemoryView &hef_buffer) auto hef_expected = Hef::create(hef_buffer); VALIDATE_EXPECTED(hef_expected); - hef = make_unique_nothrow(hef_expected.release()); + hef = std::make_unique(hef_expected.release()); if (nullptr == hef) { THROW_STATUS_ERROR(HAILO_OUT_OF_HOST_MEMORY); } @@ -255,7 +256,11 @@ void HefWrapper::initialize_python_module(py::module &m) .def("get_networks_names", &HefWrapper::get_networks_names) ; - py::class_(m, "ConfiguredNetworkGroup") + py::class_>(m, "ConfiguredNetworkGroup") + .def("is_scheduled", [](ConfiguredNetworkGroup& self) + { + return self.is_scheduled(); + }) .def("get_name", [](ConfiguredNetworkGroup& self) { return self.name(); @@ -300,30 +305,18 @@ void HefWrapper::initialize_python_module(py::module &m) }) .def("before_fork", [](ConfiguredNetworkGroup& self) { -#ifdef HAILO_SUPPORT_MULTI_PROCESS auto status = self.before_fork(); VALIDATE_STATUS(status); -#else - (void)self; -#endif // HAILO_SUPPORT_MULTI_PROCESS }) .def("after_fork_in_parent", [](ConfiguredNetworkGroup& self) { -#ifdef HAILO_SUPPORT_MULTI_PROCESS auto status = self.after_fork_in_parent(); VALIDATE_STATUS(status); -#else - (void)self; -#endif // HAILO_SUPPORT_MULTI_PROCESS }) .def("after_fork_in_child", 
[](ConfiguredNetworkGroup& self) { -#ifdef HAILO_SUPPORT_MULTI_PROCESS auto status = self.after_fork_in_child(); VALIDATE_STATUS(status); -#else - (void)self; -#endif // HAILO_SUPPORT_MULTI_PROCESS }) .def("set_scheduler_timeout", [](ConfiguredNetworkGroup& self, int timeout, const std::string &network_name="") { @@ -341,6 +334,112 @@ void HefWrapper::initialize_python_module(py::module &m) auto status = self.set_scheduler_priority(priority); VALIDATE_STATUS(status); }) + .def("get_networks_names", [](ConfiguredNetworkGroup& self) + { + auto network_infos = self.get_network_infos(); + VALIDATE_EXPECTED(network_infos); + std::vector result; + result.reserve(network_infos->size()); + for (const auto &info : network_infos.value()) { + result.push_back(info.name); + } + return py::cast(result); + }) + .def("get_sorted_output_names", [](ConfiguredNetworkGroup& self) + { + auto names_list = self.get_sorted_output_names(); + VALIDATE_EXPECTED(names_list); + return py::cast(names_list.release()); + }) + .def("get_input_vstream_infos", [](ConfiguredNetworkGroup& self, const std::string &name) + { + auto result = self.get_input_vstream_infos(name); + VALIDATE_EXPECTED(result); + return py::cast(result.value()); + }) + .def("get_output_vstream_infos", [](ConfiguredNetworkGroup& self, const std::string &name) + { + auto result = self.get_output_vstream_infos(name); + VALIDATE_EXPECTED(result); + return py::cast(result.value()); + }) + .def("get_all_vstream_infos", [](ConfiguredNetworkGroup& self, const std::string &name) + { + auto result = self.get_all_vstream_infos(name); + VALIDATE_EXPECTED(result); + return py::cast(result.value()); + }) + .def("get_all_stream_infos", [](ConfiguredNetworkGroup& self, const std::string &name) + { + auto result = self.get_all_stream_infos(name); + VALIDATE_EXPECTED(result); + return py::cast(result.value()); + }) + .def("get_input_stream_infos", [](ConfiguredNetworkGroup& self, const std::string &name) + { + std::vector input_streams_infos; 
+ auto all_streams = self.get_all_stream_infos(name); + VALIDATE_EXPECTED(all_streams); + for (auto &info : all_streams.value()) { + if (HAILO_H2D_STREAM == info.direction) { + input_streams_infos.push_back(std::move(info)); + } + } + return py::cast(input_streams_infos); + }) + .def("get_output_stream_infos", [](ConfiguredNetworkGroup& self, const std::string &name) + { + std::vector output_streams_infos; + auto all_streams = self.get_all_stream_infos(name); + VALIDATE_EXPECTED(all_streams); + for (auto &info : all_streams.value()) { + if (HAILO_D2H_STREAM == info.direction) { + output_streams_infos.push_back(std::move(info)); + } + } + return py::cast(output_streams_infos); + }) + .def("get_vstream_names_from_stream_name", [](ConfiguredNetworkGroup& self, const std::string &stream_name) + { + auto result = self.get_vstream_names_from_stream_name(stream_name); + VALIDATE_EXPECTED(result); + return py::cast(result.release()); + }) + .def("get_stream_names_from_vstream_name", [](ConfiguredNetworkGroup& self, const std::string &vstream_name) + { + auto result = self.get_stream_names_from_vstream_name(vstream_name); + VALIDATE_EXPECTED(result); + return py::cast(result.release()); + }) + .def("make_input_vstream_params", [](ConfiguredNetworkGroup& self, const std::string &name, bool quantized, hailo_format_type_t format_type, + uint32_t timeout_ms, uint32_t queue_size) + { + auto result = self.make_input_vstream_params(quantized, format_type, timeout_ms, queue_size, name); + VALIDATE_EXPECTED(result); + return py::cast(result.release()); + }) + .def("make_output_vstream_params", [](ConfiguredNetworkGroup& self, const std::string &name, bool quantized, hailo_format_type_t format_type, + uint32_t timeout_ms, uint32_t queue_size) + { + auto result = self.make_output_vstream_params(quantized, format_type, timeout_ms, queue_size, name); + VALIDATE_EXPECTED(result); + return py::cast(result.release()); + }) + .def(py::pickle( + [](const ConfiguredNetworkGroup &cng) { // 
__getstate__ + auto handle = cng.get_client_handle(); + VALIDATE_EXPECTED(handle); + return py::make_tuple(handle.value(), cng.name()); + }, + [](py::tuple t) { // __setstate__ + auto handle = t[0].cast(); + auto net_group_name = t[1].cast(); + auto net_group = ConfiguredNetworkGroup::duplicate_network_group_client(handle, net_group_name); + VALIDATE_EXPECTED(net_group); + + return net_group.value(); + } + )) ; ActivatedAppContextManagerWrapper::add_to_python_module(m); diff --git a/hailort/libhailort/bindings/python/src/hef_api.hpp b/hailort/libhailort/bindings/python/src/hef_api.hpp index 4de0a3c..87b4069 100644 --- a/hailort/libhailort/bindings/python/src/hef_api.hpp +++ b/hailort/libhailort/bindings/python/src/hef_api.hpp @@ -18,7 +18,6 @@ #include "vstream_api.hpp" #include "utils.hpp" -#include "common/logger_macros.hpp" #include #include diff --git a/hailort/libhailort/bindings/python/src/internal/pyhailort_internal.cpp b/hailort/libhailort/bindings/python/src/internal/pyhailort_internal.cpp index b81794f..fe7515d 100644 --- a/hailort/libhailort/bindings/python/src/internal/pyhailort_internal.cpp +++ b/hailort/libhailort/bindings/python/src/internal/pyhailort_internal.cpp @@ -173,12 +173,15 @@ py::array PyhailortInternal::get_yolov5_post_process_expected_buffer() auto buffer = get_expected_buffer_float32(); VALIDATE_EXPECTED(buffer); + auto type = py::dtype(HailoRTBindingsCommon::convert_format_type_to_string(HAILO_FORMAT_TYPE_FLOAT32)); + auto shape = *py::array::ShapeContainer({buffer->size()}); + // Note: The ownership of the buffer is transferred to Python wrapped as a py::array. 
// When the py::array isn't referenced anymore in Python and is destructed, the py::capsule's dtor // is called too (and it deletes the raw buffer) - auto type = py::dtype(HailoRTBindingsCommon::convert_format_type_to_string(HAILO_FORMAT_TYPE_FLOAT32)); - auto shape = *py::array::ShapeContainer({buffer->size()}); - const auto unmanaged_addr = buffer.release().release(); + auto unmanaged_addr_exp = buffer->storage().release(); + VALIDATE_EXPECTED(unmanaged_addr_exp); + const auto unmanaged_addr = unmanaged_addr_exp.release(); return py::array(type, shape, unmanaged_addr, py::capsule(unmanaged_addr, [](void *p) { delete reinterpret_cast(p); })); } @@ -277,7 +280,7 @@ py::list PyhailortInternal::get_all_layers_info(const HefWrapper &hef, const std auto core_op_metadata = hef.hef_ptr()->pimpl->get_core_op_metadata(net_group_name); VALIDATE_EXPECTED(core_op_metadata); - return py::cast(core_op_metadata->get_all_layer_infos()); + return py::cast(core_op_metadata.value()->get_all_layer_infos()); } PYBIND11_MODULE(_pyhailort_internal, m) { @@ -296,6 +299,13 @@ PYBIND11_MODULE(_pyhailort_internal, m) { .def_readonly("cluster_index", &BufferIndices::cluster_index) ; + py::enum_(m, "SensorConfigOpCode") + .value("SENSOR_CONFIG_OPCODES_WR", SENSOR_CONFIG_OPCODES_WR) + .value("SENSOR_CONFIG_OPCODES_RD", SENSOR_CONFIG_OPCODES_RD) + .value("SENSOR_CONFIG_OPCODES_RMW", SENSOR_CONFIG_OPCODES_RMW) + .value("SENSOR_CONFIG_OPCODES_DELAY", SENSOR_CONFIG_OPCODES_DELAY) + ; + py::class_(m, "HailoLayerInfo", py::module_local()) .def_readonly("is_mux", &LayerInfo::is_mux) .def_readonly("mux_predecessors", &LayerInfo::predecessor) diff --git a/hailort/libhailort/bindings/python/src/net_flow_api.hpp b/hailort/libhailort/bindings/python/src/net_flow_api.hpp deleted file mode 100644 index df29b36..0000000 --- a/hailort/libhailort/bindings/python/src/net_flow_api.hpp +++ /dev/null @@ -1,132 +0,0 @@ -/** - * Copyright (c) 2022 Hailo Technologies Ltd. All rights reserved. 
- * Distributed under the MIT license (https://opensource.org/licenses/MIT) -**/ -/** - * @file net_flow_api.hpp - * @brief Defines binding to a HailoRT++ ops usage over Python. - **/ - -#ifndef _HAILO_NET_FLOW_API_HPP_ -#define _HAILO_NET_FLOW_API_HPP_ - -#include "hailo/hailort.h" - -#include "net_flow/ops/yolo_post_process.hpp" - -#include "utils.hpp" -#include "bindings_common.hpp" - - -namespace hailort -{ -namespace net_flow -{ - -class YOLOv5PostProcessOpWrapper -{ -public: - static YOLOv5PostProcessOpWrapper create(const std::vector> &anchors, - const std::vector &shapes, const std::vector &formats, - const std::vector &quant_infos, float32_t image_height, float32_t image_width, float32_t confidence_threshold, - float32_t iou_threshold, uint32_t num_of_classes, uint32_t max_boxes, - bool cross_classes=true) - { - std::map inputs_metadata; - std::map outputs_metadata; - - net_flow::NmsPostProcessConfig nms_post_process_config{}; - nms_post_process_config.nms_score_th = confidence_threshold; - nms_post_process_config.nms_iou_th = iou_threshold; - nms_post_process_config.max_proposals_per_class = max_boxes; - nms_post_process_config.classes = num_of_classes; - nms_post_process_config.background_removal = false; - nms_post_process_config.background_removal_index = 0; - nms_post_process_config.cross_classes = cross_classes; - net_flow::YoloPostProcessConfig yolo_post_process_config{}; - yolo_post_process_config.image_height = image_height; - yolo_post_process_config.image_width = image_width; - // Each layer anchors vector is structured as {w,h} pairs. 
- for (size_t i = 0; i < anchors.size(); ++i) { - auto name = std::to_string(i); - yolo_post_process_config.anchors.insert({name, anchors[i]}); - BufferMetaData input_metadata = { - shapes[i], - shapes[i], - formats[i], - quant_infos[i] - }; - inputs_metadata.insert({name, input_metadata}); - } - auto op = YOLOv5PostProcessOp::create(inputs_metadata, outputs_metadata, nms_post_process_config, yolo_post_process_config); - VALIDATE_EXPECTED(op); - - return YOLOv5PostProcessOpWrapper(op.release(), num_of_classes, max_boxes); - } - - static void add_to_python_module(py::module &m) - { - py::class_(m, "YOLOv5PostProcessOp") - .def("create", &YOLOv5PostProcessOpWrapper::create) - .def("execute",[](YOLOv5PostProcessOpWrapper &self, const std::vector &tensors) - { - std::map data_views; - for (size_t i = 0; i < tensors.size(); ++i) { - data_views.insert({std::to_string(i), - MemoryView(const_cast(reinterpret_cast(tensors[i].data())), tensors[i].nbytes())}); - } - - hailo_nms_info_t nms_info = { - self.m_num_of_classes, - self.m_max_boxes, - sizeof(hailo_bbox_float32_t), - 1, - false, - hailo_nms_defuse_info_t() - }; - hailo_format_t output_format = { - HAILO_FORMAT_TYPE_FLOAT32, - HAILO_FORMAT_ORDER_HAILO_NMS, - HAILO_FORMAT_FLAGS_QUANTIZED, - }; - - auto buffer = Buffer::create(HailoRTCommon::get_nms_host_frame_size(nms_info, output_format), 0); - VALIDATE_STATUS(buffer.status()); - std::map outputs; - outputs.insert({"", MemoryView(buffer.value().data(), buffer.value().size())}); - auto status = self.m_post_processing_op->execute(data_views, outputs); - VALIDATE_STATUS(status); - - // Note: The ownership of the buffer is transferred to Python wrapped as a py::array. 
- // When the py::array isn't referenced anymore in Python and is destructed, the py::capsule's dtor - // is called too (and it deletes the raw buffer) - auto type = py::dtype(HailoRTBindingsCommon::convert_format_type_to_string(HAILO_FORMAT_TYPE_FLOAT32)); - auto shape = *py::array::ShapeContainer({buffer.value().size()}); - const auto unmanaged_addr = buffer.release().release(); - return py::array(type, shape, unmanaged_addr, - py::capsule(unmanaged_addr, [](void *p) { delete reinterpret_cast(p); })); - }) - ; - } - -private: - YOLOv5PostProcessOpWrapper(std::shared_ptr post_processing_op, uint32_t num_of_classes, uint32_t max_bboxes) - : m_post_processing_op(post_processing_op), - m_num_of_classes(num_of_classes), - m_max_boxes(max_bboxes) {} - - std::shared_ptr m_post_processing_op; - uint32_t m_num_of_classes = 0; - uint32_t m_max_boxes = 0; -}; - -void NetFlow_api_initialize_python_module(py::module &m) -{ - YOLOv5PostProcessOpWrapper::add_to_python_module(m); -} - - -} /* namespace net_flow */ -} /* namespace hailort */ - -#endif /* _HAILO_NET_FLOW_API_HPP_ */ diff --git a/hailort/libhailort/bindings/python/src/pyhailort.cpp b/hailort/libhailort/bindings/python/src/pyhailort.cpp index 2890c91..4cca840 100644 --- a/hailort/libhailort/bindings/python/src/pyhailort.cpp +++ b/hailort/libhailort/bindings/python/src/pyhailort.cpp @@ -10,23 +10,24 @@ using namespace std; #include "hailo/hailort.h" #include "hailo/hailort_defaults.hpp" +#include "hailo/network_rate_calculator.hpp" #include "hef_api.hpp" #include "vstream_api.hpp" #include "vdevice_api.hpp" #include "device_api.hpp" #include "quantization_api.hpp" -#include "net_flow_api.hpp" #include "utils.hpp" -#include "utils.h" #include "bindings_common.hpp" -#include "sensor_config_exports.h" -#if defined(__GNUC__) -#include "common/os/posix/traffic_control.hpp" -#endif +// should be same as socket.hpp +#define PADDING_BYTES_SIZE (6) +#define PADDING_ALIGN_BYTES (8 - PADDING_BYTES_SIZE) +#define 
MIN_UDP_PAYLOAD_SIZE (24) +#define MAX_UDP_PAYLOAD_SIZE (1456) +#define MAX_UDP_PADDED_PAYLOAD_SIZE (MAX_UDP_PAYLOAD_SIZE - PADDING_BYTES_SIZE - PADDING_ALIGN_BYTES) namespace hailort { @@ -102,36 +103,22 @@ std::string get_status_message(uint32_t status_in) } } -#if defined(__GNUC__) - -class TrafficControlUtilWrapper final +class NetworkRateLimiter final { public: - static TrafficControlUtilWrapper create(const std::string &ip, uint16_t port, uint32_t rate_bytes_per_sec) + static void set_rate_limit(const std::string &ip, uint16_t port, uint32_t rate_bytes_per_sec) { - auto tc_expected = TrafficControlUtil::create(ip, port, rate_bytes_per_sec); - VALIDATE_STATUS(tc_expected.status()); - - auto tc_ptr = make_unique_nothrow(tc_expected.release()); - if (nullptr == tc_ptr) { - VALIDATE_STATUS(HAILO_OUT_OF_HOST_MEMORY); - } - return TrafficControlUtilWrapper(std::move(tc_ptr)); + VALIDATE_STATUS(NetworkUdpRateCalculator::set_rate_limit(ip, port, rate_bytes_per_sec)); } - void set_rate_limit() + static void reset_rate_limit(const std::string &ip, uint16_t port) { - VALIDATE_STATUS(m_tc->set_rate_limit()); - } - - void reset_rate_limit() - { - VALIDATE_STATUS(m_tc->reset_rate_limit()); + VALIDATE_STATUS(NetworkUdpRateCalculator::reset_rate_limit(ip, port)); } static std::string get_interface_name(const std::string &ip) { - auto name = TrafficControlUtil::get_interface_name(ip); + auto name = NetworkUdpRateCalculator::get_interface_name(ip); VALIDATE_STATUS(name.status()); return name.value(); @@ -139,26 +126,16 @@ public: static void add_to_python_module(py::module &m) { - py::class_(m, "TrafficControlUtil") - .def(py::init(&TrafficControlUtilWrapper::create)) - .def("set_rate_limit", &TrafficControlUtilWrapper::set_rate_limit) - .def("reset_rate_limit", &TrafficControlUtilWrapper::reset_rate_limit) + py::class_(m, "NetworkRateLimiter") + .def("set_rate_limit", &NetworkRateLimiter::set_rate_limit) + .def("reset_rate_limit", &NetworkRateLimiter::reset_rate_limit) 
.def_static("get_interface_name", [](const std::string &ip) { - return TrafficControlUtilWrapper::get_interface_name(ip); + return NetworkRateLimiter::get_interface_name(ip); }) ; } - -private: - TrafficControlUtilWrapper(std::unique_ptr tc) : - m_tc(std::move(tc)) - {} - - std::unique_ptr m_tc; }; -#endif - static void validate_versions_match() { hailo_version_t libhailort_version = {}; @@ -437,13 +414,6 @@ PYBIND11_MODULE(_pyhailort, m) { .value("HAILO8_ISP", HAILO_SENSOR_TYPES_HAILO8_ISP) ; - py::enum_(m, "SensorConfigOpCode") - .value("SENSOR_CONFIG_OPCODES_WR", SENSOR_CONFIG_OPCODES_WR) - .value("SENSOR_CONFIG_OPCODES_RD", SENSOR_CONFIG_OPCODES_RD) - .value("SENSOR_CONFIG_OPCODES_RMW", SENSOR_CONFIG_OPCODES_RMW) - .value("SENSOR_CONFIG_OPCODES_DELAY", SENSOR_CONFIG_OPCODES_DELAY) - ; - py::class_(m, "I2CSlaveConfig") .def(py::init<>()) .def_readwrite("endianness", &hailo_i2c_slave_config_t::endianness) @@ -755,11 +725,45 @@ PYBIND11_MODULE(_pyhailort, m) { .value("MIPI", HAILO_STREAM_INTERFACE_MIPI) ; + py::enum_(m, "VStreamStatsFlags") + .value("NONE", hailo_vstream_stats_flags_t::HAILO_VSTREAM_STATS_NONE) + .value("MEASURE_FPS", hailo_vstream_stats_flags_t::HAILO_VSTREAM_STATS_MEASURE_FPS) + .value("MEASURE_LATENCY", hailo_vstream_stats_flags_t::HAILO_VSTREAM_STATS_MEASURE_LATENCY) + ; + + py::enum_(m, "PipelineElemStatsFlags") + .value("NONE", hailo_pipeline_elem_stats_flags_t::HAILO_PIPELINE_ELEM_STATS_NONE) + .value("MEASURE_FPS", hailo_pipeline_elem_stats_flags_t::HAILO_PIPELINE_ELEM_STATS_MEASURE_FPS) + .value("MEASURE_LATENCY", hailo_pipeline_elem_stats_flags_t::HAILO_PIPELINE_ELEM_STATS_MEASURE_LATENCY) + .value("MEASURE_QUEUE_SIZE", hailo_pipeline_elem_stats_flags_t::HAILO_PIPELINE_ELEM_STATS_MEASURE_QUEUE_SIZE) + ; + py::class_(m, "VStreamParams") .def(py::init<>()) .def_readwrite("user_buffer_format", &hailo_vstream_params_t::user_buffer_format) .def_readwrite("timeout_ms", &hailo_vstream_params_t::timeout_ms) .def_readwrite("queue_size", 
&hailo_vstream_params_t::queue_size) + .def_readonly("vstream_stats_flags", &hailo_vstream_params_t::vstream_stats_flags) + .def_readonly("pipeline_elements_stats_flags", &hailo_vstream_params_t::pipeline_elements_stats_flags) + .def(py::pickle( + [](const hailo_vstream_params_t &vstream_params) { // __getstate__ + return py::make_tuple( + vstream_params.user_buffer_format, + vstream_params.timeout_ms, + vstream_params.queue_size, + vstream_params.vstream_stats_flags, + vstream_params.pipeline_elements_stats_flags); + }, + [](py::tuple t) { // __setstate__ + hailo_vstream_params_t vstream_params; + vstream_params.user_buffer_format = t[0].cast(); + vstream_params.timeout_ms = t[1].cast(); + vstream_params.queue_size = t[2].cast(); + vstream_params.vstream_stats_flags = t[3].cast(); + vstream_params.pipeline_elements_stats_flags = t[4].cast(); + return vstream_params; + } + )) ; py::enum_(m, "LatencyMeasurementFlags") @@ -794,7 +798,7 @@ PYBIND11_MODULE(_pyhailort, m) { }, [](VDeviceParamsWrapper& params, const uint32_t& device_count) { params.orig_params.device_count = device_count; - } + } ) .def_property("scheduling_algorithm", [](const VDeviceParamsWrapper& params) -> uint32_t { @@ -802,7 +806,8 @@ PYBIND11_MODULE(_pyhailort, m) { }, [](VDeviceParamsWrapper& params, hailo_scheduling_algorithm_t scheduling_algorithm) { params.orig_params.scheduling_algorithm = scheduling_algorithm; - } + params.orig_params.multi_process_service = (HAILO_SCHEDULING_ALGORITHM_NONE != scheduling_algorithm); + } ) .def_property("group_id", [](const VDeviceParamsWrapper& params) -> py::str { @@ -813,12 +818,9 @@ PYBIND11_MODULE(_pyhailort, m) { params.orig_params.group_id = params.group_id_str.c_str(); } ) - .def_property("multi_process_service", - [](const VDeviceParamsWrapper& params) -> uint32_t { + .def_property_readonly("multi_process_service", + [](const VDeviceParamsWrapper& params) -> bool { return params.orig_params.multi_process_service; - }, - [](VDeviceParamsWrapper& 
params, bool multi_process_service) { - params.orig_params.multi_process_service = multi_process_service; } ) .def_static("default", []() { @@ -1103,11 +1105,8 @@ PYBIND11_MODULE(_pyhailort, m) { VStream_api_initialize_python_module(m); VDevice_api_initialize_python_module(m); DeviceWrapper::add_to_python_module(m); - hailort::net_flow::NetFlow_api_initialize_python_module(m); - #if defined(__GNUC__) - TrafficControlUtilWrapper::add_to_python_module(m); - #endif + NetworkRateLimiter::add_to_python_module(m); std::stringstream version; version << HAILORT_MAJOR_VERSION << "." << HAILORT_MINOR_VERSION << "." << HAILORT_REVISION_VERSION; diff --git a/hailort/libhailort/bindings/python/src/quantization_api.cpp b/hailort/libhailort/bindings/python/src/quantization_api.cpp index 9d6c8cd..893afb8 100644 --- a/hailort/libhailort/bindings/python/src/quantization_api.cpp +++ b/hailort/libhailort/bindings/python/src/quantization_api.cpp @@ -12,6 +12,8 @@ #include "quantization_api.hpp" #include "bindings_common.hpp" +#include + namespace hailort { @@ -32,8 +34,7 @@ void QuantizationBindings::dequantize_output_buffer_from_uint8(py::array src_buf static_cast(dst_buffer.mutable_data()), shape_size, quant_info); break; default: - LOGGER__ERROR("Output quantization isn't supported from src format type uint8 to dst format type = {}", - HailoRTBindingsCommon::convert_format_type_to_string(dst_dtype)); + std::cerr << "Output quantization isn't supported from src format type uint8 to dst format type = " << HailoRTBindingsCommon::convert_format_type_to_string(dst_dtype); THROW_STATUS_ERROR(HAILO_INVALID_ARGUMENT); break; } @@ -52,8 +53,7 @@ void QuantizationBindings::dequantize_output_buffer_from_uint16(py::array src_bu static_cast(dst_buffer.mutable_data()), shape_size, quant_info); break; default: - LOGGER__ERROR("Output quantization isn't supported from src dormat type uint16 to dst format type = {}", - HailoRTBindingsCommon::convert_format_type_to_string(dst_dtype)); + std::cerr << 
"Output quantization isn't supported from src dormat type uint16 to dst format type = " << HailoRTBindingsCommon::convert_format_type_to_string(dst_dtype); THROW_STATUS_ERROR(HAILO_INVALID_ARGUMENT); break; } @@ -68,8 +68,7 @@ void QuantizationBindings::dequantize_output_buffer_from_float32(py::array src_b static_cast(dst_buffer.mutable_data()), shape_size, quant_info); break; default: - LOGGER__ERROR("Output quantization isn't supported from src format type float32 to dst format type = {}", - HailoRTBindingsCommon::convert_format_type_to_string(dst_dtype)); + std::cerr << "Output quantization isn't supported from src format type float32 to dst format type = " << HailoRTBindingsCommon::convert_format_type_to_string(dst_dtype); THROW_STATUS_ERROR(HAILO_INVALID_ARGUMENT); break; } @@ -92,8 +91,7 @@ void QuantizationBindings::dequantize_output_buffer_from_uint8_in_place(py::arra static_cast(dst_buffer.mutable_data()), shape_size, quant_info); break; default: - LOGGER__ERROR("Output quantization isn't supported from src format type uint8 to dst format type = {}", - HailoRTBindingsCommon::convert_format_type_to_string(dst_dtype)); + std::cerr << "Output quantization isn't supported from src format type uint8 to dst format type = " << HailoRTBindingsCommon::convert_format_type_to_string(dst_dtype); THROW_STATUS_ERROR(HAILO_INVALID_ARGUMENT); break; } @@ -112,8 +110,7 @@ void QuantizationBindings::dequantize_output_buffer_from_uint16_in_place(py::arr static_cast(dst_buffer.mutable_data()), shape_size, quant_info); break; default: - LOGGER__ERROR("Output quantization isn't supported from src dormat type uint16 to dst format type = {}", - HailoRTBindingsCommon::convert_format_type_to_string(dst_dtype)); + std::cerr << "Output quantization isn't supported from src dormat type uint16 to dst format type = " << HailoRTBindingsCommon::convert_format_type_to_string(dst_dtype); THROW_STATUS_ERROR(HAILO_INVALID_ARGUMENT); break; } @@ -128,8 +125,7 @@ void 
QuantizationBindings::dequantize_output_buffer_from_float32_in_place(py::ar static_cast(dst_buffer.mutable_data()), shape_size, quant_info); break; default: - LOGGER__ERROR("Output quantization isn't supported from src format type float32 to dst format type = {}", - HailoRTBindingsCommon::convert_format_type_to_string(dst_dtype)); + std::cerr << "Output quantization isn't supported from src format type float32 to dst format type = " << HailoRTBindingsCommon::convert_format_type_to_string(dst_dtype); THROW_STATUS_ERROR(HAILO_INVALID_ARGUMENT); break; } @@ -149,7 +145,7 @@ void QuantizationBindings::dequantize_output_buffer_in_place(py::array dst_buffe QuantizationBindings::dequantize_output_buffer_from_float32_in_place(dst_buffer, dst_dtype, shape_size, quant_info); break; default: - LOGGER__ERROR("Unsupported src format type = {}", HailoRTBindingsCommon::convert_format_type_to_string(dst_dtype)); + std::cerr << "Unsupported src format type = " << HailoRTBindingsCommon::convert_format_type_to_string(dst_dtype); THROW_STATUS_ERROR(HAILO_INVALID_ARGUMENT); break; } @@ -169,7 +165,7 @@ void QuantizationBindings::dequantize_output_buffer(py::array src_buffer, py::ar QuantizationBindings::dequantize_output_buffer_from_float32(src_buffer, dst_buffer, dst_dtype, shape_size, quant_info); break; default: - LOGGER__ERROR("Unsupported src format type = {}", HailoRTBindingsCommon::convert_format_type_to_string(dst_dtype)); + std::cerr << "Unsupported src format type = " << HailoRTBindingsCommon::convert_format_type_to_string(dst_dtype); THROW_STATUS_ERROR(HAILO_INVALID_ARGUMENT); break; } @@ -184,7 +180,7 @@ void QuantizationBindings::quantize_input_buffer_from_uint8(py::array src_buffer static_cast(dst_buffer.mutable_data()), shape_size, quant_info); break; default: - LOGGER__ERROR("Input quantization isn't supported from src format type uint8 to dst format type = {}", HailoRTBindingsCommon::convert_format_type_to_string(dst_dtype)); + std::cerr << "Input quantization isn't 
supported from src format type uint8 to dst format type = " << HailoRTBindingsCommon::convert_format_type_to_string(dst_dtype); THROW_STATUS_ERROR(HAILO_INVALID_ARGUMENT); break; } @@ -203,8 +199,7 @@ void QuantizationBindings::quantize_input_buffer_from_uint16(py::array src_buffe static_cast(dst_buffer.mutable_data()), shape_size, quant_info); break; default: - LOGGER__ERROR("Input quantization isn't supported from src format type uint16 to dst format type = {}", - HailoRTBindingsCommon::convert_format_type_to_string(dst_dtype)); + std::cerr << "Input quantization isn't supported from src format type uint16 to dst format type = " << HailoRTBindingsCommon::convert_format_type_to_string(dst_dtype); THROW_STATUS_ERROR(HAILO_INVALID_ARGUMENT); break; } @@ -223,8 +218,8 @@ void QuantizationBindings::quantize_input_buffer_from_float32(py::array src_buff static_cast(dst_buffer.mutable_data()), shape_size, quant_info); break; default: - LOGGER__ERROR("Input quantization isn't supported from src format type float32 to dst format type = {}", - HailoRTBindingsCommon::convert_format_type_to_string(dst_dtype)); + std::cerr << "Input quantization isn't supported from src format type float32 to dst format type = " << + HailoRTBindingsCommon::convert_format_type_to_string(dst_dtype); THROW_STATUS_ERROR(HAILO_INVALID_ARGUMENT); break; } @@ -244,7 +239,7 @@ void QuantizationBindings::quantize_input_buffer(py::array src_buffer, py::array QuantizationBindings::quantize_input_buffer_from_float32(src_buffer, dst_buffer, dst_dtype, shape_size, quant_info); break; default: - LOGGER__ERROR("Input quantization isn't supported for src format type = {}", HailoRTBindingsCommon::convert_format_type_to_string(dst_dtype)); + std::cerr << "Input quantization isn't supported for src format type = " << HailoRTBindingsCommon::convert_format_type_to_string(dst_dtype); THROW_STATUS_ERROR(HAILO_INVALID_ARGUMENT); break; } diff --git a/hailort/libhailort/bindings/python/src/utils.hpp 
b/hailort/libhailort/bindings/python/src/utils.hpp index e5356e1..da26368 100644 --- a/hailort/libhailort/bindings/python/src/utils.hpp +++ b/hailort/libhailort/bindings/python/src/utils.hpp @@ -68,11 +68,11 @@ class HailoRTStatusException : public HailoRTException { [](hailo_stream_parameters_t& self) -> const __property_type& \ { \ if (__interface_value != self.stream_interface) { \ - LOGGER__ERROR("Stream params interface is not {}.", #__interface_value); \ + std::cerr << "Stream params interface is not " << __interface_value << "."; \ THROW_STATUS_ERROR(HAILO_INVALID_OPERATION); \ } \ if (__direction_value != self.direction) { \ - LOGGER__ERROR("Stream params direction is not {}.", #__direction_value); \ + std::cerr << "Stream params direction is not " << __direction_value << "."; \ THROW_STATUS_ERROR(HAILO_INVALID_OPERATION); \ } \ return self.__property_name; \ @@ -80,11 +80,11 @@ class HailoRTStatusException : public HailoRTException { [](hailo_stream_parameters_t& self, const __property_type& value) \ { \ if (__interface_value != self.stream_interface) { \ - LOGGER__ERROR("Stream params interface is not {}.", #__interface_value); \ + std::cerr << "Stream params interface is not " << __interface_value << "."; \ THROW_STATUS_ERROR(HAILO_INVALID_OPERATION); \ } \ if (__direction_value != self.direction) { \ - LOGGER__ERROR("Stream params direction is not {}.", #__direction_value); \ + std::cerr << "Stream params direction is not " << __direction_value << "."; \ THROW_STATUS_ERROR(HAILO_INVALID_OPERATION); \ } \ self.__property_name = value; \ diff --git a/hailort/libhailort/bindings/python/src/vdevice_api.hpp b/hailort/libhailort/bindings/python/src/vdevice_api.hpp index 02a7ee6..eae48d7 100644 --- a/hailort/libhailort/bindings/python/src/vdevice_api.hpp +++ b/hailort/libhailort/bindings/python/src/vdevice_api.hpp @@ -16,14 +16,8 @@ #include "hailo/vdevice.hpp" #include "hailo/hailort_common.hpp" -#include "common/logger_macros.hpp" - -#ifdef 
HAILO_SUPPORT_MULTI_PROCESS -#include "service/rpc_client_utils.hpp" -#endif // HAILO_SUPPORT_MULTI_PROCESS - #include "utils.hpp" - +#include #include #include #include @@ -57,7 +51,7 @@ public: static VDeviceWrapper create(const VDeviceParamsWrapper ¶ms, const std::vector &device_ids) { if (params.orig_params.device_ids != nullptr && (!device_ids.empty())) { - LOGGER__ERROR("VDevice device_ids can be set in params or device_ids argument. Both parameters were passed to the c'tor"); + std::cerr << "VDevice device_ids can be set in params or device_ids argument. Both parameters were passed to the c'tor"; throw HailoRTStatusException(std::to_string(HAILO_INVALID_OPERATION)); } auto modified_params = params; @@ -124,32 +118,26 @@ public: void before_fork() { -#ifdef HAILO_SUPPORT_MULTI_PROCESS if (m_vdevice != nullptr) { auto status = m_vdevice->before_fork(); VALIDATE_STATUS(status); } -#endif // HAILO_SUPPORT_MULTI_PROCESS } void after_fork_in_parent() { -#ifdef HAILO_SUPPORT_MULTI_PROCESS if (m_vdevice != nullptr) { auto status = m_vdevice->after_fork_in_parent(); VALIDATE_STATUS(status); } -#endif // HAILO_SUPPORT_MULTI_PROCESS } void after_fork_in_child() { -#ifdef HAILO_SUPPORT_MULTI_PROCESS if (m_vdevice != nullptr) { auto status = m_vdevice->after_fork_in_child(); VALIDATE_STATUS(status); } -#endif // HAILO_SUPPORT_MULTI_PROCESS } private: diff --git a/hailort/libhailort/bindings/python/src/vstream_api.cpp b/hailort/libhailort/bindings/python/src/vstream_api.cpp index a1b651b..a17b6fd 100644 --- a/hailort/libhailort/bindings/python/src/vstream_api.cpp +++ b/hailort/libhailort/bindings/python/src/vstream_api.cpp @@ -7,12 +7,10 @@ * @brief Implementation of binding to virtual stream usage over Python. 
**/ -#include "common/logger_macros.hpp" -#include "common/utils.hpp" - #include "vstream_api.hpp" #include "bindings_common.hpp" #include "utils.hpp" +#include namespace hailort @@ -87,7 +85,7 @@ InputVStreamsWrapper InputVStreamsWrapper::create(ConfiguredNetworkGroup &net_gr std::unordered_map> input_vstreams; for (auto &input : input_vstreams_expected.value()) { auto input_name = input.name(); - input_vstreams.emplace(input_name, make_shared_nothrow(std::move(input))); + input_vstreams.emplace(input_name, std::make_unique(std::move(input))); } return InputVStreamsWrapper(input_vstreams); } @@ -106,7 +104,7 @@ std::shared_ptr InputVStreamsWrapper::get_input_by_name(const std: { auto input = m_input_vstreams.find(name); if (m_input_vstreams.end() == input) { - LOGGER__ERROR("Input virtual stream for name={} not found", name); + std::cerr << "Input virtual stream for name=" << name << " not found"; THROW_STATUS_ERROR(HAILO_NOT_FOUND); } @@ -210,7 +208,9 @@ void OutputVStreamWrapper::add_to_python_module(py::module &m) // Note: The ownership of the buffer is transferred to Python wrapped as a py::array. 
// When the py::array isn't referenced anymore in Python and is destructed, the py::capsule's dtor // is called too (and it deletes the raw buffer) - const auto unmanaged_addr = buffer.release().release(); + auto unmanaged_addr_exp = buffer->storage().release(); + VALIDATE_EXPECTED(unmanaged_addr_exp); + const auto unmanaged_addr = unmanaged_addr_exp.release(); return py::array(get_dtype(self), get_shape(self), unmanaged_addr, py::capsule(unmanaged_addr, [](void *p) { delete reinterpret_cast(p); })); }) @@ -263,7 +263,7 @@ OutputVStreamsWrapper OutputVStreamsWrapper::create(ConfiguredNetworkGroup &net_ std::unordered_map> output_vstreams; for (auto &output : output_vstreams_expected.value()) { auto output_name = output.name(); - output_vstreams.emplace(output_name, make_shared_nothrow(std::move(output))); + output_vstreams.emplace(output_name, std::make_unique(std::move(output))); } return OutputVStreamsWrapper(output_vstreams); } @@ -272,7 +272,7 @@ std::shared_ptr OutputVStreamsWrapper::get_output_by_name(const s { auto output = m_output_vstreams.find(name); if (m_output_vstreams.end() == output) { - LOGGER__ERROR("Output virtual stream for name={} not found", name); + std::cerr << "Output virtual stream for name=" << name << " not found"; THROW_STATUS_ERROR(HAILO_NOT_FOUND); } @@ -361,7 +361,7 @@ InferVStreamsWrapper InferVStreamsWrapper::create(ConfiguredNetworkGroup &networ { auto infer_pipeline = InferVStreams::create(network_group, input_vstreams_params, output_vstreams_params); VALIDATE_EXPECTED(infer_pipeline); - auto infer_vstream_ptr = make_shared_nothrow(std::move(infer_pipeline.value())); + auto infer_vstream_ptr = std::make_shared(std::move(infer_pipeline.value())); return InferVStreamsWrapper(infer_vstream_ptr); } @@ -426,7 +426,7 @@ std::vector InferVStreamsWrapper::get_shape(const std::string &stream_na return HailoRTBindingsCommon::get_pybind_shape(output->get().get_info(), output->get().get_user_buffer_format()); } - LOGGER__ERROR("Stream {} not 
found", stream_name); + std::cerr << "Stream " << stream_name << " not found"; THROW_STATUS_ERROR(HAILO_NOT_FOUND); } diff --git a/hailort/libhailort/bindings/python/src/vstream_api.hpp b/hailort/libhailort/bindings/python/src/vstream_api.hpp index 83f3bc3..d39c11e 100644 --- a/hailort/libhailort/bindings/python/src/vstream_api.hpp +++ b/hailort/libhailort/bindings/python/src/vstream_api.hpp @@ -10,8 +10,6 @@ #ifndef _VSTREAM_API_HPP_ #define _VSTREAM_API_HPP_ -#include "common/logger_macros.hpp" -#include "common/utils.hpp" #include "hailo/vstream.hpp" #include "hailo/inference_pipeline.hpp" #include "utils.hpp" diff --git a/hailort/libhailort/cmake/toolchains/toolchains.yaml b/hailort/libhailort/cmake/toolchains/toolchains.yaml index 6218084..1f142d3 100644 --- a/hailort/libhailort/cmake/toolchains/toolchains.yaml +++ b/hailort/libhailort/cmake/toolchains/toolchains.yaml @@ -19,15 +19,15 @@ python_versions: - version: '3.8' installation: manual - package_name: https://launchpad.net/ubuntu/+source/python3.8/3.8.2-1ubuntu1/+build/18834117/+files/libpython3.8-dev_3.8.2-1ubuntu1_arm64.deb + package_name: https://launchpad.net/ubuntu/+source/python3.8/3.8.2-1ubuntu1/+build/18834117/+files/libpython3.8-dev_3.8.2-1ubuntu1_arm64.deb package_dest: /usr/include/aarch64-linux-gnu - version: '3.9' installation: manual - package_name: https://launchpad.net/~deadsnakes/+archive/ubuntu/ppa/+build/24906233/+files/libpython3.9-dev_3.9.16-1+bionic1_arm64.deb + package_name: https://launchpad.net/~deadsnakes/+archive/ubuntu/ppa/+build/26280901/+files/libpython3.9-dev_3.9.17-1+focal1_arm64.deb package_dest: /usr/include/aarch64-linux-gnu - version: '3.10' installation: manual - package_name: https://launchpadlibrarian.net/569418529/libpython3.10-dev_3.10.0-5_arm64.deb + package_name: https://launchpadlibrarian.net/569418529/libpython3.10-dev_3.10.0-5_arm64.deb package_dest: /usr/include/aarch64-linux-gnu - name: linux.armv7l required_packages: diff --git 
a/hailort/libhailort/examples/CMakeLists.txt b/hailort/libhailort/examples/CMakeLists.txt index b0cfdda..ae2aacd 100644 --- a/hailort/libhailort/examples/CMakeLists.txt +++ b/hailort/libhailort/examples/CMakeLists.txt @@ -2,9 +2,33 @@ cmake_minimum_required(VERSION 3.0.0) project(hailort-examples) +if(WIN32) + add_compile_options(/W4) +elseif(UNIX) + if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID STREQUAL "QCC") + add_compile_options(-Wall -Wextra -Wconversion) + elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Clang") + add_compile_options(-Wall -Wextra -Wconversion -Wno-missing-braces) + endif() +else() + message(FATAL_ERROR "Unexpeced host, stopping build") +endif() + +if (HAILO_COMPILE_WARNING_AS_ERROR) + # Treat warnings as errors for all examples + if(WIN32) + add_compile_options(/WX) + elseif(UNIX) + add_compile_options(-Werror) + else() + message(FATAL_ERROR "Unexpeced host, stopping build") + endif() +endif() + add_subdirectory(cpp) add_subdirectory(c) # We add a costum target in order to compile all of the hailort examples add_custom_target(hailort_examples) -add_dependencies(hailort_examples c_hailort_examples cpp_hailort_examples) \ No newline at end of file + +add_dependencies(hailort_examples c_hailort_examples cpp_hailort_examples) diff --git a/hailort/libhailort/examples/README.md b/hailort/libhailort/examples/README.md index 04dd107..dd15bce 100644 --- a/hailort/libhailort/examples/README.md +++ b/hailort/libhailort/examples/README.md @@ -26,6 +26,11 @@ The following examples are provided, demonstrating the HailoRT API: - this example uses udp device. - `raw_streams_example` - Basic inference of a shortcut network using raw stream api. - The data is transformed before sent and after received in the same thread sending/receiving using the transformation api. + - `raw_async_streams_single_thread_example` - Basic inference of a shortcut network using raw stream async api with + a single thread. 
+ - Each async read operation will re-launch some new async read operation. + - Each async write operation will re-launch some new async write operation. + - The main thread will stop the async operations by deactivating the network group. - `notification_callback_example` - Demonstrates how to work with notification callbacks. - C++ examples: @@ -38,9 +43,19 @@ The following examples are provided, demonstrating the HailoRT API: - `infer_pipeline_example` - Basic inference of a shortcut network using inference pipeline (blocking) api. - same as `infer_pipeline_example` C example, uses HailoRT C++ api. - `raw_streams_example` - Basic inference of a shortcut network, same as `raw_streams_example` C example, uses HailoRT C++ api. - - `multi_process_example` - Demonstrates how to work with HailoRT as a service and using the HailoRT Model Scheduler for network groups switching. + - `raw_async_streams_single_thread_example` - Basic inference of a shortcut network using raw stream async api with + a single thread. + - Each async read operation will re-launch some new async read operation. + - Each async write operation will re-launch some new async write operation. + - The main thread will stop the async operations by deactivating the network group. + - `raw_async_streams_multi_thread_example` - Basic inference of a shortcut network using raw stream async api with + a thread for each stream. + - The threads will continuously initiate an async read or write operations. + - The main thread will stop the async operations and the threads by deactivating the network group. + - `multi_process_example` - Demonstrates how to work with HailoRT multi-process service and using the HailoRT Model Scheduler for network groups switching. Using the script `multi_process_example.sh` one can specify the number of processes to run each hef, see `multi_process_example.sh -h` for more information. 
- `notification_callback_example` - Demonstrates how to work with notification callbacks, same as `notification_callback_example` C example. +You can find more details about each example in the HailoRT user guide. ## Compiling with CMake Examples are configured and compiled using the following commands: ```sh @@ -58,9 +73,10 @@ cmake --build build --config release --target cpp_vstreams_example ## Running the examples -Before running an example, download the HEFs using the [download script](../../scripts/download_hefs.sh): +Before running an example, download the HEFs using the [download script](../../scripts/download_hefs.sh) from the scripts directory: ```sh - ../../scripts/download_hefs.sh + cd ../../scripts + ./download_hefs.sh ``` To run an example, use (from this examples directory): diff --git a/hailort/libhailort/examples/c/CMakeLists.txt b/hailort/libhailort/examples/c/CMakeLists.txt index 46e0ce7..f006e35 100644 --- a/hailort/libhailort/examples/c/CMakeLists.txt +++ b/hailort/libhailort/examples/c/CMakeLists.txt @@ -11,8 +11,7 @@ add_subdirectory(multi_device_example) add_subdirectory(power_measurement_example) add_subdirectory(notification_callback_example) -add_custom_target(c_hailort_examples) -add_dependencies(c_hailort_examples +set(C_EXAMPLE_TARGETS c_data_quantization_example c_raw_streams_example c_vstreams_example @@ -22,4 +21,14 @@ add_dependencies(c_hailort_examples c_switch_network_groups_manually_example c_multi_device_example c_power_measurement_example - c_notification_callback_example) \ No newline at end of file + c_notification_callback_example +) + +if(NOT CMAKE_SYSTEM_NAME STREQUAL QNX) + # TODO: HRT-10956 support QNX async examples + add_subdirectory(raw_async_streams_single_thread_example) + set(C_EXAMPLE_TARGETS ${C_EXAMPLE_TARGETS} c_raw_async_streams_single_thread_example) +endif() + +add_custom_target(c_hailort_examples) +add_dependencies(c_hailort_examples ${C_EXAMPLE_TARGETS}) \ No newline at end of file diff --git 
a/hailort/libhailort/examples/c/common/common.h b/hailort/libhailort/examples/c/common/common.h index aafb298..76f973c 100644 --- a/hailort/libhailort/examples/c/common/common.h +++ b/hailort/libhailort/examples/c/common/common.h @@ -36,7 +36,14 @@ #define ARRAY_LENGTH(__array) (sizeof((__array)) / sizeof((__array)[0])) -#define NSEC_IN_SEC (1e+9) + +#if defined(__unix__) +#define hailo_sleep(seconds) sleep((seconds)) +#elif defined(_MSC_VER) +#define hailo_sleep(seconds) Sleep((seconds) * 1000) +#else /* defined(_MSC_VER) */ +#pragma error("sleep not supported") +#endif #endif /* _EXAMPLE_COMMON_H_ */ diff --git a/hailort/libhailort/examples/c/common/hailo_thread.h b/hailort/libhailort/examples/c/common/hailo_thread.h index 2c0d3be..9d2121a 100644 --- a/hailort/libhailort/examples/c/common/hailo_thread.h +++ b/hailort/libhailort/examples/c/common/hailo_thread.h @@ -12,11 +12,17 @@ #include "hailo/hailort.h" -#if defined(__unix__) || defined(__QNX__) +#if defined(__unix__) || defined(__QNX__) #include +#include +#include + typedef pthread_t hailo_thread; typedef void* thread_return_type; +typedef atomic_int hailo_atomic_int; + +#define MICROSECONDS_PER_MILLISECOND (1000) hailo_status hailo_create_thread(thread_return_type(*func_ptr)(void*), void* args, hailo_thread *thread_out) { @@ -34,11 +40,37 @@ hailo_status hailo_join_thread(hailo_thread *thread) return (hailo_status)results; } +void hailo_atomic_init(hailo_atomic_int *atomic, int value) +{ + atomic_init(atomic, value); +} + +int hailo_atomic_load(hailo_atomic_int *atomic) +{ + return atomic_load(atomic); +} + +int hailo_atomic_fetch_add(hailo_atomic_int *atomic, int value) +{ + return atomic_fetch_add(atomic, value); +} + +void hailo_atomic_increment(hailo_atomic_int *atomic) +{ + atomic_fetch_add(atomic, 1); +} + +void hailo_atomic_store(hailo_atomic_int *atomic, int value) +{ + atomic_store(atomic, value); +} + #elif defined _MSC_VER // __unix__ || __QNX__ #include typedef HANDLE hailo_thread; typedef DWORD 
thread_return_type; +typedef LONG hailo_atomic_int; hailo_status hailo_create_thread(thread_return_type(func_ptr)(void*), void* args, hailo_thread *thread_out) { @@ -61,6 +93,32 @@ hailo_status hailo_join_thread(hailo_thread *thread) return (hailo_status)result; } +void hailo_atomic_init(hailo_atomic_int *atomic, int value) +{ + InterlockedExchange(atomic, (LONG)value); +} + +int hailo_atomic_load(hailo_atomic_int *atomic) +{ + return InterlockedExchangeAdd(atomic, (LONG)0); +} + +int hailo_atomic_fetch_add(hailo_atomic_int *atomic, int value) +{ + return InterlockedExchangeAdd(atomic, (LONG)value); +} + +void hailo_atomic_increment(hailo_atomic_int *atomic) +{ + InterlockedIncrement(atomic); +} + +void hailo_atomic_store(hailo_atomic_int *atomic, int value) +{ + InterlockedExchange(atomic, value); +} + + #endif #endif /* _HAILO_THREAD_H_ */ diff --git a/hailort/libhailort/examples/c/data_quantization_example/CMakeLists.txt b/hailort/libhailort/examples/c/data_quantization_example/CMakeLists.txt index 4933fbd..0264495 100644 --- a/hailort/libhailort/examples/c/data_quantization_example/CMakeLists.txt +++ b/hailort/libhailort/examples/c/data_quantization_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0) find_package(Threads REQUIRED) set(THREADS_PREFER_PTHREAD_FLAG ON) -find_package(HailoRT 4.13.0 EXACT REQUIRED) +find_package(HailoRT 4.14.0 EXACT REQUIRED) SET_SOURCE_FILES_PROPERTIES(data_quantization_example.c PROPERTIES LANGUAGE C) diff --git a/hailort/libhailort/examples/c/data_quantization_example/data_quantization_example.c b/hailort/libhailort/examples/c/data_quantization_example/data_quantization_example.c index 7688d22..bbdf614 100644 --- a/hailort/libhailort/examples/c/data_quantization_example/data_quantization_example.c +++ b/hailort/libhailort/examples/c/data_quantization_example/data_quantization_example.c @@ -269,7 +269,7 @@ int main(int argc, char **argv) status = hailo_create_hef_file(&hef, HEF_FILE); 
REQUIRE_SUCCESS(status, l_release_vdevice, "Failed reading hef file"); - status = hailo_init_configure_params(hef, HAILO_STREAM_INTERFACE_PCIE, &config_params); + status = hailo_init_configure_params_by_vdevice(hef, vdevice, &config_params); REQUIRE_SUCCESS(status, l_release_hef, "Failed initializing configure parameters"); status = hailo_configure_vdevice(vdevice, hef, &config_params, &network_group, &network_group_size); @@ -295,5 +295,5 @@ l_release_hef: l_release_vdevice: (void) hailo_release_vdevice(vdevice); l_exit: - return status; + return (int)status; } diff --git a/hailort/libhailort/examples/c/infer_pipeline_example/CMakeLists.txt b/hailort/libhailort/examples/c/infer_pipeline_example/CMakeLists.txt index e7523d7..0a807a5 100644 --- a/hailort/libhailort/examples/c/infer_pipeline_example/CMakeLists.txt +++ b/hailort/libhailort/examples/c/infer_pipeline_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0) find_package(Threads REQUIRED) set(THREADS_PREFER_PTHREAD_FLAG ON) -find_package(HailoRT 4.13.0 EXACT REQUIRED) +find_package(HailoRT 4.14.0 EXACT REQUIRED) SET_SOURCE_FILES_PROPERTIES(infer_pipeline_example.c PROPERTIES LANGUAGE C) diff --git a/hailort/libhailort/examples/c/infer_pipeline_example/infer_pipeline_example.c b/hailort/libhailort/examples/c/infer_pipeline_example/infer_pipeline_example.c index 51478c8..0a682d2 100644 --- a/hailort/libhailort/examples/c/infer_pipeline_example/infer_pipeline_example.c +++ b/hailort/libhailort/examples/c/infer_pipeline_example/infer_pipeline_example.c @@ -115,7 +115,7 @@ int main(int argc, char **argv) status = hailo_create_hef_file(&hef, HEF_FILE); REQUIRE_SUCCESS(status, l_release_device, "Failed reading hef file"); - status = hailo_init_configure_params(hef, HAILO_STREAM_INTERFACE_ETH, &config_params); + status = hailo_init_configure_params_by_device(hef, device, &config_params); REQUIRE_SUCCESS(status, l_release_hef, "Failed initializing configure parameters"); status = 
hailo_configure_device(device, hef, &config_params, &network_group, &network_group_size); @@ -156,5 +156,5 @@ l_release_hef: l_release_device: (void) hailo_release_device(device); l_exit: - return status; + return (int)status; } diff --git a/hailort/libhailort/examples/c/multi_device_example/CMakeLists.txt b/hailort/libhailort/examples/c/multi_device_example/CMakeLists.txt index 9729466..0b501c9 100644 --- a/hailort/libhailort/examples/c/multi_device_example/CMakeLists.txt +++ b/hailort/libhailort/examples/c/multi_device_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0) find_package(Threads REQUIRED) set(THREADS_PREFER_PTHREAD_FLAG ON) -find_package(HailoRT 4.13.0 EXACT REQUIRED) +find_package(HailoRT 4.14.0 EXACT REQUIRED) SET_SOURCE_FILES_PROPERTIES(multi_device_example.c PROPERTIES LANGUAGE C) diff --git a/hailort/libhailort/examples/c/multi_device_example/multi_device_example.c b/hailort/libhailort/examples/c/multi_device_example/multi_device_example.c index 78cc1fc..a676779 100644 --- a/hailort/libhailort/examples/c/multi_device_example/multi_device_example.c +++ b/hailort/libhailort/examples/c/multi_device_example/multi_device_example.c @@ -16,6 +16,7 @@ #define INFER_FRAME_COUNT (100) #define MAX_EDGE_LAYERS (16) #define MAX_DEVICES (16) +#define BATCH_SIZE (1) #define HEF_FILE ("hefs/shortcut_net.hef") @@ -133,6 +134,7 @@ int main() hailo_vdevice_params_t params = {0}; hailo_hef hef = NULL; hailo_configure_params_t config_params = {0}; + uint16_t batch_size = BATCH_SIZE; hailo_configured_network_group network_group = NULL; size_t network_group_size = 1; hailo_input_vstream_params_by_name_t input_vstream_params[MAX_EDGE_LAYERS] = {0}; @@ -144,6 +146,7 @@ int main() status = hailo_scan_devices(NULL, device_ids, &actual_count); REQUIRE_SUCCESS(status, l_exit, "Failed to scan devices"); + printf("Found %zu devices\n", actual_count); status = hailo_init_vdevice_params(¶ms); REQUIRE_SUCCESS(status, l_exit, "Failed init vdevice_params"); 
@@ -155,9 +158,15 @@ int main() status = hailo_create_hef_file(&hef, HEF_FILE); REQUIRE_SUCCESS(status, l_release_vdevice, "Failed reading hef file"); - status = hailo_init_configure_params(hef, HAILO_STREAM_INTERFACE_PCIE, &config_params); + status = hailo_init_configure_params_by_vdevice(hef, vdevice, &config_params); REQUIRE_SUCCESS(status, l_release_hef, "Failed initializing configure parameters"); + // Modify batch_size and power_mode for each network group + for (size_t i = 0; i < config_params.network_group_params_count; i++) { + config_params.network_group_params[i].batch_size = batch_size; + config_params.network_group_params[i].power_mode = HAILO_POWER_MODE_ULTRA_PERFORMANCE; + } + status = hailo_configure_vdevice(vdevice, hef, &config_params, &network_group, &network_group_size); REQUIRE_SUCCESS(status, l_release_hef, "Failed configure vdevcie from hef"); REQUIRE_ACTION(network_group_size == 1, status = HAILO_INVALID_ARGUMENT, l_release_hef, @@ -196,5 +205,5 @@ l_release_hef: l_release_vdevice: (void) hailo_release_vdevice(vdevice); l_exit: - return status; + return (int)status; } diff --git a/hailort/libhailort/examples/c/multi_network_vstream_example/CMakeLists.txt b/hailort/libhailort/examples/c/multi_network_vstream_example/CMakeLists.txt index fd39710..0ca96b4 100644 --- a/hailort/libhailort/examples/c/multi_network_vstream_example/CMakeLists.txt +++ b/hailort/libhailort/examples/c/multi_network_vstream_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0) find_package(Threads REQUIRED) set(THREADS_PREFER_PTHREAD_FLAG ON) -find_package(HailoRT 4.13.0 EXACT REQUIRED) +find_package(HailoRT 4.14.0 EXACT REQUIRED) SET_SOURCE_FILES_PROPERTIES(multi_network_vstream_example.c PROPERTIES LANGUAGE C) diff --git a/hailort/libhailort/examples/c/multi_network_vstream_example/multi_network_vstream_example.c b/hailort/libhailort/examples/c/multi_network_vstream_example/multi_network_vstream_example.c index 552d07c..e9cdbe2 100644 --- 
a/hailort/libhailort/examples/c/multi_network_vstream_example/multi_network_vstream_example.c +++ b/hailort/libhailort/examples/c/multi_network_vstream_example/multi_network_vstream_example.c @@ -180,7 +180,7 @@ int main() status = hailo_create_hef_file(&hef, HEF_FILE); REQUIRE_SUCCESS(status, l_release_vdevice, "Failed reading hef file"); - status = hailo_init_configure_params(hef, HAILO_STREAM_INTERFACE_PCIE, &config_params); + status = hailo_init_configure_params_by_vdevice(hef, vdevice, &config_params); REQUIRE_SUCCESS(status, l_release_hef, "Failed initializing configure parameters"); // Modify batch_size for each network @@ -254,5 +254,5 @@ l_release_hef: l_release_vdevice: (void) hailo_release_vdevice(vdevice); l_exit: - return status; + return (int)status; } diff --git a/hailort/libhailort/examples/c/notification_callback_example/CMakeLists.txt b/hailort/libhailort/examples/c/notification_callback_example/CMakeLists.txt index 0081620..6345906 100644 --- a/hailort/libhailort/examples/c/notification_callback_example/CMakeLists.txt +++ b/hailort/libhailort/examples/c/notification_callback_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0) find_package(Threads REQUIRED) set(THREADS_PREFER_PTHREAD_FLAG ON) -find_package(HailoRT 4.13.0 EXACT REQUIRED) +find_package(HailoRT 4.14.0 EXACT REQUIRED) SET_SOURCE_FILES_PROPERTIES(notification_callback_example.c PROPERTIES LANGUAGE C) diff --git a/hailort/libhailort/examples/c/notification_callback_example/notification_callback_example.c b/hailort/libhailort/examples/c/notification_callback_example/notification_callback_example.c index 9c7d15c..b95f55a 100644 --- a/hailort/libhailort/examples/c/notification_callback_example/notification_callback_example.c +++ b/hailort/libhailort/examples/c/notification_callback_example/notification_callback_example.c @@ -73,5 +73,5 @@ int main() l_release_device: (void) hailo_release_device(device); l_exit: - return status; + return (int)status; } diff --git 
a/hailort/libhailort/examples/c/power_measurement_example/CMakeLists.txt b/hailort/libhailort/examples/c/power_measurement_example/CMakeLists.txt index 6545a54..d3921a3 100644 --- a/hailort/libhailort/examples/c/power_measurement_example/CMakeLists.txt +++ b/hailort/libhailort/examples/c/power_measurement_example/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 3.0.0) -find_package(HailoRT 4.13.0 EXACT REQUIRED) +find_package(HailoRT 4.14.0 EXACT REQUIRED) SET_SOURCE_FILES_PROPERTIES(power_measurement_example.c PROPERTIES LANGUAGE C) diff --git a/hailort/libhailort/examples/c/power_measurement_example/power_measurement_example.c b/hailort/libhailort/examples/c/power_measurement_example/power_measurement_example.c index b68687f..750cc73 100644 --- a/hailort/libhailort/examples/c/power_measurement_example/power_measurement_example.c +++ b/hailort/libhailort/examples/c/power_measurement_example/power_measurement_example.c @@ -134,5 +134,5 @@ int main(int argc, char **argv) l_release_vdevice: (void) hailo_release_vdevice(vdevice); l_exit: - return status; + return (int)status; } \ No newline at end of file diff --git a/hailort/libhailort/examples/c/raw_async_streams_single_thread_example/CMakeLists.txt b/hailort/libhailort/examples/c/raw_async_streams_single_thread_example/CMakeLists.txt new file mode 100644 index 0000000..2fe6c27 --- /dev/null +++ b/hailort/libhailort/examples/c/raw_async_streams_single_thread_example/CMakeLists.txt @@ -0,0 +1,20 @@ +cmake_minimum_required(VERSION 3.0.0) + +find_package(Threads REQUIRED) +set(THREADS_PREFER_PTHREAD_FLAG ON) + +find_package(HailoRT 4.14.0 EXACT REQUIRED) + +SET_SOURCE_FILES_PROPERTIES(raw_async_streams_single_thread_example.c PROPERTIES LANGUAGE C) + +add_executable(c_raw_async_streams_single_thread_example raw_async_streams_single_thread_example.c) +target_link_libraries(c_raw_async_streams_single_thread_example PRIVATE HailoRT::libhailort Threads::Threads) 
+target_include_directories(c_raw_async_streams_single_thread_example PRIVATE "${CMAKE_CURRENT_LIST_DIR}/../common") + +if(WIN32) + target_compile_options(c_raw_async_streams_single_thread_example PRIVATE + /DWIN32_LEAN_AND_MEAN + /DNOMINMAX # NOMINMAX is required in order to play nice with std::min/std::max (otherwise Windows.h defines it's own) + /wd4201 /wd4251 + ) +endif() \ No newline at end of file diff --git a/hailort/libhailort/examples/c/raw_async_streams_single_thread_example/raw_async_streams_single_thread_example.c b/hailort/libhailort/examples/c/raw_async_streams_single_thread_example/raw_async_streams_single_thread_example.c new file mode 100644 index 0000000..b5ce769 --- /dev/null +++ b/hailort/libhailort/examples/c/raw_async_streams_single_thread_example/raw_async_streams_single_thread_example.c @@ -0,0 +1,253 @@ +/** + * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file raw_async_streams_single_thread_example.c + * This example demonstrates basic usage of HailoRT async streaming api with a single thread. 
+ **/ + +#include "common.h" +#include "hailo/hailort.h" + +#include + +#if defined(__unix__) +#include +#endif + + +#define HEF_FILE ("hefs/shortcut_net.hef") +#define MAX_EDGE_LAYERS_PER_DIR (16) +#define MAX_EDGE_LAYERS (MAX_EDGE_LAYERS_PER_DIR * 2) +#define MAX_ONGOING_TRANSFERS (16) +#define INFER_TIME_SECONDS (5) + +#if defined(__unix__) +#define INVALID_ADDR (MAP_FAILED) +#define page_aligned_alloc(size) mmap(NULL, (size), PROT_WRITE | PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0) +#define page_aligned_free(addr, size) munmap((addr), (size)) +#elif defined(_MSC_VER) +#define INVALID_ADDR (NULL) +#define page_aligned_alloc(size) VirtualAlloc(NULL, (size), MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE) +#define page_aligned_free(addr, size) VirtualFree((addr), 0, MEM_RELEASE) +#else /* defined(_MSC_VER) */ +#pragma error("Aligned alloc not supported") +#endif + +#ifndef MIN +#define MIN(a, b) (((a) < (b)) ? (a) : (b)) +#endif + + +static void output_done_callback(const hailo_stream_read_async_completion_info_t *completion_info) +{ + hailo_output_stream stream = (hailo_output_stream)completion_info->opaque; + hailo_status status = HAILO_UNINITIALIZED; + + switch (completion_info->status) { + case HAILO_SUCCESS: + // Real applications can forward the buffer to post-process/display. Here we just re-launch new async reads. + status = hailo_stream_read_raw_buffer_async(stream, completion_info->buffer_addr, completion_info->buffer_size, + output_done_callback, stream); + if ((HAILO_SUCCESS != status) && (HAILO_STREAM_NOT_ACTIVATED != status)) { + fprintf(stderr, "Failed read async with status=%d\n", status); + } + break; + case HAILO_STREAM_ABORTED_BY_USER: + // Transfer was canceled, finish gracefully. + break; + default: + fprintf(stderr, "Got an unexpected status on callback. 
status=%d\n", completion_info->status); + } +} + +static void input_done_callback(const hailo_stream_write_async_completion_info_t *completion_info) +{ + hailo_input_stream stream = (hailo_input_stream)completion_info->opaque; + hailo_status status = HAILO_UNINITIALIZED; + + switch (completion_info->status) { + case HAILO_SUCCESS: + // Real applications may free the buffer and replace it with new buffer ready to be sent. Here we just re-launch + // new async writes. + status = hailo_stream_write_raw_buffer_async(stream, completion_info->buffer_addr, completion_info->buffer_size, + input_done_callback, stream); + if ((HAILO_SUCCESS != status) && (HAILO_STREAM_NOT_ACTIVATED != status)) { + fprintf(stderr, "Failed write async with status=%d\n", status); + } + break; + case HAILO_STREAM_ABORTED_BY_USER: + // Transfer was canceled, finish gracefully. + break; + default: + fprintf(stderr, "Got an unexpected status on callback. status=%d\n", completion_info->status); + } +} + +static hailo_status infer(hailo_configured_network_group network_group, size_t number_input_streams, + hailo_input_stream *input_streams, size_t number_output_streams, hailo_output_stream *output_streams, + size_t ongoing_transfers) +{ + hailo_status status = HAILO_UNINITIALIZED; + hailo_activated_network_group activated_network_group = NULL; + size_t i = 0; + size_t frame_index = 0; + size_t frame_size = 0; + size_t stream_index = 0; + void *current_buffer = NULL; + void *buffers[MAX_EDGE_LAYERS * MAX_ONGOING_TRANSFERS] = {0}; + size_t allocated_buffers = 0; + + status = hailo_activate_network_group(network_group, NULL, &activated_network_group); + REQUIRE_SUCCESS(status, l_exit, "Failed activate network group status=%d", status); + + // We launch "ongoing_transfers" async operations for both input and output streams. On each async callback, we launch + // some new operation with the same buffer. 
+ for (stream_index = 0; stream_index < number_output_streams; stream_index++) { + frame_size = hailo_get_output_stream_frame_size(output_streams[stream_index]); + + // ongoing_transfers is less than or equal to the stream's max async queue size, so we can start parallel reads. + for (frame_index = 0; frame_index < ongoing_transfers; frame_index++) { + // Buffers read from async operation must be page aligned. + current_buffer = page_aligned_alloc(frame_size); + REQUIRE_ACTION(INVALID_ADDR != current_buffer, status=HAILO_OUT_OF_HOST_MEMORY, l_deactivate, "allocation failed"); + buffers[allocated_buffers++] = current_buffer; + + status = hailo_stream_read_raw_buffer_async(output_streams[stream_index], current_buffer, frame_size, + output_done_callback, output_streams[stream_index]); + REQUIRE_SUCCESS(status, l_deactivate, "Failed read async with status=%d", status); + } + } + + for (stream_index = 0; stream_index < number_input_streams; stream_index++) { + frame_size = hailo_get_input_stream_frame_size(input_streams[stream_index]); + + // ongoing_transfers is less than or equal to the stream's max async queue size, so we can start parallel writes. + for (frame_index = 0; frame_index < ongoing_transfers; frame_index++) { + // Buffers written to async operation must be page aligned. + current_buffer = page_aligned_alloc(frame_size); + REQUIRE_ACTION(INVALID_ADDR != current_buffer, status=HAILO_OUT_OF_HOST_MEMORY, l_deactivate, "allocation failed"); + buffers[allocated_buffers++] = current_buffer; + + status = hailo_stream_write_raw_buffer_async(input_streams[stream_index], current_buffer, frame_size, + input_done_callback, input_streams[stream_index]); + REQUIRE_SUCCESS(status, l_deactivate, "Failed write async with status=%d", status); + } + } + + // After all async operations are launched, the inference will continue until we deactivate the network. 
+ hailo_sleep(INFER_TIME_SECONDS); + + status = HAILO_SUCCESS; +l_deactivate: + // Calling hailo_deactivate_network_group will make sure that all async operations are done. All pending async I/O + // operations will be canceled and their callbacks called with status=HAILO_STREAM_ABORTED_BY_USER. + (void) hailo_deactivate_network_group(activated_network_group); + + // There are no async I/O operations ongoing so it is safe to free the buffers now. + for (i = 0; i < allocated_buffers; i++) page_aligned_free(buffers[i], frame_size); + +l_exit: + return status; +} + +static hailo_status configure_device(hailo_device device, const char *hef_file, + hailo_configured_network_group *network_group) +{ + hailo_status status = HAILO_UNINITIALIZED; + hailo_hef hef = NULL; + hailo_configure_params_t configure_params = {0}; + size_t i = 0; + size_t network_group_size = 1; + + // Load HEF file. + status = hailo_create_hef_file(&hef, hef_file); + REQUIRE_SUCCESS(status, l_exit, "Failed reading hef file %s", hef_file); + + // Create configure params + status = hailo_init_configure_params_by_device(hef, device, &configure_params); + REQUIRE_SUCCESS(status, l_exit, "Failed init configure params"); + REQUIRE_ACTION(configure_params.network_group_params_count == 1, status=HAILO_INVALID_ARGUMENT, l_exit, + "Unexpected network group size"); + + // Set HAILO_STREAM_FLAGS_ASYNC for all streams in order to use async api. 
+ for (i = 0; i < configure_params.network_group_params[0].stream_params_by_name_count; i++) { + configure_params.network_group_params[0].stream_params_by_name[i].stream_params.flags = HAILO_STREAM_FLAGS_ASYNC; + } + + status = hailo_configure_device(device, hef, &configure_params, network_group, &network_group_size); + REQUIRE_SUCCESS(status, l_release_hef, "Failed configuring device"); + + status = HAILO_SUCCESS; +l_release_hef: + (void) hailo_release_hef(hef); +l_exit: + return status; +} + +int main() +{ + hailo_status status = HAILO_UNINITIALIZED; + hailo_device device = NULL; + hailo_configured_network_group network_group = NULL; + hailo_stream_info_t input_streams_info[MAX_EDGE_LAYERS_PER_DIR] = {0}; + hailo_stream_info_t output_streams_info[MAX_EDGE_LAYERS_PER_DIR] = {0}; + hailo_input_stream input_streams[MAX_EDGE_LAYERS_PER_DIR] = {NULL}; + hailo_output_stream output_streams[MAX_EDGE_LAYERS_PER_DIR] = {NULL}; + size_t number_input_streams = 0; + size_t number_output_streams = 0; + size_t index = 0; + size_t queue_size = 0; + size_t ongoing_transfers = MAX_ONGOING_TRANSFERS; + + // Create device object. + status = hailo_create_device_by_id(NULL, &device); + REQUIRE_SUCCESS(status, l_exit, "Failed to create device"); + + // Configure device with HEF. + status = configure_device(device, HEF_FILE, &network_group); + REQUIRE_SUCCESS(status, l_release_device, "Failed configure_device"); + + // Get input/output stream objects. 
+ status = hailo_network_group_get_input_stream_infos(network_group, input_streams_info, MAX_EDGE_LAYERS_PER_DIR, + &number_input_streams); + REQUIRE_SUCCESS(status, l_release_device, "Failed getting input streams infos"); + + status = hailo_network_group_get_output_stream_infos(network_group, output_streams_info, MAX_EDGE_LAYERS_PER_DIR, + &number_output_streams); + REQUIRE_SUCCESS(status, l_release_device, "Failed getting output streams infos"); + + for (index = 0; index < number_input_streams; index++) { + status = hailo_get_input_stream(network_group, input_streams_info[index].name, &input_streams[index]); + REQUIRE_SUCCESS(status, l_release_device, "Failed getting input stream %s", input_streams_info[index].name); + + status = hailo_input_stream_get_async_max_queue_size(input_streams[index], &queue_size); + REQUIRE_SUCCESS(status, l_release_device, "Failed getting queue size"); + + ongoing_transfers = MIN(queue_size, ongoing_transfers); + } + + for (index = 0; index < number_output_streams; index++) { + status = hailo_get_output_stream(network_group, output_streams_info[index].name, &output_streams[index]); + REQUIRE_SUCCESS(status, l_release_device, "Failed getting output stream %s", output_streams_info[index].name); + + status = hailo_output_stream_get_async_max_queue_size(output_streams[index], &queue_size); + REQUIRE_SUCCESS(status, l_release_device, "Failed getting queue size"); + + ongoing_transfers = MIN(queue_size, ongoing_transfers); + } + + // Run infer. 
+ status = infer(network_group, number_input_streams, input_streams, number_output_streams, output_streams, + ongoing_transfers); + REQUIRE_SUCCESS(status, l_release_device, "Failed performing inference"); + + status = HAILO_SUCCESS; + printf("Inference ran successfully\n"); + +l_release_device: + (void) hailo_release_device(device); +l_exit: + return (int)status; +} diff --git a/hailort/libhailort/examples/c/raw_streams_example/CMakeLists.txt b/hailort/libhailort/examples/c/raw_streams_example/CMakeLists.txt index 8ed9ef7..b92b40c 100644 --- a/hailort/libhailort/examples/c/raw_streams_example/CMakeLists.txt +++ b/hailort/libhailort/examples/c/raw_streams_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0) find_package(Threads REQUIRED) set(THREADS_PREFER_PTHREAD_FLAG ON) -find_package(HailoRT 4.13.0 EXACT REQUIRED) +find_package(HailoRT 4.14.0 EXACT REQUIRED) SET_SOURCE_FILES_PROPERTIES(raw_streams_example.c PROPERTIES LANGUAGE C) diff --git a/hailort/libhailort/examples/c/raw_streams_example/raw_streams_example.c b/hailort/libhailort/examples/c/raw_streams_example/raw_streams_example.c index 04093fd..dfad815 100644 --- a/hailort/libhailort/examples/c/raw_streams_example/raw_streams_example.c +++ b/hailort/libhailort/examples/c/raw_streams_example/raw_streams_example.c @@ -198,7 +198,7 @@ int main() status = hailo_create_hef_file(&hef, HEF_FILE); REQUIRE_SUCCESS(status, l_release_device, "Failed creating hef file %s", HEF_FILE); - status = hailo_init_configure_params(hef, HAILO_STREAM_INTERFACE_PCIE, &configure_params); + status = hailo_init_configure_params_by_device(hef, device, &configure_params); REQUIRE_SUCCESS(status, l_release_hef, "Failed init configure params"); status = hailo_configure_device(device, hef, &configure_params, &network_group, &network_group_size); @@ -239,5 +239,5 @@ l_release_hef: l_release_device: (void) hailo_release_device(device); l_exit: - return status; + return (int)status; } diff --git 
a/hailort/libhailort/examples/c/switch_network_groups_example/CMakeLists.txt b/hailort/libhailort/examples/c/switch_network_groups_example/CMakeLists.txt index 7aee572..05ee65e 100644 --- a/hailort/libhailort/examples/c/switch_network_groups_example/CMakeLists.txt +++ b/hailort/libhailort/examples/c/switch_network_groups_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0) find_package(Threads REQUIRED) set(THREADS_PREFER_PTHREAD_FLAG ON) -find_package(HailoRT 4.13.0 EXACT REQUIRED) +find_package(HailoRT 4.14.0 EXACT REQUIRED) SET_SOURCE_FILES_PROPERTIES(switch_network_groups_example.c PROPERTIES LANGUAGE C) diff --git a/hailort/libhailort/examples/c/switch_network_groups_example/switch_network_groups_example.c b/hailort/libhailort/examples/c/switch_network_groups_example/switch_network_groups_example.c index df4e39b..2efcf9a 100644 --- a/hailort/libhailort/examples/c/switch_network_groups_example/switch_network_groups_example.c +++ b/hailort/libhailort/examples/c/switch_network_groups_example/switch_network_groups_example.c @@ -20,6 +20,8 @@ #define INFER_FRAME_COUNT (100) #define HEF_COUNT (2) #define DEVICE_COUNT (1) +#define BATCH_SIZE_1 (1) +#define BATCH_SIZE_2 (2) #define SCHEDULER_TIMEOUT_MS (100) #define SCHEDULER_THRESHOLD (3) @@ -136,7 +138,7 @@ hailo_status build_vstreams(hailo_configured_network_group network_group, for (size_t frame_index = 0; frame_index < input_frame_sizes[i]; frame_index++) { src_data[i][frame_index] = (uint8_t)(rand() % 256); } - } + } for (size_t i = 0; i < output_vstream_size; i++) { status = hailo_get_output_vstream_frame_size(output_vstreams[i], &output_frame_sizes[i]); @@ -190,6 +192,7 @@ int main() read_thread_args_t read_args[HEF_COUNT][MAX_EDGE_LAYERS]; char HEF_FILES[HEF_COUNT][MAX_HEF_PATH_LEN] = {"hefs/multi_network_shortcut_net.hef", "hefs/shortcut_net.hef"}; + uint16_t batch_sizes[HEF_COUNT] = {BATCH_SIZE_1, BATCH_SIZE_2}; status = hailo_init_vdevice_params(¶ms); REQUIRE_SUCCESS(status, l_exit, 
"Failed init vdevice_params"); @@ -203,21 +206,34 @@ int main() status = hailo_create_hef_file(&hef[hef_index], HEF_FILES[hef_index]); REQUIRE_SUCCESS(status, l_release_hef, "Failed creating hef file %s", HEF_FILES[hef_index]); - status = hailo_init_configure_params(hef[hef_index], HAILO_STREAM_INTERFACE_PCIE, &configure_params); + status = hailo_init_configure_params_by_vdevice(hef[hef_index], vdevice, &configure_params); REQUIRE_SUCCESS(status, l_release_hef, "Failed init configure params"); + // Modify batch_size for each network group + for (size_t i = 0; i < configure_params.network_group_params_count; i++) { + configure_params.network_group_params[i].batch_size = batch_sizes[hef_index]; + configure_params.network_group_params[i].power_mode = HAILO_POWER_MODE_ULTRA_PERFORMANCE; + } + status = hailo_configure_vdevice(vdevice, hef[hef_index], &configure_params, &network_groups[hef_index], &network_groups_size); REQUIRE_SUCCESS(status, l_release_hef, "Failed configuring vdevcie"); REQUIRE_ACTION(network_groups_size == 1, status = HAILO_INVALID_ARGUMENT, l_release_hef, "Unexpected network group size"); - // Set scheduler's timeout and threshold for the first network group, in order to give priority to the second network group if (0 == hef_index) { + // Set scheduler's timeout and threshold for the first network group, it will give priority to the second network group status = hailo_set_scheduler_timeout(network_groups[hef_index], SCHEDULER_TIMEOUT_MS, NULL); REQUIRE_SUCCESS(status, l_release_hef, "Failed setting scheduler timeout"); status = hailo_set_scheduler_threshold(network_groups[hef_index], SCHEDULER_THRESHOLD, NULL); REQUIRE_SUCCESS(status, l_release_hef, "Failed setting scheduler threshold"); + + // Setting higher priority to the first network-group directly. 
+ // The practical meaning is that the first network will be ready to run only if ``SCHEDULER_THRESHOLD`` send requests have been accumulated, + // or more than ``SCHEDULER_TIMEOUT_MS`` time has passed and at least one send request has been accumulated. + // However when both the first and the second networks are ready to run, the first network will be preferred over the second network. + status = hailo_set_scheduler_priority(network_groups[hef_index], HAILO_SCHEDULER_PRIORITY_NORMAL+1, NULL); + REQUIRE_SUCCESS(status, l_release_hef, "Failed setting scheduler priority"); } status = build_vstreams(network_groups[hef_index], @@ -282,10 +298,10 @@ l_release_vstreams: l_release_hef: for (hef_index = 0; hef_index < HEF_COUNT; hef_index++) { if (NULL != hef[hef_index]) { - (void)hailo_release_hef(hef[hef_index]); + (void)hailo_release_hef(hef[hef_index]); } } (void)hailo_release_vdevice(vdevice); l_exit: - return status; + return (int)status; } \ No newline at end of file diff --git a/hailort/libhailort/examples/c/switch_network_groups_manually_example/CMakeLists.txt b/hailort/libhailort/examples/c/switch_network_groups_manually_example/CMakeLists.txt index e0768bc..7687b0a 100644 --- a/hailort/libhailort/examples/c/switch_network_groups_manually_example/CMakeLists.txt +++ b/hailort/libhailort/examples/c/switch_network_groups_manually_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0) find_package(Threads REQUIRED) set(THREADS_PREFER_PTHREAD_FLAG ON) -find_package(HailoRT 4.13.0 EXACT REQUIRED) +find_package(HailoRT 4.14.0 EXACT REQUIRED) SET_SOURCE_FILES_PROPERTIES(switch_network_groups_manually_example.c PROPERTIES LANGUAGE C) diff --git a/hailort/libhailort/examples/c/switch_network_groups_manually_example/switch_network_groups_manually_example.c b/hailort/libhailort/examples/c/switch_network_groups_manually_example/switch_network_groups_manually_example.c index f7872e0..07bc2c7 100644 --- 
a/hailort/libhailort/examples/c/switch_network_groups_manually_example/switch_network_groups_manually_example.c +++ b/hailort/libhailort/examples/c/switch_network_groups_manually_example/switch_network_groups_manually_example.c @@ -5,7 +5,7 @@ /** * @file switch_network_groups_manually_example.c * This example demonstrates basic usage of HailoRT streaming api over multiple network groups, using vstreams. - * It loads several HEF networks with a single input and a single output into a Hailo VDevice and performs a inference on each one. + * It loads several HEF networks with a single input and a single output into a Hailo VDevice and performs a inference on each one. * After inference is finished, the example switches to the next HEF and start inference again. **/ @@ -116,14 +116,14 @@ thread_return_type output_vstream_thread_func(void *args) status = hailo_vstream_read_raw_buffer(output_vstreams[hef_index], dst_data[hef_index], output_frame_size[hef_index]); REQUIRE_SUCCESS(status, l_deactivate_network_group, "Failed reading output frame from device"); - + // Process data here } - + // Deavticate network after finishing inference status = hailo_deactivate_network_group(*(output_vstream_args->activated_network_group)); REQUIRE_SUCCESS(status, l_deactivate_network_group, "Failed Deactivating network"); - + // Dont activate on last iteration if (hef_index < HEF_COUNT - 1) { // Activate next network so input thread can start sending again @@ -192,25 +192,25 @@ int main() status = hailo_create_hef_file(&hef[hef_index], HEF_FILES[hef_index]); REQUIRE_SUCCESS(status, l_release_hef, "Failed creating hef file %s", HEF_FILES[hef_index]); - status = hailo_init_configure_params(hef[hef_index], HAILO_STREAM_INTERFACE_PCIE, &configure_params); + status = hailo_init_configure_params_by_vdevice(hef[hef_index], vdevice, &configure_params); REQUIRE_SUCCESS(status, l_release_hef, "Failed init configure params"); status = hailo_configure_vdevice(vdevice, hef[hef_index], 
&configure_params, &network_groups[hef_index], &network_groups_size); REQUIRE_SUCCESS(status, l_release_hef, "Failed configuring vdevcie"); - REQUIRE_ACTION(network_groups_size == 1, status = HAILO_INVALID_ARGUMENT, l_release_hef, + REQUIRE_ACTION(network_groups_size == 1, status = HAILO_INVALID_ARGUMENT, l_release_hef, "Unexpected network group size"); // Mae sure each hef is single input single output status = hailo_make_input_vstream_params(network_groups[hef_index], true, HAILO_FORMAT_TYPE_AUTO, &input_vstream_params[hef_index], &input_vstream_size); REQUIRE_SUCCESS(status, l_release_hef, "Failed making input virtual stream params"); - REQUIRE_ACTION(input_vstream_size == 1, status = HAILO_INVALID_ARGUMENT, l_release_hef, + REQUIRE_ACTION(input_vstream_size == 1, status = HAILO_INVALID_ARGUMENT, l_release_hef, "INVALID HEF - Only hefs with single input vstream are allowed"); status = hailo_make_output_vstream_params(network_groups[hef_index], true, HAILO_FORMAT_TYPE_AUTO, &output_vstream_params[hef_index], &output_vstream_size); REQUIRE_SUCCESS(status, l_release_hef, "Failed making output virtual stream params"); - REQUIRE_ACTION(output_vstream_size == 1, status = HAILO_INVALID_ARGUMENT, l_release_hef, + REQUIRE_ACTION(output_vstream_size == 1, status = HAILO_INVALID_ARGUMENT, l_release_hef, "INVALID HEF - Only hefs with single output vstream are allowed"); } @@ -251,10 +251,10 @@ l_join_input_thread: l_release_hef: for (hef_index = 0; hef_index < HEF_COUNT; hef_index++) { if (NULL != hef[hef_index]) { - (void)hailo_release_hef(hef[hef_index]); + (void)hailo_release_hef(hef[hef_index]); } } (void)hailo_release_vdevice(vdevice); l_exit: - return status; + return (int)status; } diff --git a/hailort/libhailort/examples/c/vstreams_example/CMakeLists.txt b/hailort/libhailort/examples/c/vstreams_example/CMakeLists.txt index 1c32dfc..fb4af1b 100644 --- a/hailort/libhailort/examples/c/vstreams_example/CMakeLists.txt +++ 
b/hailort/libhailort/examples/c/vstreams_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0) find_package(Threads REQUIRED) set(THREADS_PREFER_PTHREAD_FLAG ON) -find_package(HailoRT 4.13.0 EXACT REQUIRED) +find_package(HailoRT 4.14.0 EXACT REQUIRED) SET_SOURCE_FILES_PROPERTIES(vstreams_example.c PROPERTIES LANGUAGE C) diff --git a/hailort/libhailort/examples/c/vstreams_example/vstreams_example.c b/hailort/libhailort/examples/c/vstreams_example/vstreams_example.c index 04c5076..6338492 100644 --- a/hailort/libhailort/examples/c/vstreams_example/vstreams_example.c +++ b/hailort/libhailort/examples/c/vstreams_example/vstreams_example.c @@ -137,6 +137,7 @@ int main() size_t output_vstreams_size = MAX_EDGE_LAYERS; hailo_input_vstream input_vstreams[MAX_EDGE_LAYERS] = {NULL}; hailo_output_vstream output_vstreams[MAX_EDGE_LAYERS] = {NULL}; + bool quantized = true; status = hailo_create_vdevice(NULL, &vdevice); REQUIRE_SUCCESS(status, l_exit, "Failed to create vdevice"); @@ -144,19 +145,31 @@ int main() status = hailo_create_hef_file(&hef, HEF_FILE); REQUIRE_SUCCESS(status, l_release_vdevice, "Failed reading hef file"); - status = hailo_init_configure_params(hef, HAILO_STREAM_INTERFACE_PCIE, &config_params); + status = hailo_init_configure_params_by_vdevice(hef, vdevice, &config_params); REQUIRE_SUCCESS(status, l_release_hef, "Failed initializing configure parameters"); status = hailo_configure_vdevice(vdevice, hef, &config_params, &network_group, &network_group_size); - REQUIRE_SUCCESS(status, l_release_hef, "Failed configure vdevcie from hef"); + REQUIRE_SUCCESS(status, l_release_hef, "Failed configure vdevice from hef"); REQUIRE_ACTION(network_group_size == 1, status = HAILO_INVALID_ARGUMENT, l_release_hef, "Invalid network group size"); - status = hailo_make_input_vstream_params(network_group, true, HAILO_FORMAT_TYPE_AUTO, + + // Set input format type to auto, and mark the data as quantized - libhailort will not scale the data before writing to 
the HW + quantized = true; + status = hailo_make_input_vstream_params(network_group, quantized, HAILO_FORMAT_TYPE_AUTO, input_vstream_params, &input_vstreams_size); REQUIRE_SUCCESS(status, l_release_hef, "Failed making input virtual stream params"); - status = hailo_make_output_vstream_params(network_group, true, HAILO_FORMAT_TYPE_AUTO, + /* The input format order in the example HEF is NHWC in the user-side (may be seen using 'hailortcli parse-hef ). + Here we override the user-side format order to be NCHW */ + for (size_t i = 0 ; i < input_vstreams_size; i++) { + input_vstream_params[i].params.user_buffer_format.order = HAILO_FORMAT_ORDER_NCHW; + } + + // Set output format type to float32, and mark the data as not quantized - libhailort will de-quantize the data after reading from the HW + // Note: this process might affect the overall performance + quantized = false; + status = hailo_make_output_vstream_params(network_group, quantized, HAILO_FORMAT_TYPE_FLOAT32, output_vstream_params, &output_vstreams_size); REQUIRE_SUCCESS(status, l_release_hef, "Failed making output virtual stream params"); @@ -186,5 +199,5 @@ l_release_hef: l_release_vdevice: (void) hailo_release_vdevice(vdevice); l_exit: - return status; + return (int)status; } diff --git a/hailort/libhailort/examples/cpp/CMakeLists.txt b/hailort/libhailort/examples/cpp/CMakeLists.txt index 10eea7a..ba966bf 100644 --- a/hailort/libhailort/examples/cpp/CMakeLists.txt +++ b/hailort/libhailort/examples/cpp/CMakeLists.txt @@ -3,7 +3,6 @@ cmake_minimum_required(VERSION 3.0.0) add_subdirectory(vstreams_example) add_subdirectory(infer_pipeline_example) add_subdirectory(raw_streams_example) -add_subdirectory(raw_async_streams_example) add_subdirectory(multi_network_vstream_example) add_subdirectory(switch_network_groups_example) add_subdirectory(switch_network_groups_manually_example) @@ -12,16 +11,28 @@ add_subdirectory(power_measurement_example) add_subdirectory(multi_process_example) 
add_subdirectory(notification_callback_example) -add_custom_target(cpp_hailort_examples) -add_dependencies(cpp_hailort_examples + +set(CPP_EXAMPLE_TARGETS cpp_vstreams_example cpp_infer_pipeline_example cpp_raw_streams_example - cpp_raw_async_streams_example cpp_multi_network_vstream_example cpp_switch_network_groups_example cpp_switch_network_groups_manually_example cpp_multi_device_example cpp_power_measurement_example cpp_multi_process_example - cpp_notification_callback_example) \ No newline at end of file + cpp_notification_callback_example +) + +if(NOT CMAKE_SYSTEM_NAME STREQUAL QNX) + # TODO: HRT-10956 support QNX async examples + add_subdirectory(raw_async_streams_multi_thread_example) + add_subdirectory(raw_async_streams_single_thread_example) + set(CPP_EXAMPLE_TARGETS ${C_EXAMPLE_TARGETS} + cpp_raw_async_streams_multi_thread_example + cpp_raw_async_streams_single_thread_example) +endif() + +add_custom_target(cpp_hailort_examples) +add_dependencies(cpp_hailort_examples ${CPP_EXAMPLE_TARGETS}) \ No newline at end of file diff --git a/hailort/libhailort/examples/cpp/infer_pipeline_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/infer_pipeline_example/CMakeLists.txt index 9f13d63..8967d42 100644 --- a/hailort/libhailort/examples/cpp/infer_pipeline_example/CMakeLists.txt +++ b/hailort/libhailort/examples/cpp/infer_pipeline_example/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 3.0.0) -find_package(HailoRT 4.13.0 EXACT REQUIRED) +find_package(HailoRT 4.14.0 EXACT REQUIRED) add_executable(cpp_infer_pipeline_example infer_pipeline_example.cpp) target_link_libraries(cpp_infer_pipeline_example PRIVATE HailoRT::libhailort) diff --git a/hailort/libhailort/examples/cpp/multi_device_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/multi_device_example/CMakeLists.txt index 99fd22e..d766e49 100644 --- a/hailort/libhailort/examples/cpp/multi_device_example/CMakeLists.txt +++ 
b/hailort/libhailort/examples/cpp/multi_device_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0) find_package(Threads REQUIRED) set(THREADS_PREFER_PTHREAD_FLAG ON) -find_package(HailoRT 4.13.0 EXACT REQUIRED) +find_package(HailoRT 4.14.0 EXACT REQUIRED) add_executable(cpp_multi_device_example multi_device_example.cpp) target_link_libraries(cpp_multi_device_example PRIVATE HailoRT::libhailort Threads::Threads) diff --git a/hailort/libhailort/examples/cpp/multi_device_example/multi_device_example.cpp b/hailort/libhailort/examples/cpp/multi_device_example/multi_device_example.cpp index 2d8b9c6..5bb6b47 100644 --- a/hailort/libhailort/examples/cpp/multi_device_example/multi_device_example.cpp +++ b/hailort/libhailort/examples/cpp/multi_device_example/multi_device_example.cpp @@ -14,6 +14,7 @@ #define HEF_FILE ("hefs/shortcut_net.hef") +constexpr size_t BATCH_SIZE = 1; constexpr size_t FRAMES_COUNT = 100; constexpr bool QUANTIZED = true; constexpr hailo_format_type_t FORMAT_TYPE = HAILO_FORMAT_TYPE_AUTO; @@ -21,20 +22,23 @@ constexpr size_t MAX_LAYER_EDGES = 16; using namespace hailort; -Expected> configure_network_group(VDevice &vdevice) +Expected> configure_network_group(VDevice &vdevice, Hef &hef, uint16_t batch_size) { - auto hef = Hef::create(HEF_FILE); - if (!hef) { - return make_unexpected(hef.status()); - } - - auto configure_params = vdevice.create_configure_params(hef.value()); + auto configure_params = vdevice.create_configure_params(hef); if (!configure_params) { + std::cerr << "Failed to create configure params" << std::endl; return make_unexpected(configure_params.status()); } - auto network_groups = vdevice.configure(hef.value(), configure_params.value()); + // Modify batch_size and power_mode for each network group + for (auto& network_group_params : configure_params.value()) { + network_group_params.second.batch_size = batch_size; + network_group_params.second.power_mode = HAILO_POWER_MODE_ULTRA_PERFORMANCE; + } + + auto 
network_groups = vdevice.configure(hef, configure_params.value()); if (!network_groups) { + std::cerr << "Failed to configure vdevice" << std::endl; return make_unexpected(network_groups.status()); } @@ -82,7 +86,6 @@ void read_all(OutputVStream &output, hailo_status &status) hailo_status infer(std::vector &input_streams, std::vector &output_streams) { - hailo_status status = HAILO_SUCCESS; // Success oriented hailo_status input_status[MAX_LAYER_EDGES] = {HAILO_UNINITIALIZED}; hailo_status output_status[MAX_LAYER_EDGES] = {HAILO_UNINITIALIZED}; @@ -128,11 +131,14 @@ hailo_status infer(std::vector &input_streams, std::vectorat(0)); + auto device = Device::create(); if (!device) { std::cerr << "Failed to create device " << device.status() << std::endl; return device.status(); diff --git a/hailort/libhailort/examples/cpp/power_measurement_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/power_measurement_example/CMakeLists.txt index 0f412ee..17522af 100644 --- a/hailort/libhailort/examples/cpp/power_measurement_example/CMakeLists.txt +++ b/hailort/libhailort/examples/cpp/power_measurement_example/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 3.0.0) -find_package(HailoRT 4.13.0 EXACT REQUIRED) +find_package(HailoRT 4.14.0 EXACT REQUIRED) add_executable(cpp_power_measurement_example power_measurement_example.cpp) target_link_libraries(cpp_power_measurement_example PRIVATE HailoRT::libhailort) diff --git a/hailort/libhailort/examples/cpp/raw_async_streams_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/raw_async_streams_example/CMakeLists.txt deleted file mode 100644 index b453e64..0000000 --- a/hailort/libhailort/examples/cpp/raw_async_streams_example/CMakeLists.txt +++ /dev/null @@ -1,19 +0,0 @@ -cmake_minimum_required(VERSION 3.0.0) - -find_package(Threads REQUIRED) -set(THREADS_PREFER_PTHREAD_FLAG ON) - -find_package(HailoRT 4.13.0 EXACT REQUIRED) - -add_executable(cpp_raw_async_streams_example buffer_pool.cpp 
raw_async_streams_example.cpp) -target_link_libraries(cpp_raw_async_streams_example PRIVATE HailoRT::libhailort Threads::Threads) - -if(WIN32) - target_compile_options(cpp_raw_async_streams_example PRIVATE - /DWIN32_LEAN_AND_MEAN - /DNOMINMAX # NOMINMAX is required in order to play nice with std::min/std::max (otherwise Windows.h defines it's own) - /wd4201 /wd4251 - ) -endif() - -set_target_properties(cpp_raw_async_streams_example PROPERTIES CXX_STANDARD 14) \ No newline at end of file diff --git a/hailort/libhailort/examples/cpp/raw_async_streams_example/buffer_pool.cpp b/hailort/libhailort/examples/cpp/raw_async_streams_example/buffer_pool.cpp deleted file mode 100644 index 706fbe8..0000000 --- a/hailort/libhailort/examples/cpp/raw_async_streams_example/buffer_pool.cpp +++ /dev/null @@ -1,76 +0,0 @@ -/** - * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. - * Distributed under the MIT license (https://opensource.org/licenses/MIT) -**/ -/** - * @file buffer_pool.cpp - * @brief Implementation of vdma buffer pool - **/ - -#include "buffer_pool.hpp" -#include "hailo/hailort.h" -#include "hailo/expected.hpp" - -Expected BufferPool::create(size_t num_buffers, size_t buffer_size, - hailo_vdma_buffer_direction_flags_t data_direction_flags, Device &device) -{ - std::queue> queue; - for (auto i = 0; i < num_buffers; i++) { - auto mapped_buffer = DmaMappedBuffer::create(buffer_size, data_direction_flags, device); - if (!mapped_buffer) { - return make_unexpected(mapped_buffer.status()); - } - - auto mapped_buffer_ptr = std::make_shared(mapped_buffer.release()); - if (nullptr == mapped_buffer_ptr) { - return make_unexpected(HAILO_OUT_OF_HOST_MEMORY); - } - - queue.push(mapped_buffer_ptr); - } - - auto result = std::make_shared(num_buffers, std::move(queue)); - if (nullptr == result) { - return make_unexpected(HAILO_OUT_OF_HOST_MEMORY); - } - - return result; -} - -BufferPool::BufferPool(size_t max_size, std::queue> &&queue) : - m_max_size(max_size), - 
m_mutex(), - m_cv(), - m_queue(queue) -{} - -BufferPool::~BufferPool() -{ - m_cv.notify_all(); -} - -std::shared_ptr BufferPool::dequeue() -{ - std::unique_lock lock(m_mutex); - m_cv.wait(lock, [this] { return m_queue.size() > 0; }); - auto buffer = m_queue.front(); - m_queue.pop(); - - return buffer; -} -void BufferPool::enqueue(std::shared_ptr buffer) -{ - { - std::unique_lock lock(m_mutex); - m_cv.wait(lock, [this] { return m_max_size > m_queue.size(); }); - m_queue.push(buffer); - } - - m_cv.notify_one(); -} - -void BufferPool::wait_for_pending_buffers() -{ - std::unique_lock lock(m_mutex); - m_cv.wait(lock, [this] { return m_max_size == m_queue.size(); }); -} diff --git a/hailort/libhailort/examples/cpp/raw_async_streams_example/buffer_pool.hpp b/hailort/libhailort/examples/cpp/raw_async_streams_example/buffer_pool.hpp deleted file mode 100644 index 4ff5f63..0000000 --- a/hailort/libhailort/examples/cpp/raw_async_streams_example/buffer_pool.hpp +++ /dev/null @@ -1,50 +0,0 @@ -/** - * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. 
- * Distributed under the MIT license (https://opensource.org/licenses/MIT) -**/ -/** - * @file buffer_pool.hpp - * @brief Pool of vdma mapped buffers, allowing FIFO queue access to buffers - **/ - -#ifndef _HAILO_BUFFER_POOL_HPP_ -#define _HAILO_BUFFER_POOL_HPP_ - -#include "hailo/hailort.hpp" -#include "hailo/expected.hpp" - -#include -#include -#include -#include - - -using namespace hailort; - -class BufferPool; -using BufferPoolPtr = std::shared_ptr; - -class BufferPool final -{ -public: - static Expected create(size_t num_buffers, size_t buffer_size, - hailo_vdma_buffer_direction_flags_t data_direction_flags, Device &device); - BufferPool(size_t max_size, std::queue> &&queue); - BufferPool(BufferPool &&) = delete; - BufferPool(const BufferPool &) = delete; - BufferPool &operator=(BufferPool &&) = delete; - BufferPool &operator=(const BufferPool &) = delete; - ~BufferPool(); - - std::shared_ptr dequeue(); - void enqueue(std::shared_ptr buffer); - void wait_for_pending_buffers(); - -private: - const size_t m_max_size; - std::mutex m_mutex; - std::condition_variable m_cv; - std::queue> m_queue; -}; - -#endif /* _HAILO_BUFFER_POOL_HPP_ */ diff --git a/hailort/libhailort/examples/cpp/raw_async_streams_example/raw_async_streams_example.cpp b/hailort/libhailort/examples/cpp/raw_async_streams_example/raw_async_streams_example.cpp deleted file mode 100644 index fa0f1b4..0000000 --- a/hailort/libhailort/examples/cpp/raw_async_streams_example/raw_async_streams_example.cpp +++ /dev/null @@ -1,146 +0,0 @@ -/** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. 
- * Distributed under the MIT license (https://opensource.org/licenses/MIT) - **/ -/** - * @file raw_async_streams_example - * This example demonstrates using low level async streams over c++ - **/ - -#include "hailo/hailort.hpp" -#include "buffer_pool.hpp" - -#include -#include - - -constexpr size_t FRAMES_COUNT = 10000; -constexpr size_t BUFFER_POOL_SIZE = 10; -constexpr auto TIMEOUT = std::chrono::milliseconds(1000); - -using namespace hailort; - -Expected> configure_network_group(Device &device, const std::string &hef_path) -{ - auto hef = Hef::create(hef_path); - if (!hef) { - return make_unexpected(hef.status()); - } - - auto configure_params = device.create_configure_params(hef.value()); - if (!configure_params) { - return make_unexpected(configure_params.status()); - } - - // change stream_params here - for (auto &ng_name_params_pair : *configure_params) { - for (auto &stream_params_name_pair : ng_name_params_pair.second.stream_params_by_name) { - stream_params_name_pair.second.flags = HAILO_STREAM_FLAGS_ASYNC; - } - } - - auto network_groups = device.configure(hef.value(), configure_params.value()); - if (!network_groups) { - return make_unexpected(network_groups.status()); - } - - if (1 != network_groups->size()) { - std::cerr << "Invalid amount of network groups" << std::endl; - return make_unexpected(HAILO_INTERNAL_FAILURE); - } - - return std::move(network_groups->at(0)); -} - -void read_all(OutputStream &output, BufferPoolPtr buffer_pool, size_t frames_to_read, hailo_status &status) -{ - for (size_t i = 0; i < frames_to_read; i++) { - status = output.wait_for_ready(output.get_frame_size(), TIMEOUT); - if (HAILO_SUCCESS != status) { - return; - } - status = output.read_async(buffer_pool->dequeue(), - [buffer_pool](std::shared_ptr buffer, const hailo_async_transfer_completion_info_t &, void *) { - buffer_pool->enqueue(buffer); - }); - if (HAILO_SUCCESS != status) { - return; - } - } -} - -void write_all(InputStream &input, BufferPoolPtr buffer_pool, 
size_t frames_to_write, hailo_status &status) -{ - for (size_t i = 0; i < frames_to_write; i++) { - status = input.wait_for_ready(input.get_frame_size(), TIMEOUT); - if (HAILO_SUCCESS != status) { - return; - } - status = input.write_async(buffer_pool->dequeue(), - [buffer_pool](std::shared_ptr buffer, const hailo_async_transfer_completion_info_t &, void *) { - buffer_pool->enqueue(buffer); - }); - if (HAILO_SUCCESS != status) { - return; - } - } -} - -int main() -{ - auto device = Device::create(); - if (!device) { - std::cerr << "Failed create device " << device.status() << std::endl; - return device.status(); - } - - static const auto HEF_FILE = "hefs/shortcut_net.hef"; - auto network_group = configure_network_group(*device.value(), HEF_FILE); - if (!network_group) { - std::cerr << "Failed to configure network group" << HEF_FILE << std::endl; - return network_group.status(); - } - - auto activated_network_group = network_group.value()->activate(); - if (!activated_network_group) { - std::cerr << "Failed to activate network group " << activated_network_group.status() << std::endl; - return activated_network_group.status(); - } - - // Assume one input and output - auto output = network_group->get()->get_output_streams()[0]; - auto input = network_group->get()->get_input_streams()[0]; - - auto output_buffer_pool = BufferPool::create(BUFFER_POOL_SIZE, output.get().get_frame_size(), HAILO_VDMA_BUFFER_DIRECTION_FLAGS_D2H, *device.value()); - if (!output_buffer_pool) { - std::cerr << "Failed to create output buffer pool" << std::endl; - return output_buffer_pool.status(); - } - hailo_status output_status = HAILO_UNINITIALIZED; - auto output_thread = std::make_unique(read_all, output, output_buffer_pool.value(), FRAMES_COUNT, std::ref(output_status)); - - auto input_buffer_pool = BufferPool::create(BUFFER_POOL_SIZE, input.get().get_frame_size(), HAILO_VDMA_BUFFER_DIRECTION_FLAGS_H2D, *device.value()); - if (!input_buffer_pool) { - std::cerr << "Failed to create input 
buffer pool" << std::endl; - return input_buffer_pool.status(); - } - hailo_status input_status = HAILO_UNINITIALIZED; - auto input_thread = std::make_unique(write_all, input, input_buffer_pool.value(), FRAMES_COUNT, std::ref(input_status)); - - // Join threads - input_thread->join(); - output_thread->join(); - if (HAILO_SUCCESS != input_status) { - return input_status; - } - if (HAILO_SUCCESS != output_status) { - return output_status; - } - - // The read/write threads have completed but the transfers issued by them haven't necessarily completed. - // We'll wait for the output buffer queue to fill back up, since the callback we registered enqueues buffers - // back to the pool + we issued the same number of reads as writes - output_buffer_pool.value()->wait_for_pending_buffers(); - - return HAILO_SUCCESS; -} diff --git a/hailort/libhailort/examples/cpp/raw_async_streams_multi_thread_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/raw_async_streams_multi_thread_example/CMakeLists.txt new file mode 100644 index 0000000..d89940f --- /dev/null +++ b/hailort/libhailort/examples/cpp/raw_async_streams_multi_thread_example/CMakeLists.txt @@ -0,0 +1,19 @@ +cmake_minimum_required(VERSION 3.0.0) + +find_package(Threads REQUIRED) +set(THREADS_PREFER_PTHREAD_FLAG ON) + +find_package(HailoRT 4.14.0 EXACT REQUIRED) + +add_executable(cpp_raw_async_streams_multi_thread_example raw_async_streams_multi_thread_example.cpp) +target_link_libraries(cpp_raw_async_streams_multi_thread_example PRIVATE HailoRT::libhailort Threads::Threads) + +if(WIN32) + target_compile_options(cpp_raw_async_streams_multi_thread_example PRIVATE + /DWIN32_LEAN_AND_MEAN + /DNOMINMAX # NOMINMAX is required in order to play nice with std::min/std::max (otherwise Windows.h defines it's own) + /wd4201 /wd4251 + ) +endif() + +set_target_properties(cpp_raw_async_streams_multi_thread_example PROPERTIES CXX_STANDARD 14) \ No newline at end of file diff --git 
a/hailort/libhailort/examples/cpp/raw_async_streams_multi_thread_example/raw_async_streams_multi_thread_example.cpp b/hailort/libhailort/examples/cpp/raw_async_streams_multi_thread_example/raw_async_streams_multi_thread_example.cpp new file mode 100644 index 0000000..c423a3a --- /dev/null +++ b/hailort/libhailort/examples/cpp/raw_async_streams_multi_thread_example/raw_async_streams_multi_thread_example.cpp @@ -0,0 +1,164 @@ +/** + * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file raw_async_streams_multi_thread_example + * This example demonstrates using low level async streams over c++ + **/ + +#include "hailo/hailort.hpp" + +#include +#include + +#if defined(__unix__) +#include +#endif + +constexpr auto TIMEOUT = std::chrono::milliseconds(1000); + +using namespace hailort; + +using AlignedBuffer = std::shared_ptr; +static AlignedBuffer page_aligned_alloc(size_t size) +{ +#if defined(__unix__) + auto addr = mmap(NULL, size, PROT_WRITE | PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + if (MAP_FAILED == addr) throw std::bad_alloc(); + return AlignedBuffer(reinterpret_cast(addr), [size](void *addr) { munmap(addr, size); }); +#elif defined(_MSC_VER) + auto addr = VirtualAlloc(NULL, size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE); + if (!addr) throw std::bad_alloc(); + return AlignedBuffer(reinterpret_cast(addr), [](void *addr){ VirtualFree(addr, 0, MEM_RELEASE); }); +#else +#pragma error("Aligned alloc not supported") +#endif +} + +Expected> configure_network_group(Device &device, const std::string &hef_path) +{ + auto hef = Hef::create(hef_path); + if (!hef) { + return make_unexpected(hef.status()); + } + + auto configure_params = device.create_configure_params(hef.value()); + if (!configure_params) { + return make_unexpected(configure_params.status()); + } + + // change stream_params here + for (auto &ng_name_params_pair : *configure_params) { + for 
(auto &stream_params_name_pair : ng_name_params_pair.second.stream_params_by_name) { + stream_params_name_pair.second.flags = HAILO_STREAM_FLAGS_ASYNC; + } + } + + auto network_groups = device.configure(hef.value(), configure_params.value()); + if (!network_groups) { + return make_unexpected(network_groups.status()); + } + + if (1 != network_groups->size()) { + std::cerr << "Invalid amount of network groups" << std::endl; + return make_unexpected(HAILO_INTERNAL_FAILURE); + } + + return std::move(network_groups->at(0)); +} + +static void output_async_callback(const OutputStream::CompletionInfo &completion_info) +{ + // Real applications can free the buffer or forward it to post-process/display. + if ((HAILO_SUCCESS != completion_info.status) && (HAILO_STREAM_ABORTED_BY_USER != completion_info.status)) { + // We will get HAILO_STREAM_ABORTED_BY_USER when activated_network_group is destructed. + std::cerr << "Got an unexpected status on callback. status=" << completion_info.status << std::endl; + } +} + +static void input_async_callback(const InputStream::CompletionInfo &completion_info) +{ + // Real applications can free the buffer or reuse it for next transfer. + if ((HAILO_SUCCESS != completion_info.status) && (HAILO_STREAM_ABORTED_BY_USER != completion_info.status)) { + // We will get HAILO_STREAM_ABORTED_BY_USER when activated_network_group is destructed. + std::cerr << "Got an unexpected status on callback. 
status=" << completion_info.status << std::endl; + } +} + +int main() +{ + auto device = Device::create(); + if (!device) { + std::cerr << "Failed create device " << device.status() << std::endl; + return EXIT_FAILURE; + } + + static const auto HEF_FILE = "hefs/shortcut_net.hef"; + auto network_group = configure_network_group(*device.value(), HEF_FILE); + if (!network_group) { + std::cerr << "Failed to configure network group " << HEF_FILE << std::endl; + return EXIT_FAILURE; + } + + // Assume one input and output + auto &output = network_group->get()->get_output_streams()[0].get(); + auto &input = network_group->get()->get_input_streams()[0].get(); + + // Allocate buffers. The buffers sent to the async API must be page aligned. + // For simplicity, in this example, we pass one buffer for each stream (It may be problematic in output since the + // buffer will be overridden on each read). + // Note - the buffers are allocated before we activate the network group. This will ensure that they won't be freed + // until the network group will become inactive. + auto output_buffer = page_aligned_alloc(output.get_frame_size()); + auto input_buffer = page_aligned_alloc(input.get_frame_size()); + + // The destructor of activated_network_group will make sure that all async operations are done. All pending + // operations will be canceled and their callbacks will be called with status=HAILO_STREAM_ABORTED_BY_USER. + // Be sure to capture variables in the callbacks that will be destructed after the activated_network_group. + // Otherwise, the lambda would have access an uninitialized data. 
+ auto activated_network_group = network_group.value()->activate(); + if (!activated_network_group) { + std::cerr << "Failed to activate network group " << activated_network_group.status() << std::endl; + return EXIT_FAILURE; + } + + std::atomic output_status(HAILO_UNINITIALIZED); + std::thread output_thread([&]() { + while (true) { + output_status = output.wait_for_async_ready(output.get_frame_size(), TIMEOUT); + if (HAILO_SUCCESS != output_status) { return; } + + output_status = output.read_async(output_buffer.get(), output.get_frame_size(), output_async_callback); + if (HAILO_SUCCESS != output_status) { return; } + } + }); + + std::atomic input_status(HAILO_UNINITIALIZED); + std::thread input_thread([&]() { + while (true) { + input_status = input.wait_for_async_ready(input.get_frame_size(), TIMEOUT); + if (HAILO_SUCCESS != input_status) { return; } + + input_status = input.write_async(input_buffer.get(), input.get_frame_size(), input_async_callback); + if (HAILO_SUCCESS != input_status) { return; } + } + }); + + // After all async operations are launched, the inference is running. + std::this_thread::sleep_for(std::chrono::seconds(5)); + + // Make it stop. We explicitly destruct activated_network_group to stop all async I/O. + activated_network_group->reset(); + + // Thread should be stopped with HAILO_STREAM_NOT_ACTIVATED status. 
+ output_thread.join(); + input_thread.join(); + if ((HAILO_STREAM_NOT_ACTIVATED != output_status) || (HAILO_STREAM_NOT_ACTIVATED != input_status)) { + std::cerr << "Got unexpected statues from thread: " << output_status << ", " << input_status << std::endl; + return EXIT_FAILURE; + } + + std::cout << "Inference finished successfully" << std::endl; + return EXIT_SUCCESS; +} \ No newline at end of file diff --git a/hailort/libhailort/examples/cpp/raw_async_streams_single_thread_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/raw_async_streams_single_thread_example/CMakeLists.txt new file mode 100644 index 0000000..0c24087 --- /dev/null +++ b/hailort/libhailort/examples/cpp/raw_async_streams_single_thread_example/CMakeLists.txt @@ -0,0 +1,19 @@ +cmake_minimum_required(VERSION 3.0.0) + +find_package(Threads REQUIRED) +set(THREADS_PREFER_PTHREAD_FLAG ON) + +find_package(HailoRT 4.14.0 EXACT REQUIRED) + +add_executable(cpp_raw_async_streams_single_thread_example raw_async_streams_single_thread_example.cpp) +target_link_libraries(cpp_raw_async_streams_single_thread_example PRIVATE HailoRT::libhailort Threads::Threads) + +if(WIN32) + target_compile_options(cpp_raw_async_streams_single_thread_example PRIVATE + /DWIN32_LEAN_AND_MEAN + /DNOMINMAX # NOMINMAX is required in order to play nice with std::min/std::max (otherwise Windows.h defines it's own) + /wd4201 /wd4251 + ) +endif() + +set_target_properties(cpp_raw_async_streams_single_thread_example PROPERTIES CXX_STANDARD 14) \ No newline at end of file diff --git a/hailort/libhailort/examples/cpp/raw_async_streams_single_thread_example/raw_async_streams_single_thread_example.cpp b/hailort/libhailort/examples/cpp/raw_async_streams_single_thread_example/raw_async_streams_single_thread_example.cpp new file mode 100644 index 0000000..219b2da --- /dev/null +++ b/hailort/libhailort/examples/cpp/raw_async_streams_single_thread_example/raw_async_streams_single_thread_example.cpp @@ -0,0 +1,193 @@ +/** + * Copyright (c) 
2020-2022 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file raw_async_streams_single_thread_example + * This example demonstrates using low level async streams using single thread over c++. + **/ + +#include "hailo/hailort.hpp" + +#include +#include +#include +#include + +#if defined(__unix__) +#include +#endif + +using namespace hailort; + +using AlignedBuffer = std::shared_ptr; +static AlignedBuffer page_aligned_alloc(size_t size) +{ +#if defined(__unix__) + auto addr = mmap(NULL, size, PROT_WRITE | PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + if (MAP_FAILED == addr) throw std::bad_alloc(); + return AlignedBuffer(reinterpret_cast(addr), [size](void *addr) { munmap(addr, size); }); +#elif defined(_MSC_VER) + auto addr = VirtualAlloc(NULL, size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE); + if (!addr) throw std::bad_alloc(); + return AlignedBuffer(reinterpret_cast(addr), [](void *addr){ VirtualFree(addr, 0, MEM_RELEASE); }); +#else +#pragma error("Aligned alloc not supported") +#endif +} + +static hailo_status infer(ConfiguredNetworkGroup &network_group, InputStream &input, OutputStream &output) +{ + auto input_queue_size = input.get_async_max_queue_size(); + auto output_queue_size = output.get_async_max_queue_size(); + if (!input_queue_size || !output_queue_size) { + std::cerr << "Failed getting async queue size" << std::endl; + return HAILO_INTERNAL_FAILURE; + } + + // We store buffers vector here as a guard for the memory. The buffer will be freed only after + // activated_network_group will be released. + std::vector buffer_guards; + + OutputStream::TransferDoneCallback read_done = [&output, &read_done](const OutputStream::CompletionInfo &completion_info) { + hailo_status status = HAILO_UNINITIALIZED; + switch (completion_info.status) { + case HAILO_SUCCESS: + // Real applications can forward the buffer to post-process/display. Here we just re-launch new async read. 
+ status = output.read_async(completion_info.buffer_addr, completion_info.buffer_size, read_done); + if ((HAILO_SUCCESS != status) && (HAILO_STREAM_NOT_ACTIVATED != status)) { + std::cerr << "Failed read async with status=" << status << std::endl; + } + break; + case HAILO_STREAM_ABORTED_BY_USER: + // Transfer was canceled, finish gracefully. + break; + default: + std::cerr << "Got an unexpected status on callback. status=" << completion_info.status << std::endl; + } + }; + + InputStream::TransferDoneCallback write_done = [&input, &write_done](const InputStream::CompletionInfo &completion_info) { + hailo_status status = HAILO_UNINITIALIZED; + switch (completion_info.status) { + case HAILO_SUCCESS: + // Real applications may free the buffer and replace it with new buffer ready to be sent. Here we just + // re-launch new async write. + status = input.write_async(completion_info.buffer_addr, completion_info.buffer_size, write_done); + if ((HAILO_SUCCESS != status) && (HAILO_STREAM_NOT_ACTIVATED != status)) { + std::cerr << "Failed read async with status=" << status << std::endl; + } + break; + case HAILO_STREAM_ABORTED_BY_USER: + // Transfer was canceled, finish gracefully. + break; + default: + std::cerr << "Got an unexpected status on callback. status=" << completion_info.status << std::endl; + } + }; + + // The destructor of activated_network_group will make sure that all async operations are done. All pending + // operations will be canceled and their callbacks will be called with status=HAILO_STREAM_ABORTED_BY_USER. + // Be sure to capture variables in the callbacks that will be destructed after the activated_network_group. + // Otherwise, the lambda would have access an uninitialized data. 
+ auto activated_network_group = network_group.activate(); + if (!activated_network_group) { + std::cerr << "Failed to activate network group " << activated_network_group.status() << std::endl; + return activated_network_group.status(); + } + + // We launch "*output_queue_size" async read operation. On each async callback, we launch a new async read operation. + for (size_t i = 0; i < *output_queue_size; i++) { + // Buffers read from async operation must be page aligned. + auto buffer = page_aligned_alloc(output.get_frame_size()); + auto status = output.read_async(buffer.get(), output.get_frame_size(), read_done); + if (HAILO_SUCCESS != status) { + std::cerr << "read_async failed with status=" << status << std::endl; + return status; + } + + buffer_guards.emplace_back(buffer); + } + + // We launch "*input_queue_size" async write operation. On each async callback, we launch a new async write operation. + for (size_t i = 0; i < *input_queue_size; i++) { + // Buffers written to async operation must be page aligned. + auto buffer = page_aligned_alloc(input.get_frame_size()); + auto status = input.write_async(buffer.get(), input.get_frame_size(), write_done); + if (HAILO_SUCCESS != status) { + std::cerr << "write_async failed with status=" << status << std::endl; + return status; + } + + buffer_guards.emplace_back(buffer); + } + + // After all async operations are launched, the inference will continue until the activated_network_group + // destructor is called. 
+ std::this_thread::sleep_for(std::chrono::seconds(5)); + + return HAILO_SUCCESS; +} + + +static Expected> configure_network_group(Device &device, const std::string &hef_path) +{ + auto hef = Hef::create(hef_path); + if (!hef) { + return make_unexpected(hef.status()); + } + + auto configure_params = device.create_configure_params(hef.value()); + if (!configure_params) { + return make_unexpected(configure_params.status()); + } + + // change stream_params to operate in async mode + for (auto &ng_name_params_pair : *configure_params) { + for (auto &stream_params_name_pair : ng_name_params_pair.second.stream_params_by_name) { + stream_params_name_pair.second.flags = HAILO_STREAM_FLAGS_ASYNC; + } + } + + auto network_groups = device.configure(hef.value(), configure_params.value()); + if (!network_groups) { + return make_unexpected(network_groups.status()); + } + + if (1 != network_groups->size()) { + std::cerr << "Invalid amount of network groups" << std::endl; + return make_unexpected(HAILO_INTERNAL_FAILURE); + } + + return std::move(network_groups->at(0)); +} + +int main() +{ + auto device = Device::create(); + if (!device) { + std::cerr << "Failed to create device " << device.status() << std::endl; + return device.status(); + } + + static const auto HEF_FILE = "hefs/shortcut_net.hef"; + auto network_group = configure_network_group(*device.value(), HEF_FILE); + if (!network_group) { + std::cerr << "Failed to configure network group" << HEF_FILE << std::endl; + return network_group.status(); + } + + // Assume one input and output + auto output = network_group->get()->get_output_streams()[0]; + auto input = network_group->get()->get_input_streams()[0]; + + // Now start the inference + auto status = infer(*network_group.value(), input.get(), output.get()); + if (HAILO_SUCCESS != status) { + std::cerr << "Inference failed with " << status << std::endl; + return status; + } + + std::cout << "Inference finished successfully" << std::endl; + return HAILO_SUCCESS; +} diff 
--git a/hailort/libhailort/examples/cpp/raw_streams_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/raw_streams_example/CMakeLists.txt index 4184783..d30f854 100644 --- a/hailort/libhailort/examples/cpp/raw_streams_example/CMakeLists.txt +++ b/hailort/libhailort/examples/cpp/raw_streams_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0) find_package(Threads REQUIRED) set(THREADS_PREFER_PTHREAD_FLAG ON) -find_package(HailoRT 4.13.0 EXACT REQUIRED) +find_package(HailoRT 4.14.0 EXACT REQUIRED) add_executable(cpp_raw_streams_example raw_streams_example.cpp) target_link_libraries(cpp_raw_streams_example PRIVATE HailoRT::libhailort Threads::Threads) diff --git a/hailort/libhailort/examples/cpp/raw_streams_example/raw_streams_example.cpp b/hailort/libhailort/examples/cpp/raw_streams_example/raw_streams_example.cpp index a73fd7c..c780cad 100644 --- a/hailort/libhailort/examples/cpp/raw_streams_example/raw_streams_example.cpp +++ b/hailort/libhailort/examples/cpp/raw_streams_example/raw_streams_example.cpp @@ -145,16 +145,7 @@ hailo_status infer(InputStreamRefVector &input_streams, OutputStreamRefVector &o int main() { - auto device_ids = Device::scan(); - if (!device_ids) { - std::cerr << "Failed to scan, status = " << device_ids.status() << std::endl; - return device_ids.status(); - } - if (device_ids->size() < 1){ - std::cerr << "Failed to find a connected hailo device." 
<< std::endl; - return HAILO_INVALID_OPERATION; - } - auto device = Device::create(device_ids->at(0)); + auto device = Device::create(); if (!device) { std::cerr << "Failed to create device " << device.status() << std::endl; return device.status(); diff --git a/hailort/libhailort/examples/cpp/switch_network_groups_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/switch_network_groups_example/CMakeLists.txt index a0e9c7e..3338ff1 100644 --- a/hailort/libhailort/examples/cpp/switch_network_groups_example/CMakeLists.txt +++ b/hailort/libhailort/examples/cpp/switch_network_groups_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0) find_package(Threads REQUIRED) set(THREADS_PREFER_PTHREAD_FLAG ON) -find_package(HailoRT 4.13.0 EXACT REQUIRED) +find_package(HailoRT 4.14.0 EXACT REQUIRED) add_executable(cpp_switch_network_groups_example switch_network_groups_example.cpp) target_link_libraries(cpp_switch_network_groups_example PRIVATE HailoRT::libhailort Threads::Threads) diff --git a/hailort/libhailort/examples/cpp/switch_network_groups_example/switch_network_groups_example.cpp b/hailort/libhailort/examples/cpp/switch_network_groups_example/switch_network_groups_example.cpp index e7c4d9f..156d523 100644 --- a/hailort/libhailort/examples/cpp/switch_network_groups_example/switch_network_groups_example.cpp +++ b/hailort/libhailort/examples/cpp/switch_network_groups_example/switch_network_groups_example.cpp @@ -20,6 +20,8 @@ constexpr bool QUANTIZED = true; constexpr hailo_format_type_t FORMAT_TYPE = HAILO_FORMAT_TYPE_AUTO; constexpr size_t INFER_FRAME_COUNT = 100; constexpr uint32_t DEVICE_COUNT = 1; +constexpr size_t BATCH_SIZE_1 = 1; +constexpr size_t BATCH_SIZE_2 = 2; constexpr std::chrono::milliseconds SCHEDULER_TIMEOUT_MS(100); constexpr uint32_t SCHEDULER_THRESHOLD = 3; @@ -102,10 +104,13 @@ Expected> create_vdevice() return VDevice::create(params); } -Expected>> configure_hefs(VDevice &vdevice, std::vector &hef_paths) +Expected>> 
configure_hefs(VDevice &vdevice, std::vector &hef_paths, + const std::vector &batch_sizes) { std::vector> results; + assert(hef_paths.size() == batch_sizes.size()); + size_t i = 0; for (const auto &path : hef_paths) { auto hef_exp = Hef::create(path); if (!hef_exp) { @@ -113,6 +118,19 @@ Expected>> configure_hefs(VD } auto hef = hef_exp.release(); + auto configure_params = vdevice.create_configure_params(hef); + if (!configure_params) { + std::cerr << "Failed to create configure params" << std::endl; + return make_unexpected(configure_params.status()); + } + + // Modify batch_size for each network group + for (auto& network_group_params : configure_params.value()) { + network_group_params.second.batch_size = batch_sizes[i]; + network_group_params.second.power_mode = HAILO_POWER_MODE_ULTRA_PERFORMANCE; + } + i++; + auto added_network_groups = vdevice.configure(hef); if (!added_network_groups) { return make_unexpected(added_network_groups.status()); @@ -132,15 +150,17 @@ int main() } auto vdevice = vdevice_exp.release(); + std::vector batch_sizes { BATCH_SIZE_1, BATCH_SIZE_2 }; std::vector hef_paths = {"hefs/multi_network_shortcut_net.hef", "hefs/shortcut_net.hef"}; - auto configured_network_groups_exp = configure_hefs(*vdevice, hef_paths); + + auto configured_network_groups_exp = configure_hefs(*vdevice, hef_paths, batch_sizes); if (!configured_network_groups_exp) { std::cerr << "Failed to configure HEFs, status = " << configured_network_groups_exp.status() << std::endl; return configured_network_groups_exp.status(); } auto configured_network_groups = configured_network_groups_exp.release(); - // Set scheduler's timeout and threshold for the first network group, in order to give priority to the second network group + // Set scheduler's timeout and threshold for the first network group, it will give priority to the second network group auto status = configured_network_groups[0]->set_scheduler_timeout(SCHEDULER_TIMEOUT_MS); if (HAILO_SUCCESS != status) { std::cerr << 
"Failed to set scheduler timeout, status = " << status << std::endl; @@ -153,6 +173,16 @@ int main() return status; } + // Setting higher priority to the first network-group directly. + // The practical meaning is that the first network will be ready to run only if ``SCHEDULER_THRESHOLD`` send requests have been accumulated, + // or more than ``SCHEDULER_TIMEOUT_MS`` time has passed and at least one send request has been accumulated. + // However when both the first and the second networks are ready to run, the first network will be preferred over the second network. + status = configured_network_groups[0]->set_scheduler_priority(HAILO_SCHEDULER_PRIORITY_NORMAL+1); + if (HAILO_SUCCESS != status) { + std::cerr << "Failed to set scheduler priority, status = " << status << std::endl; + return status; + } + auto vstreams_per_network_group_exp = build_vstreams(configured_network_groups); if (!vstreams_per_network_group_exp) { std::cerr << "Failed to create vstreams, status = " << vstreams_per_network_group_exp.status() << std::endl; diff --git a/hailort/libhailort/examples/cpp/switch_network_groups_manually_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/switch_network_groups_manually_example/CMakeLists.txt index 9c6114e..8ef520b 100644 --- a/hailort/libhailort/examples/cpp/switch_network_groups_manually_example/CMakeLists.txt +++ b/hailort/libhailort/examples/cpp/switch_network_groups_manually_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0) find_package(Threads REQUIRED) set(THREADS_PREFER_PTHREAD_FLAG ON) -find_package(HailoRT 4.13.0 EXACT REQUIRED) +find_package(HailoRT 4.14.0 EXACT REQUIRED) add_executable(cpp_switch_network_groups_manually_example switch_network_groups_manually_example.cpp) target_link_libraries(cpp_switch_network_groups_manually_example PRIVATE HailoRT::libhailort Threads::Threads) diff --git a/hailort/libhailort/examples/cpp/vstreams_example/CMakeLists.txt 
b/hailort/libhailort/examples/cpp/vstreams_example/CMakeLists.txt index 522ea6a..7715164 100644 --- a/hailort/libhailort/examples/cpp/vstreams_example/CMakeLists.txt +++ b/hailort/libhailort/examples/cpp/vstreams_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0) find_package(Threads REQUIRED) set(THREADS_PREFER_PTHREAD_FLAG ON) -find_package(HailoRT 4.13.0 EXACT REQUIRED) +find_package(HailoRT 4.14.0 EXACT REQUIRED) add_executable(cpp_vstreams_example vstreams_example.cpp) target_link_libraries(cpp_vstreams_example PRIVATE HailoRT::libhailort Threads::Threads) diff --git a/hailort/libhailort/examples/cpp/vstreams_example/vstreams_example.cpp b/hailort/libhailort/examples/cpp/vstreams_example/vstreams_example.cpp index 097d8a1..5baae18 100644 --- a/hailort/libhailort/examples/cpp/vstreams_example/vstreams_example.cpp +++ b/hailort/libhailort/examples/cpp/vstreams_example/vstreams_example.cpp @@ -15,8 +15,6 @@ #define HEF_FILE ("hefs/shortcut_net.hef") constexpr size_t FRAMES_COUNT = 100; -constexpr bool QUANTIZED = true; -constexpr hailo_format_type_t FORMAT_TYPE = HAILO_FORMAT_TYPE_AUTO; constexpr size_t MAX_LAYER_EDGES = 16; using namespace hailort; @@ -140,19 +138,49 @@ int main() return network_group.status(); } - auto vstreams = VStreamsBuilder::create_vstreams(*network_group.value(), QUANTIZED, FORMAT_TYPE); - if (!vstreams) { - std::cerr << "Failed creating vstreams " << vstreams.status() << std::endl; - return vstreams.status(); + // Set input format type to auto, and mark the data as quantized - libhailort will not scale the data before writing to the HW + bool quantized = true; + auto input_vstream_params = network_group.value()->make_input_vstream_params(quantized, HAILO_FORMAT_TYPE_AUTO, HAILO_DEFAULT_VSTREAM_TIMEOUT_MS, + HAILO_DEFAULT_VSTREAM_QUEUE_SIZE); + if (!input_vstream_params) { + std::cerr << "Failed creating input vstreams params " << input_vstream_params.status() << std::endl; + return input_vstream_params.status(); 
} - if (vstreams->first.size() > MAX_LAYER_EDGES || vstreams->second.size() > MAX_LAYER_EDGES) { + /* The input format order in the example HEF is NHWC in the user-side (may be seen using 'hailortcli parse-hef ). + Here we override the user-side format order to be NCHW */ + for (auto ¶ms_pair : *input_vstream_params) { + params_pair.second.user_buffer_format.order = HAILO_FORMAT_ORDER_NCHW; + } + + auto input_vstreams = VStreamsBuilder::create_input_vstreams(*network_group.value(), *input_vstream_params); + if (!input_vstreams) { + std::cerr << "Failed creating input vstreams " << input_vstreams.status() << std::endl; + return input_vstreams.status(); + } + + // Set output format type to float32, and mark the data as not quantized - libhailort will de-quantize the data after reading from the HW + // Note: this process might affect the overall performance + quantized = false; + auto output_vstream_params = network_group.value()->make_output_vstream_params(quantized, HAILO_FORMAT_TYPE_FLOAT32, HAILO_DEFAULT_VSTREAM_TIMEOUT_MS, + HAILO_DEFAULT_VSTREAM_QUEUE_SIZE); + if (!output_vstream_params) { + std::cerr << "Failed creating output vstreams params " << output_vstream_params.status() << std::endl; + return output_vstream_params.status(); + } + auto output_vstreams = VStreamsBuilder::create_output_vstreams(*network_group.value(), *output_vstream_params); + if (!output_vstreams) { + std::cerr << "Failed creating output vstreams " << output_vstreams.status() << std::endl; + return output_vstreams.status(); + } + + if (input_vstreams->size() > MAX_LAYER_EDGES || output_vstreams->size() > MAX_LAYER_EDGES) { std::cerr << "Trying to infer network with too many input/output virtual streams, Maximum amount is " << MAX_LAYER_EDGES << " (either change HEF or change the definition of MAX_LAYER_EDGES)"<< std::endl; return HAILO_INVALID_OPERATION; } - auto status = infer(vstreams->first, vstreams->second); + auto status = infer(*input_vstreams, *output_vstreams); if (HAILO_SUCCESS 
!= status) { std::cerr << "Inference failed " << status << std::endl; return status; diff --git a/hailort/libhailort/hef.proto b/hailort/libhailort/hef.proto index 3407238..3c5e909 100644 --- a/hailort/libhailort/hef.proto +++ b/hailort/libhailort/hef.proto @@ -41,6 +41,14 @@ enum ProtoHEFExtensionType { HAILO_NET_FLOW_YOLOX_NMS = 15; HAILO_NET_FLOW_SSD_NMS = 16; HAILO_NET_FLOW_IOU_NMS = 17; + WRITE_DATA_BY_TYPE = 18; + NMS_OUTPUT_BURST = 19; + DUAL_DIRECTION_STREAM_INDEX = 20; + HAILO_NET_FLOW_ARGMAX = 21; + HAILO_NET_FLOW_SOFTMAX = 22; + ALIGNED_FORMAT_TYPE = 23; + OUTPUT_SCALE_PER_FEATURE = 25; + PERIPH_CALCULATION_IN_HAILORT = 26; UNUSED = 0XFFFF; } @@ -163,6 +171,29 @@ message ProtoHEFSSDBboxDecoder { uint32 cls_pad_index = 4; }; +message ProtoHEFYoloxBboxDecoder { + // Pixels stride for given bbox + uint32 stride = 1; + + // Index of the pad connected to the encoded layer in the decoder (reg layer) + uint32 reg_pad_index = 2; + + // Index of the pad connected to the classes scores layer in the decoder (cls layer) + uint32 cls_pad_index = 3; + + // Index of the pad connected to the objectness scores layer in the decoder (objectness layer) + uint32 obj_pad_index = 4; +}; + +message ProtoHEFYoloxNmsOp { + // Input image dimensions + double image_height = 1; + double image_width = 2; + + // List of bbox decoders (anchors) for the NMS layer. 
Each model has its own number of boxes per anchor + repeated ProtoHEFYoloxBboxDecoder bbox_decoders = 3; +}; + message ProtoHEFSSDNmsOp { // Input image dimensions double image_height = 1; @@ -208,12 +239,22 @@ message ProtoHEFNmsOp { // Additional information needed for specific NMS types oneof nms_op { ProtoHEFYoloNmsOp yolo_nms_op = 7; // YOLOv5 post process - ProtoHEFYoloNmsOp yolox_nms_op = 8; // YOLO-X post process (ignores bbox decoder coordinations) + ProtoHEFYoloxNmsOp yolox_nms_op = 8; // YOLO-X post process ProtoHEFSSDNmsOp ssd_nms_op = 9; // SSD post process ProtoHEFIOUNmsOp iou_op = 10; // IoU only } }; +enum ProtoHEFLogitsType { + PROTO_HEF_ARGMAX_TYPE = 0; + PROTO_HEF_SOFTMAX_TYPE = 1; +} + +message ProtoHEFLogitsOp { + // Logits type (softmax/argmax) + ProtoHEFLogitsType logits_type = 1; +}; + enum ProtoHEFFormatOrder { PROTO__FORMAT__ORDER__AUTO = 0; PROTO__FORMAT__ORDER__NHWC = 1; @@ -240,6 +281,13 @@ enum ProtoHEFDataType { PROTO__UINT16 = 1; }; +enum ProtoHEFFormatType { + PROTO__FORMAT__TYPE__AUTO = 0; + PROTO__FORMAT__TYPE__UINT8 = 1; + PROTO__FORMAT__TYPE__UINT16 = 2; + PROTO__FORMAT__TYPE__MAX_ENUM = 0XFFFF; +}; + message ProtoHEFTensorShape { uint32 height = 1; uint32 padded_height = 2; @@ -268,8 +316,9 @@ message ProtoHEFPad { string name = 2; // Additional information describing the data going through this pad's interface - ProtoHEFFormatOrder format = 3; - ProtoHEFDataType data_bytes = 4; + ProtoHEFFormatOrder format_order = 3; + ProtoHEFDataType data_bytes = 4; // Unused (kept for compatibility). 
Should use format_type field + ProtoHEFFormatType format_type = 8; ProtoHEFEdgeLayerNumericInfo numeric_info = 5; oneof shape_info { ProtoHEFTensorShape tensor_shape = 6; @@ -291,6 +340,9 @@ message ProtoHEFOp { // Op type for NMS post-process ProtoHEFNmsOp nms_op = 5; + + // Op type for Logits post-processing + ProtoHEFLogitsOp logits_op = 6; } }; @@ -464,6 +516,7 @@ message ProtoHEFAction { ProtoHEFActionWaitForModuleConfigDone wait_for_module_config_done = 11; ProtoHEFActionDebugSleep debug_sleep = 12; ProtoHEFActionEnableNMS enable_nms = 13; + ProtoHEFActionWriteDataByType write_data_by_type = 14; } } @@ -494,6 +547,31 @@ message ProtoHEFActionDebugSleep { uint64 duration_in_usec = 1; } +enum ProtoHEFWriteDataType { + DATA_FROM_ACTION = 0; + BATCH_SIZE = 1; +}; + +message ProtoHEFActionWriteDataByType { + // The address to write the data + uint64 address = 1; + + // Data type - the data to write + ProtoHEFWriteDataType data_type = 2; + + // The data that would be written if data_type=DATA_FROM_ACTION + bytes data = 3; + + // The mask to use - ignore if data_type=DATA_FROM_ACTION and data size > 4 + uint32 mask = 4; + + // Network index + uint32 network_index = 5; + + // data shift + uint32 shift = 6; +} + message InitialL3 { // L3 cut index sequencer should start from uint32 initial_l3_index = 1; @@ -572,6 +650,12 @@ message ProtoHEFActionEnableNMS { // Index of the network uint32 network_index = 2; + + // Number of classes + uint32 number_of_classes = 3; + + // Burst-size + uint32 burst_size = 4; } // None action - Do not do anything @@ -740,6 +824,17 @@ message ProtoHEFAdditionalInfo { ProtoHEFNmsInfo nms_info = 1; } +enum ProtoHEFNmsBurstType { + // No burst + PROTO__NMS_BURST_TYPE__NO_BURST = 0; + // No image delimiter, burst per class + PROTO__NMS_BURST_TYPE__H8_PER_CLASS = 1; + // Image delimiter and burst per class + PROTO__NMS_BURST_TYPE__H15_PER_CLASS = 2; + // Image delimiter and burst per image + PROTO__NMS_BURST_TYPE__H15_PER_FRAME = 3; +} + // NMS 
specific parameters message ProtoHEFNmsInfo { uint32 type_index = 1; @@ -749,6 +844,8 @@ message ProtoHEFNmsInfo { bool is_defused = 5; ProtoHEFNmsDefuseInfo defuse_info = 6; uint64 input_division_factor = 7; + uint32 burst_size = 8; + ProtoHEFNmsBurstType burst_type = 9; } message ProtoHEFNmsDefuseInfo { @@ -757,10 +854,12 @@ message ProtoHEFNmsDefuseInfo { } message ProtoHEFEdgeLayerNumericInfo { - float qp_zp = 1; - float qp_scale = 2; + float qp_zp = 1; // TODO: Remove, use vector + float qp_scale = 2; // TODO: Remove, use vector float limvals_min = 3; float limvals_max = 4; + repeated double qp_zps = 5; // zp per feature + repeated double qp_scales = 6; // scale per feature } // An object that can be repeated in order to provide the order of the triggers. @@ -781,4 +880,4 @@ message ProtoHEFHwPackageInfo { uint32 dense_alignment_size = 1; uint32 axi_width = 2; uint32 memory_width = 3; -} \ No newline at end of file +} diff --git a/hailort/libhailort/include/hailo/buffer.hpp b/hailort/libhailort/include/hailo/buffer.hpp index 311185e..e0693ba 100644 --- a/hailort/libhailort/include/hailo/buffer.hpp +++ b/hailort/libhailort/include/hailo/buffer.hpp @@ -11,6 +11,7 @@ #define _HAILO_BUFFER_HPP_ #include "hailo/expected.hpp" +#include "hailo/buffer_storage.hpp" #include #include @@ -19,6 +20,7 @@ #include +/** hailort namespace */ namespace hailort { @@ -48,6 +50,8 @@ public: // Empty buffer (points to null, size is zero) Buffer(); + // Buffer backed by the storage param + Buffer(BufferStoragePtr storage); ~Buffer() = default; Buffer(const Buffer& other) = delete; @@ -60,18 +64,20 @@ public: * Create functions, may fail be due to out of memory */ // Creates a buffer size bytes long, without setting the memory - static Expected create(size_t size); + static Expected create(size_t size, const BufferStorageParams ¶ms = {}); // Creates a buffer size bytes long, setting the memory to default_value - static Expected create(size_t size, uint8_t default_value); + static 
Expected create(size_t size, uint8_t default_value, const BufferStorageParams ¶ms = {}); // Creates a copy of the data pointed to by src, size bytes long - static Expected create(const uint8_t *src, size_t size); + static Expected create(const uint8_t *src, size_t size, const BufferStorageParams ¶ms = {}); // Creates a new buffer with the contents of the initializer_list - static Expected create(std::initializer_list init); - + static Expected create(std::initializer_list init, const BufferStorageParams ¶ms = {}); + // Creates a buffer size bytes long, without setting the memory - static Expected create_shared(size_t size); + static Expected create_shared(size_t size, const BufferStorageParams ¶ms = {}); // Creates a buffer size bytes long, setting the memory to default_value - static Expected create_shared(size_t size, uint8_t default_value); + static Expected create_shared(size_t size, uint8_t default_value, const BufferStorageParams ¶ms = {}); + // Creates a copy of the data pointed to by src, size bytes long + static Expected create_shared(const uint8_t *src, size_t size, const BufferStorageParams ¶ms = {}); // Moves the data pointed to by other into the lvalue: // * other is invalidated. @@ -92,16 +98,14 @@ public: iterator begin(); iterator end(); + BufferStorage &storage(); + // Returns a pointer to the start of the buffer uint8_t* data() noexcept; const uint8_t* data() const noexcept; // Returns the size of the buffer size_t size() const noexcept; - - // Returns a pointer to the start of the buffer and releases the ownership - // Free the returned pointer with `delete` - uint8_t* release() noexcept; // Casts the buffer to a string of length size(). 
// If there's a null char in the buffer, the string will terminate at the null char @@ -117,7 +121,7 @@ public: T* as_pointer() const { assert(m_size >= sizeof(T)); - return reinterpret_cast(m_data.get()); + return reinterpret_cast(m_data); } // Returns a copy of the data at the start of the buffer, cast to T @@ -126,11 +130,11 @@ public: T as_type() const { assert(m_size >= sizeof(T)); - return *(reinterpret_cast(m_data.get())); + return *(reinterpret_cast(m_data)); } // The following functions return a copy of the data at the start of the buffer, cast to uint16/32/64_t - // Note: If this->size() is less than the size of the ineger type, then the copy will hold data + // Note: If this->size() is less than the size of the integer type, then the copy will hold data // that isn't from the buffer! uint16_t as_uint16() const; uint32_t as_uint32() const; @@ -146,16 +150,16 @@ public: } // The following functions return references of the data at the start of the buffer, cast to uint16/32/64_t - // Note: If this->size() is less than the size of the ineger type, then the copy will hold data + // Note: If this->size() is less than the size of the integer type, then the copy will hold data // that isn't from the buffer! 
uint16_t& as_uint16(); uint32_t& as_uint32(); uint64_t& as_uint64(); private: - Buffer(std::unique_ptr data, size_t size); - - std::unique_ptr m_data; + // Initialization dependency + BufferStoragePtr m_storage; + uint8_t *m_data; size_t m_size; }; @@ -170,7 +174,7 @@ public: explicit MemoryView(Buffer &buffer); MemoryView(void *data, size_t size); ~MemoryView() = default; - + MemoryView& operator=(MemoryView&& other) = default; MemoryView(const MemoryView &) = default; MemoryView& operator=(MemoryView &) = default; diff --git a/hailort/libhailort/include/hailo/buffer_storage.hpp b/hailort/libhailort/include/hailo/buffer_storage.hpp new file mode 100644 index 0000000..6a9bd45 --- /dev/null +++ b/hailort/libhailort/include/hailo/buffer_storage.hpp @@ -0,0 +1,240 @@ +/** + * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) +**/ +/** + * @file buffer_storage.hpp + * @brief TODO: fill me (HRT-10026) + **/ + +#ifndef _HAILO_BUFFER_STORAGE_HPP_ +#define _HAILO_BUFFER_STORAGE_HPP_ + +#include "hailo/hailort.h" +#include "hailo/expected.hpp" + +#include +#include +#include +#include +#include +#include + + +/** hailort namespace */ +namespace hailort +{ + +// Forward declarations +class Device; +class VDevice; +class BufferStorage; +class HeapStorage; +class DmaStorage; +class HailoRTDriver; + +namespace vdma { + class DmaAbleBuffer; + using DmaAbleBufferPtr = std::shared_ptr; + + class MappedBuffer; + using MappedBufferPtr = std::shared_ptr; +} + + +/*! Buffer storage parameters. 
Analogical to hailo_buffer_parameters_t */ +struct HAILORTAPI BufferStorageParams +{ +public: + struct HeapParams + { + public: + HeapParams(); + }; + + struct DmaMappingParams + { + public: + static Expected create(const hailo_buffer_dma_mapping_params_t ¶ms); + // DmaMappingParams for a buffer to be mapped to device + DmaMappingParams(Device &device, hailo_dma_buffer_direction_t data_direction); + // DmaMappingParams for a buffer to be mapped to all the underlying devices held by vdevice + DmaMappingParams(VDevice &vdevice, hailo_dma_buffer_direction_t data_direction); + // DmaMappingParams for a buffer to be lazily mapped upon it's first async transfer to a given device + DmaMappingParams(); + + // Note: We hold a pointer to a Device/VDevice/neither, since DmaMappingParams support mapping to + // a device, vdevice or lazy mapping + Device *device; + VDevice *vdevice; + hailo_dma_buffer_direction_t data_direction; + + private: + DmaMappingParams(const hailo_buffer_dma_mapping_params_t ¶ms); + }; + + static Expected create(const hailo_buffer_parameters_t ¶ms); + // Dma buffer params for lazy mapping + static BufferStorageParams create_dma(); + // Dma buffer params for mapping to device in data_direction + static BufferStorageParams create_dma(Device &device, hailo_dma_buffer_direction_t data_direction); + // Dma buffer params for mapping to vdevice in data_direction + static BufferStorageParams create_dma(VDevice &vdevice, hailo_dma_buffer_direction_t data_direction); + + // Defaults to heap params + BufferStorageParams(); + + hailo_buffer_flags_t flags; + union { + HeapParams heap_params; + DmaMappingParams dma_mapping_params; + }; +}; + +using BufferStoragePtr = std::shared_ptr; + +class HAILORTAPI BufferStorage +{ +public: + enum class Type { + HEAP, + DMA + }; + + static Expected create(size_t size, const BufferStorageParams ¶ms); + + BufferStorage(BufferStorage&& other) noexcept = default; + BufferStorage(const BufferStorage &) = delete; + BufferStorage 
&operator=(BufferStorage &&) = delete; + BufferStorage &operator=(const BufferStorage &) = delete; + virtual ~BufferStorage() = default; + + Type type() const; + virtual size_t size() const = 0; + virtual void *user_address() = 0; + // Returns the pointer managed by this object and releases ownership + // TODO: Add a free function pointer? (HRT-10024) + // // Free the returned pointer with `delete` + // TODO: after release the containing buffer will hold pointers to values that were released. + // Document that this can happen? Disable this behavior somehow? (HRT-10024) + virtual Expected release() noexcept = 0; + // Maps the storage to device in data_direction. + // - If the mapping is new - true is returned. + // - If the mapping already exists - false is returned. + // - Otherwise - Unexpected with a failure status is returned. + virtual Expected dma_map(Device &device, hailo_dma_buffer_direction_t data_direction) = 0; + // Maps the backing buffer to a device via driver in data_direction, returning a pointer to it. + // - If the mapping is new - true is returned. + // - If the mapping already exists - false is returned. + // - Otherwise - Unexpected with a failure status is returned. 
+ virtual Expected dma_map(HailoRTDriver &driver, hailo_dma_buffer_direction_t data_direction) = 0; + + // Internal functions + virtual Expected get_dma_mapped_buffer(const std::string &device_id) = 0; + +protected: + explicit BufferStorage(Type type); + + const Type m_type; +}; + +using HeapStoragePtr = std::shared_ptr; + +class HAILORTAPI HeapStorage : public BufferStorage +{ +public: + static Expected create(size_t size); + HeapStorage(std::unique_ptr data, size_t size); + HeapStorage(HeapStorage&& other) noexcept; + HeapStorage(const HeapStorage &) = delete; + HeapStorage &operator=(HeapStorage &&) = delete; + HeapStorage &operator=(const HeapStorage &) = delete; + virtual ~HeapStorage() = default; + + virtual size_t size() const override; + virtual void *user_address() override; + virtual Expected release() noexcept override; + virtual Expected dma_map(Device &device, hailo_dma_buffer_direction_t data_direction) override; + virtual Expected dma_map(HailoRTDriver &driver, hailo_dma_buffer_direction_t data_direction) override; + + // Internal functions + virtual Expected get_dma_mapped_buffer(const std::string &device_id) override; + +private: + std::unique_ptr m_data; + size_t m_size; +}; + +// ************************************* NOTE - START ************************************* // +// DmaStorage isn't currently supported and is for internal use only // +// **************************************************************************************** // +using DmaStoragePtr = std::shared_ptr; + +// TODO: HRT-10026 doc this +class HAILORTAPI DmaStorage : public BufferStorage +{ +public: + // Creates a DmaStorage instance holding a dma-able buffer size bytes large. + // The buffer isn't mapped to dma until dma_map is called. + static Expected create(size_t size); + // Creates a DmaStorage instance holding a dma-able buffer size bytes large. + // The buffer is mapped to device in data_direction. 
+ static Expected create(size_t size, + hailo_dma_buffer_direction_t data_direction, Device &device); + // Creates a DmaStorage instance holding a dma-able buffer size bytes large. + // The buffer is mapped to vdevice.get_physical_devices() in data_direction. + static Expected create(size_t size, + hailo_dma_buffer_direction_t data_direction, VDevice &vdevice); + + // TODO: doc that the addr needs to be on a new page and aligned to 64B (HRT-9559) + // probably best just to call mmap + // Creates a DmaStorage instance backed by the size bytes large buffer pointed to by user_address. + // The buffer isn't mapped to dma until dma_map is called. + static Expected create_from_user_address(void *user_address, size_t size); + // Creates a DmaStorage instance backed by the size bytes large buffer pointed to by user_address. + // The buffer is mapped to device in data_direction. + static Expected create_from_user_address(void *user_address, size_t size, + hailo_dma_buffer_direction_t data_direction, Device &device); + // Creates a DmaStorage instance backed by the size bytes large buffer pointed to by user_address. + // The buffer is mapped to vdevice.get_physical_devices() in data_direction. 
+ static Expected create_from_user_address(void *user_address, size_t size, + hailo_dma_buffer_direction_t data_direction, VDevice &device); + + DmaStorage(const DmaStorage &other) = delete; + DmaStorage &operator=(const DmaStorage &other) = delete; + DmaStorage(DmaStorage &&other) noexcept = default; + DmaStorage &operator=(DmaStorage &&other) = delete; + virtual ~DmaStorage() = default; + + virtual size_t size() const override; + virtual void *user_address() override; + virtual Expected release() noexcept override; + // TODO: thread safety (HRT-10669) + virtual Expected dma_map(Device &device, hailo_dma_buffer_direction_t data_direction) override; + virtual Expected dma_map(HailoRTDriver &driver, hailo_dma_buffer_direction_t data_direction) override; + + // Internal functions + DmaStorage(vdma::DmaAbleBufferPtr &&dma_able_buffer); + virtual Expected get_dma_mapped_buffer(const std::string &device_id) override; + +private: + // Creates a backing dma-able buffer (either user or hailort allocated). + // Maps said buffer to physical_devices in data_direction. + // By default (if physical_devices is empty), no mapping will occur + static Expected create(void *user_address, size_t size, + hailo_dma_buffer_direction_t data_direction = HAILO_DMA_BUFFER_DIRECTION_MAX_ENUM, + std::vector> &&physical_devices = {}); + + vdma::DmaAbleBufferPtr m_dma_able_buffer; + + // For each device (key is device_id), we store some vdma mapping. 
+ // TODO: use (device_id, direction) as key - HRT-10656 + std::unordered_map m_mappings; +}; +// ************************************** NOTE - END ************************************** // +// DmaStorage isn't currently supported and is for internal use only // +// **************************************************************************************** // + +} /* namespace hailort */ + +#endif /* _HAILO_BUFFER_STORAGE_HPP_ */ diff --git a/hailort/libhailort/include/hailo/device.hpp b/hailort/libhailort/include/hailo/device.hpp index 904b37e..bb12702 100644 --- a/hailort/libhailort/include/hailo/device.hpp +++ b/hailort/libhailort/include/hailo/device.hpp @@ -22,6 +22,7 @@ #include +/** hailort namespace */ namespace hailort { @@ -89,8 +90,8 @@ public: std::chrono::milliseconds timeout); /** - * Creates a device if there is only one system device detected in the system. - * + * Creates a device. If there are more than one device detected in the system, an arbitrary device is returned. + * * @return Upon success, returns Expected of a unique_ptr to Device object. * Otherwise, returns Unexpected of ::hailo_status error. */ @@ -98,19 +99,20 @@ public: /** * Creates a device by the given device id. - * + * * @param[in] device_id Device id string, can represent several device types: * [-] for pcie devices - pcie bdf (XXXX:XX:XX.X) * [-] for ethernet devices - ip address (xxx.xxx.xxx.xxx) - * + * * @return Upon success, returns Expected of a unique_ptr to Device object. * Otherwise, returns Unexpected of ::hailo_status error. */ static Expected> create(const std::string &device_id); /** - * Creates pcie device if there is only one pcie device connected - * + * Creates pcie device. If there are more than one device detected in the system, an arbitrary pcie device is + * returned. + * * @return Upon success, returns Expected of a unique_ptr to Device object. * Otherwise, returns Unexpected of ::hailo_status error. 
*/ @@ -118,7 +120,7 @@ public: /** * Creates a PCIe device by the given info. - * + * * @param[in] device_info Information about the device to open. * @return Upon success, returns Expected of a unique_ptr to Device object. * Otherwise, returns Unexpected of ::hailo_status error. @@ -127,7 +129,7 @@ public: /** * Creates an ethernet device by the given info. - * + * * @param[in] device_info Information about the device to open. * @return Upon success, returns Expected of a unique_ptr to Device object. * Otherwise, returns Unexpected of ::hailo_status error. @@ -136,16 +138,28 @@ public: /** * Creates an ethernet device by IP address. - * + * * @param[in] ip_addr The device IP address. * @return Upon success, returns Expected of a unique_ptr to Device object. * Otherwise, returns Unexpected of ::hailo_status error. */ static Expected> create_eth(const std::string &ip_addr); + /** + * Creates an ethernet device by IP address, port number, timeout duration and max number of attempts + * + * @param[in] device_address The device IP address. + * @param[in] port The port number that the device will use for the Ethernet communication. + * @param[in] timeout_milliseconds The time in milliseconds to scan devices. + * @param[in] max_number_of_attempts The number of attempts to find a device. + * @return Upon success, returns Expected of a unique_ptr to Device object. + * Otherwise, returns Unexpected of ::hailo_status error. + */ + static Expected> create_eth(const std::string &device_address, uint16_t port, uint32_t timeout_milliseconds, uint8_t max_number_of_attempts); + /** * Parse PCIe device BDF string into hailo device info structure. - * + * * @param[in] device_info_str BDF device info, format [\].\.\.\, same format as in lspci. * @return Upon success, returns Expected of ::hailo_pcie_device_info_t containing the information. * Otherwise, returns Unexpected of ::hailo_status error. @@ -154,7 +168,7 @@ public: /** * Returns a string of pcie device info. 
- * + * * @param[in] device_info A ::hailo_pcie_device_info_t containing the pcie device information. * @return Upon success, returns Expected of a string containing the information. * Otherwise, returns Unexpected of ::hailo_status error. @@ -163,13 +177,22 @@ public: /** * Returns the device type of the given device id string. - * + * * @param[in] device_id A std::string device id to check. * @return Upon success, returns Expected of the device type. * Otherwise, returns Unexpected of ::hailo_status error. */ static Expected get_device_type(const std::string &device_id); + /** + * Checks if 2 device ids represents the same device. + * + * @param[in] first A std::string first device id to check. + * @param[in] second A std::string second device id to check. + * @return true if the device ids represents the same device. + */ + static bool device_ids_equal(const std::string &first, const std::string &second); + /** * Create the default configure params from an hef. * diff --git a/hailort/libhailort/include/hailo/dma_mapped_buffer.hpp b/hailort/libhailort/include/hailo/dma_mapped_buffer.hpp deleted file mode 100644 index f25ac37..0000000 --- a/hailort/libhailort/include/hailo/dma_mapped_buffer.hpp +++ /dev/null @@ -1,76 +0,0 @@ -/** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. - * Distributed under the MIT license (https://opensource.org/licenses/MIT) - **/ -/** - * @file dma_mapped_buffer.hpp - * @brief The mapped buffer that is continuous in virtual memory, but not on physical memory. - * We map the buffer to the IOMMU. - * - * The buffer can be used only with the help of a descriptors list that contains pointers to a physical - * continuous "dma pages". - * - * There are 2 options to allocated the buffer: - * 1. User mode allocation - the user mode calls `malloc` or `mmap` to allocate the buffer, then - * using HailoRTDriver we map the driver to the IOMMU (and pin the pages to avoid pagigs). - * This is the default option - * 2. 
Kernel mode allocation - on some systems, the user mode doesn't allocate the memory in a "dma-able" address, - * so we need to allocate the pages in driver. - **/ - -#ifndef _HAILO_DMA_MAPPED_BUFFER_HPP_ -#define _HAILO_DMA_MAPPED_BUFFER_HPP_ - -#include "hailo/expected.hpp" -#include "hailo/device.hpp" - - -namespace hailort { - -// Forward deceleration across namespaces -namespace vdma { - class DescriptorList; - class MappedBufferFactory; - class BufferedChannel; -} - -// ******************************************** NOTE ******************************************** // -// Async Stream API and DmaMappedBuffer are currently not supported and are for internal use only // -// ********************************************************************************************** // -class HAILORTAPI DmaMappedBuffer final -{ -public: - static Expected create(size_t size, - hailo_vdma_buffer_direction_flags_t data_direction_flags, Device &device); - // TODO: doc that the addr needs to be on a new page and aligned to 64B (HRT-9559) - // probably best just to call mmap - static Expected create_from_user_address(void *user_address, size_t size, - hailo_vdma_buffer_direction_flags_t data_direction_flags, Device &device); - - DmaMappedBuffer(const DmaMappedBuffer &other) = delete; - DmaMappedBuffer &operator=(const DmaMappedBuffer &other) = delete; - DmaMappedBuffer(DmaMappedBuffer &&other) noexcept; - DmaMappedBuffer &operator=(DmaMappedBuffer &&other) = delete; - ~DmaMappedBuffer(); - - void *user_address(); - size_t size() const; - hailo_status synchronize(); - -private: - static Expected create(void *user_address, size_t size, - hailo_vdma_buffer_direction_flags_t data_direction_flags, Device &device); - - // Need access to pimpl - friend class vdma::DescriptorList; - friend class vdma::MappedBufferFactory; - friend class vdma::BufferedChannel; - - class Impl; - explicit DmaMappedBuffer(std::unique_ptr pimpl); - std::unique_ptr pimpl; -}; - -} /* namespace hailort */ - -#endif 
/* _HAILO_DMA_MAPPED_BUFFER_HPP_ */ \ No newline at end of file diff --git a/hailort/libhailort/include/hailo/event.hpp b/hailort/libhailort/include/hailo/event.hpp index e46517b..28ebf61 100644 --- a/hailort/libhailort/include/hailo/event.hpp +++ b/hailort/libhailort/include/hailo/event.hpp @@ -31,6 +31,7 @@ namespace neosmart { } #endif // defined (__QNX__) +/** hailort namespace */ namespace hailort { @@ -50,7 +51,7 @@ using WaitablePtrList = std::vector; class HAILORTAPI Waitable { -public: +public: explicit Waitable(underlying_waitable_handle_t handle); virtual ~Waitable(); Waitable(Waitable&& other); @@ -60,30 +61,31 @@ public: Waitable& operator=(Waitable&&) = delete; // Blocks the current thread until the waitable is signaled - // * If this->is_auto_reset(), then the Waitable is reset after wait returns with HAILO_SUCCESS + // * If this->is_auto_reset(), then the Waitable is reset after wait returns with HAILO_SUCCESS // * Otherwise, the Waitable is not reset - virtual hailo_status wait(std::chrono::milliseconds timeout) = 0; + virtual hailo_status wait(std::chrono::milliseconds timeout); virtual hailo_status signal() = 0; virtual bool is_auto_reset() = 0; underlying_waitable_handle_t get_underlying_handle(); -#if defined(__QNX__) - virtual void post_wait() = 0; -#endif // defined (__QNX__) static constexpr auto INIFINITE_TIMEOUT() { return std::chrono::milliseconds(HAILO_INFINITE); } protected: - #if defined(_MSC_VER) || defined(__QNX__) + virtual hailo_status post_wait() = 0; + static hailo_status wait_for_single_object(underlying_waitable_handle_t handle, std::chrono::milliseconds timeout); - #else + +#if defined(__linux__) // Waits on the fd until the waitable is signaled static hailo_status eventfd_poll(underlying_waitable_handle_t fd, std::chrono::milliseconds timeout); // Expected to be called after eventfd_poll returns HAILO_SUCCESS static hailo_status eventfd_read(underlying_waitable_handle_t fd); static hailo_status 
eventfd_write(underlying_waitable_handle_t fd); - #endif +#endif underlying_waitable_handle_t m_handle; + + friend class WaitableGroup; }; class Event; @@ -105,15 +107,15 @@ public: static Expected create(const State& initial_state); static EventPtr create_shared(const State& initial_state); - virtual hailo_status wait(std::chrono::milliseconds timeout) override; virtual hailo_status signal() override; virtual bool is_auto_reset() override; hailo_status reset(); -#if defined(__QNX__) - virtual void post_wait() override; -#endif // defined (__QNX__) + +protected: + virtual hailo_status post_wait() override { return HAILO_SUCCESS; } private: + static underlying_waitable_handle_t open_event_handle(const State& initial_state); }; @@ -129,16 +131,18 @@ public: static Expected create(uint32_t initial_count); static SemaphorePtr create_shared(uint32_t initial_count); - virtual hailo_status wait(std::chrono::milliseconds timeout) override; virtual hailo_status signal() override; virtual bool is_auto_reset() override; + #if defined(__QNX__) Semaphore(underlying_waitable_handle_t handle, uint32_t initial_count); Semaphore(Semaphore&& other); - virtual void post_wait() override; #endif // defined (__QNX__) +protected: + virtual hailo_status post_wait() override; + private: static underlying_waitable_handle_t open_semaphore_handle(uint32_t initial_count); #if defined (__QNX__) diff --git a/hailort/libhailort/include/hailo/expected.hpp b/hailort/libhailort/include/hailo/expected.hpp index 6c766f7..d911539 100644 --- a/hailort/libhailort/include/hailo/expected.hpp +++ b/hailort/libhailort/include/hailo/expected.hpp @@ -168,6 +168,7 @@ #include +/** hailort namespace */ namespace hailort { diff --git a/hailort/libhailort/include/hailo/hailort.h b/hailort/libhailort/include/hailo/hailort.h index 2ac0702..67e57b9 100644 --- a/hailort/libhailort/include/hailo/hailort.h +++ b/hailort/libhailort/include/hailo/hailort.h @@ -48,7 +48,7 @@ extern "C" { #define 
HAILO_DEFAULT_INIT_AVERAGING_FACTOR (HAILO_AVERAGE_FACTOR_256) #define HAILO_DEFAULT_BUFFERS_THRESHOLD (0) #define HAILO_DEFAULT_MAX_ETHERNET_BANDWIDTH_BYTES_PER_SEC (106300000) -#define HAILO_MAX_STREAMS_COUNT (32) +#define HAILO_MAX_STREAMS_COUNT (40) #define HAILO_DEFAULT_BATCH_SIZE (0) #define HAILO_MAX_NETWORK_GROUPS (8) #define HAILO_MAX_NETWORK_GROUP_NAME_SIZE (HAILO_MAX_NAME_SIZE) @@ -159,6 +159,10 @@ typedef uint16_t nms_bbox_counter_t; HAILO_STATUS__X(77, HAILO_RPC_FAILED /*!< RPC failed */)\ HAILO_STATUS__X(78, HAILO_INVALID_SERVICE_VERSION /*!< Invalid service version */)\ HAILO_STATUS__X(79, HAILO_NOT_SUPPORTED /*!< Not supported operation */)\ + HAILO_STATUS__X(80, HAILO_NMS_BURST_INVALID_DATA /*!< Invalid data in NMS burst */)\ + HAILO_STATUS__X(81, HAILO_OUT_OF_HOST_CMA_MEMORY /*!< Cannot allocate more CMA memory at host */)\ + HAILO_STATUS__X(82, HAILO_QUEUE_IS_FULL /*!< Cannot push more items into the queue */)\ + HAILO_STATUS__X(83, HAILO_DMA_MAPPING_ALREADY_EXISTS /*!< DMA mapping already exists */)\ typedef enum { #define HAILO_STATUS__X(value, name) name = value, @@ -167,7 +171,7 @@ typedef enum { /** Must be last! 
*/ HAILO_STATUS_COUNT, - + /** Max enum value to maintain ABI Integrity */ HAILO_STATUS_MAX_ENUM = HAILO_MAX_ENUM } hailo_status; @@ -771,9 +775,6 @@ typedef enum { HAILO_STREAM_DIRECTION_MAX_ENUM = HAILO_MAX_ENUM } hailo_stream_direction_t; -// ******************************************** NOTE ******************************************** // -// Async Stream API and DmaMappedBuffer are currently not supported and are for internal use only // -// ********************************************************************************************** // /** Stream flags */ typedef enum { HAILO_STREAM_FLAGS_NONE = 0, /*!< No flags */ @@ -783,15 +784,57 @@ typedef enum { HAILO_STREAM_FLAGS_MAX_ENUM = HAILO_MAX_ENUM } hailo_stream_flags_t; -/** Hailo vdma buffer direction */ +// ************************************* NOTE - START ************************************* // +// Dma buffer allocation isn't currently supported and is for internal use only // +// **************************************************************************************** // +/** Hailo dma buffer direction */ +typedef enum { + HAILO_DMA_BUFFER_DIRECTION_H2D = 0, + HAILO_DMA_BUFFER_DIRECTION_D2H = 1, + HAILO_DMA_BUFFER_DIRECTION_BOTH = 2, + + /** Max enum value to maintain ABI Integrity */ + HAILO_DMA_BUFFER_DIRECTION_MAX_ENUM = HAILO_MAX_ENUM +} hailo_dma_buffer_direction_t; + +/** Hailo buffer flags */ typedef enum { - HAILO_VDMA_BUFFER_DIRECTION_FLAGS_NONE = 0, - HAILO_VDMA_BUFFER_DIRECTION_FLAGS_H2D = 1 << 0, - HAILO_VDMA_BUFFER_DIRECTION_FLAGS_D2H = 1 << 1, + HAILO_BUFFER_FLAGS_NONE = 0, /*!< No flags - heap allocated buffer */ + HAILO_BUFFER_FLAGS_DMA = 1 << 0, /*!< Buffer is mapped to DMA (will be page aligned implicitly) */ /** Max enum value to maintain ABI Integrity */ - HAILO_VDMA_BUFFER_DIRECTION_FLAGS_MAX_ENUM = HAILO_MAX_ENUM -} hailo_vdma_buffer_direction_flags_t; + HAILO_BUFFER_FLAGS_MAX_ENUM = HAILO_MAX_ENUM +} hailo_buffer_flags_t; + +/** Hailo buffer heap parameters */ +typedef struct { + 
EMPTY_STRUCT_PLACEHOLDER +} hailo_buffer_heap_params_t; + +// Hailo buffer dma mapping parameters. +// - If device is not NULL, the resulting buffer created by hailo_allocate_buffer will be mapped to the device. +// - If vdevice is not NULL, the resulting buffer created by hailo_allocate_buffer will be mapped to all the +// underlying devices held be vdevice. +// - If both device and vdevice are null, the resulting buffer created by hailo_allocate_buffer will be lazily +// mapped upon the first async transfer (i.e. when the buffer is passed to hailo_stream_read_raw_buffer_async +// or hailo_stream_write_raw_buffer_async). +typedef struct { + hailo_device device; + hailo_vdevice vdevice; + hailo_dma_buffer_direction_t direction; +} hailo_buffer_dma_mapping_params_t; + +/** Hailo buffer parameters */ +typedef struct { + hailo_buffer_flags_t flags; + union { + hailo_buffer_heap_params_t heap_params; + hailo_buffer_dma_mapping_params_t dma_mapping_params; + }; +} hailo_buffer_parameters_t; +// ************************************** NOTE - END ************************************** // +// Dma buffer allocation isn't currently supported and is for internal use only // +// **************************************************************************************** // /** Input or output data transform parameters */ typedef struct { @@ -1159,6 +1202,13 @@ typedef struct { char original_name[HAILO_MAX_STREAM_NAME_SIZE]; } hailo_nms_defuse_info_t; +typedef enum { + HAILO_BURST_TYPE_NO_BURST = 0, + HAILO_BURST_TYPE_H8_PER_CLASS = 1, + HAILO_BURST_TYPE_H15_PER_CLASS = 2, + HAILO_BURST_TYPE_H15_PER_FRAME = 3 +} hailo_nms_burst_type_t; + /** NMS Internal HW Info */ typedef struct { /** Amount of NMS classes */ @@ -1171,6 +1221,10 @@ typedef struct { uint32_t chunks_per_frame; bool is_defused; hailo_nms_defuse_info_t defuse_info; + /** Size of NMS burst in bytes */ + uint32_t burst_size; + /** NMS burst type */ + hailo_nms_burst_type_t burst_type; } hailo_nms_info_t; /** NMS Fuse 
Input */ @@ -1206,13 +1260,61 @@ typedef struct { } hailo_bbox_float32_t; #pragma pack(pop) +/** + * Completion info struct passed to the ::hailo_stream_write_async_callback_t after the async operation is + * done or has failed. + */ +typedef struct { + /** + * Status of the async transfer: + * - ::HAILO_SUCCESS - The transfer is complete. + * - ::HAILO_STREAM_ABORTED_BY_USER - The transfer was canceled (can happen after network deactivation). + * - Any other ::hailo_status on unexpected errors. + */ + hailo_status status; + + /** Address of the buffer passed to the async operation */ + const void *buffer_addr; + + /** Size of the buffer passed to the async operation. */ + size_t buffer_size; + + /** User specific data. Can be used as a context for the callback. */ + void *opaque; +} hailo_stream_write_async_completion_info_t; + +/** + * Async stream write complete callback prototype. + */ +typedef void (*hailo_stream_write_async_callback_t)(const hailo_stream_write_async_completion_info_t *info); + +/** + * Completion info struct passed to the ::hailo_stream_read_async_callback_t after the async operation is + * done or has failed. + */ typedef struct { /** - * - HAILO_SUCCESS when transfer is complete - * - HAILO_STREAM_NOT_ACTIVATED due to stream deactivation + * Status of the async transfer: + * - ::HAILO_SUCCESS - The transfer is complete. + * - ::HAILO_STREAM_ABORTED_BY_USER - The transfer was canceled (can happen after network deactivation). + * - Any other ::hailo_status on unexpected errors. */ hailo_status status; -} hailo_async_transfer_completion_info_t; + + /** Address of the buffer passed to the async operation */ + void *buffer_addr; + + /** Size of the buffer passed to the async operation. */ + size_t buffer_size; + + /** User specific data. Can be used as a context for the callback. */ + void *opaque; +} hailo_stream_read_async_completion_info_t; + +/** + * Async stream read complete callback prototype. 
+ */ +typedef void (*hailo_stream_read_async_callback_t)(const hailo_stream_read_async_completion_info_t *info); /** * Input or output stream information. In case of multiple inputs or outputs, each one has @@ -1358,6 +1460,8 @@ typedef enum { HAILO_NOTIFICATION_ID_CONTEXT_SWITCH_BREAKPOINT_REACHED, /** Matches hailo_notification_message_parameters_t::health_monitor_clock_changed_notification */ HAILO_NOTIFICATION_ID_HEALTH_MONITOR_CLOCK_CHANGED_EVENT, + /** Matches hailo_notification_message_parameters_t::hailo_hw_infer_manager_infer_done_notification */ + HAILO_NOTIFICATION_ID_HW_INFER_MANAGER_INFER_DONE, /** Must be last! */ HAILO_NOTIFICATION_ID_COUNT, @@ -1443,6 +1547,10 @@ typedef struct { uint32_t current_clock; } hailo_health_monitor_clock_changed_notification_message_t; +typedef struct { + uint32_t infer_cycles; +} hailo_hw_infer_manager_infer_done_notification_message_t; + /** Union of all notification messages parameters. See ::hailo_notification_t */ typedef union { /** Ethernet rx error */ @@ -1463,6 +1571,8 @@ typedef union { hailo_context_switch_breakpoint_reached_message_t context_switch_breakpoint_reached_notification; /** Neural network core clock changed due to health monitor event */ hailo_health_monitor_clock_changed_notification_message_t health_monitor_clock_changed_notification; + /* HW infer manager finished infer notification */ + hailo_hw_infer_manager_infer_done_notification_message_t hw_infer_manager_infer_done_notification; } hailo_notification_message_parameters_t; /** Notification data that will be passed to the callback passed in ::hailo_notification_callback */ @@ -1689,7 +1799,7 @@ HAILORTAPI const char* hailo_get_status_message(hailo_status status); * device scanned. * @note ethernet devices are not considered "devices in the system", so they are not scanned in this function. * use :hailo_scan_ethernet_devices for ethernet devices. - * + * * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error. 
*/ HAILORTAPI hailo_status hailo_scan_devices(hailo_scan_devices_params_t *params, hailo_device_id_t *device_ids, @@ -1697,15 +1807,15 @@ HAILORTAPI hailo_status hailo_scan_devices(hailo_scan_devices_params_t *params, /** * Creates a device by the given device id. - * + * * @param[in] device_id Device id, can represent several device types: * [-] for pcie devices - pcie bdf (XXXX:XX:XX.X or XX:XX.X) * [-] for ethernet devices - ip address (xxx.xxx.xxx.xxx) - * If NULL is given and there is only one available system device, use this device. + * If NULL is given, uses an arbitrary device found on the system. * @param[out] device A pointer to a ::hailo_device that receives the allocated PCIe device. * @return Upon success, returns Expected of a unique_ptr to Device object. * Otherwise, returns Unexpected of ::hailo_status error. - * + * * @note To release a device, call the ::hailo_release_device function with the returned ::hailo_device. */ HAILORTAPI hailo_status hailo_create_device_by_id(const hailo_device_id_t *device_id, hailo_device *device); @@ -1727,7 +1837,7 @@ HAILORTAPI hailo_status hailo_scan_pcie_devices( /** * Parse PCIe device BDF string into hailo device info structure. - * + * * @param[in] device_info_str BDF device info, format [\].\.\.\, same format as in lspci. * @param[out] device_info A pointer to a ::hailo_pcie_device_info_t that receives the parsed device info. * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns an ::hailo_status error. @@ -1738,9 +1848,9 @@ HAILORTAPI hailo_status hailo_parse_pcie_device_info(const char *device_info_str /** * Creates a PCIe device. - * - * @param[in] device_info Information about the device to open. If NULL is given and there is only - * one available PCIe device, use this device. + * + * @param[in] device_info Information about the device to open. If NULL is given, uses an arbitrary device found on + * the system. 
* @param[out] device A pointer to a ::hailo_device that receives the allocated PCIe device. * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns an ::hailo_status error. * @note To release a device, call the ::hailo_release_device function with the returned ::hailo_device. @@ -1749,7 +1859,7 @@ HAILORTAPI hailo_status hailo_create_pcie_device(hailo_pcie_device_info_t *devic /** * Returns information on all available ethernet devices in the system. - * + * * @param[in] interface_name The name of the network interface to scan. * @param[out] eth_device_infos A pointer to a buffer of ::hailo_eth_device_info_t that receives the * information. @@ -1766,7 +1876,7 @@ HAILORTAPI hailo_status hailo_scan_ethernet_devices(const char *interface_name, /** * Creates an ethernet device. - * + * * @param[in] device_info Information about the device to open. * @param[out] device A pointer to a ::hailo_device that receives the allocated ethernet device corresponding to * the given information. @@ -1777,7 +1887,7 @@ HAILORTAPI hailo_status hailo_create_ethernet_device(hailo_eth_device_info_t *de /** * Release an open device. - * + * * @param[in] device A ::hailo_device object to be released. * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error. */ @@ -1785,7 +1895,7 @@ HAILORTAPI hailo_status hailo_release_device(hailo_device device); /** * Returns the device type of the given device id string. - * + * * @param[in] device_id A :hailo_device_id_t device id to check. * @param[out] device_type A :hailo_device_type_t returned device type. * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error. 
@@ -2451,6 +2561,28 @@ HAILORTAPI hailo_status hailo_calculate_eth_input_rate_limits(hailo_hef hef, con HAILORTAPI hailo_status hailo_init_configure_params(hailo_hef hef, hailo_stream_interface_t stream_interface, hailo_configure_params_t *params); +/** + * Init configure params with default values for a given hef by virtual device. + * + * @param[in] hef A ::hailo_hef object to configure the @a vdevice by. + * @param[in] vdevice A @a hailo_vdevice for which we init the params. + * @param[out] params A @a hailo_configure_params_t to be filled. + * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error. + */ +HAILORTAPI hailo_status hailo_init_configure_params_by_vdevice(hailo_hef hef, hailo_vdevice vdevice, + hailo_configure_params_t *params); + +/** + * Init configure params with default values for a given hef by device. + * + * @param[in] hef A ::hailo_hef object to configure the @a device by. + * @param[in] device A @a hailo_device for which we init the params. + * @param[out] params A @a hailo_configure_params_t to be filled. + * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error. + */ +HAILORTAPI hailo_status hailo_init_configure_params_by_device(hailo_hef hef, hailo_device device, + hailo_configure_params_t *params); + /** * Init configure params with default values for a given hef, where all input_streams_params are init to be MIPI type.
* @@ -2690,6 +2822,24 @@ HAILORTAPI hailo_status hailo_set_scheduler_priority(hailo_configured_network_gr /** @} */ // end of group_network_group_functions +/** @defgroup group_buffer_functions Buffer functions + * @{ + */ +// ************************************* NOTE - START ************************************* // +// Dma buffer allocation isn't currently supported and is for internal use only // +// **************************************************************************************** // +// Free returned buffer via hailo_free_buffer +HAILORTAPI hailo_status hailo_allocate_buffer(size_t size, const hailo_buffer_parameters_t *allocation_params, void **buffer_out); +HAILORTAPI hailo_status hailo_free_buffer(void *buffer); +// Maps buffer to dma. Free mapping by calling hailo_dma_unmap_buffer_from_device and then free buffer as needed +// If buffer has already been mapped to device, then HAILO_DMA_MAPPING_ALREADY_EXISTS shall be returned +HAILORTAPI hailo_status hailo_dma_map_buffer_to_device(void *buffer, size_t size, hailo_device device, hailo_dma_buffer_direction_t direction); +HAILORTAPI hailo_status hailo_dma_unmap_buffer_from_device(void *buffer, hailo_device device, hailo_dma_buffer_direction_t direction); +// ************************************** NOTE - END ************************************** // +// Dma buffer allocation isn't currently supported and is for internal use only // +// **************************************************************************************** // +/** @} */ // end of group_buffer_functions + /** @defgroup group_stream_functions Stream functions * @{ */ @@ -2748,36 +2898,166 @@ HAILORTAPI hailo_status hailo_get_output_stream_info(hailo_output_stream stream, /** * Synchronously reads data from a stream. - * + * * @param[in] stream A ::hailo_output_stream object. * @param[in] buffer A pointer to a buffer that receives the data read from @a stream. * @param[in] size The amount of bytes to read, should be the frame size. 
- * + * * @note The output buffer format comes from the \e format field inside ::hailo_stream_info_t and the shape comes from * the \e hw_shape field inside ::hailo_stream_info_t. + * @note @a size is expected to be stream_info.hw_frame_size. * - * @note @a size is expected to be a product of stream_info.hw_frame_size (i.e. more than one frame may be read) - * * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error. */ HAILORTAPI hailo_status hailo_stream_read_raw_buffer(hailo_output_stream stream, void *buffer, size_t size); /** * Synchronously writes all data to a stream. - * + * * @param[in] stream A ::hailo_input_stream object. * @param[in] buffer A pointer to a buffer that contains the data to be written to @a stream. * @param[in] size The amount of bytes to write. - * + * * @note The input buffer format comes from the \e format field inside ::hailo_stream_info_t and the shape comes from * the \e hw_shape field inside ::hailo_stream_info_t. + * @note @a size is expected to be stream_info.hw_frame_size. * - * @note @a size is expected to be a product of stream_info.hw_frame_size (i.e. more than one frame may be read) - * * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error. */ HAILORTAPI hailo_status hailo_stream_write_raw_buffer(hailo_input_stream stream, const void *buffer, size_t size); +/** + * Waits until the stream is ready to launch a new ::hailo_stream_read_raw_buffer_async operation. Each stream has a + * limited-size queue for ongoing transfers. You can retrieve the queue size for the given stream by calling + * ::hailo_output_stream_get_async_max_queue_size. + * + * @param[in] stream A ::hailo_output_stream object. + * @param[in] transfer_size Must be the result of ::hailo_get_output_stream_frame_size for the given stream. + * @param[in] timeout_ms Amount of time to wait until the stream is ready in milliseconds. + * + * @return Upon success, returns ::HAILO_SUCCESS. 
Otherwise: + * - If @a timeout_ms has passed and the stream is not ready, returns ::HAILO_TIMEOUT. + * - In any other error case, returns ::hailo_status error. + * + */ +HAILORTAPI hailo_status hailo_stream_wait_for_async_output_ready(hailo_output_stream stream, size_t transfer_size, + uint32_t timeout_ms); + +/** + * Waits until the stream is ready to launch a new ::hailo_stream_write_raw_buffer_async operation. Each stream has a + * limited-size queue for ongoing transfers. You can retrieve the queue size for the given stream by calling + * ::hailo_input_stream_get_async_max_queue_size. + * + * @param[in] stream A ::hailo_input_stream object. + * @param[in] transfer_size Must be the result of ::hailo_get_input_stream_frame_size for the given stream. + * @param[in] timeout_ms Amount of time to wait until the stream is ready in milliseconds. + * + * @return Upon success, returns ::HAILO_SUCCESS. Otherwise: + * - If @a timeout_ms has passed and the stream is not ready, returns ::HAILO_TIMEOUT. + * - In any other error case, returns ::hailo_status error. + */ +HAILORTAPI hailo_status hailo_stream_wait_for_async_input_ready(hailo_input_stream stream, size_t transfer_size, + uint32_t timeout_ms); + +/** + * Returns the maximum amount of frames that can be simultaneously read from the stream (by + * ::hailo_stream_read_raw_buffer_async calls) before any one of the read operations is complete, as signified by + * @a user_callback being called. + * + * @param[in] stream A ::hailo_output_stream object. + * @param[out] queue_size Returns value of the queue + * + * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error.
+ */ +HAILORTAPI hailo_status hailo_output_stream_get_async_max_queue_size(hailo_output_stream stream, size_t *queue_size); + +/** + * Returns the maximum amount of frames that can be simultaneously written to the stream (by + * ::hailo_stream_write_raw_buffer_async calls) before any one of the write operations is complete, as signified by + * @a user_callback being called. + * + * @param[in] stream A ::hailo_input_stream object. + * @param[out] queue_size Returns value of the queue + * + * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error. + */ +HAILORTAPI hailo_status hailo_input_stream_get_async_max_queue_size(hailo_input_stream stream, size_t *queue_size); + +/** + * Reads into @a buffer from the stream asynchronously, initiating a deferred operation that will be completed + * later. + * - If the function call succeeds (i.e., ::hailo_stream_read_raw_buffer_async returns ::HAILO_SUCCESS), the deferred + * operation has been initiated. Until @a user_callback is called, the user cannot change or delete @a buffer. + * - If the function call fails (i.e., ::hailo_stream_read_raw_buffer_async returns a status other than + * ::HAILO_SUCCESS), the deferred operation will not be initiated and @a user_callback will not be invoked. The user + * is free to change or delete @a buffer. + * - @a user_callback is triggered upon successful completion or failure of the deferred operation. + * The callback receives a ::hailo_stream_read_async_completion_info_t object containing a pointer to the transferred + * buffer (@a buffer_addr) and the transfer status (@a status). If the operation has completed successfully, the + * contents of @a buffer will have been updated by the read operation. + * + * @param[in] stream A ::hailo_output_stream object. + * @param[in] buffer The buffer to be read into. + * The buffer must be aligned to the system page size. 
+ * @param[in] size The size of the given buffer, expected to be the result of + * ::hailo_get_output_stream_frame_size. + * @param[in] user_callback The callback that will be called when the transfer is complete or has failed. + * @param[in] opaque Optional pointer to user-defined context (may be NULL if not desired). + * + * @return Upon success, returns ::HAILO_SUCCESS. Otherwise: + * - If the stream queue is full, returns ::HAILO_QUEUE_IS_FULL. + * In this case, please wait until @a user_callback is called on previous + * reads, or call ::hailo_stream_wait_for_async_output_ready. The size of the queue can be + * determined by calling ::hailo_output_stream_get_async_max_queue_size. + * - In any other error case, returns a ::hailo_status error. + * + * @note @a user_callback should execute as quickly as possible. + * @note The output buffer format comes from the \e format field inside ::hailo_stream_info_t and the shape comes from + * the \e hw_shape field inside ::hailo_stream_info_t. + * @note The address provided must be aligned to the system's page size, and the rest of the page should not be in + * use by any other part of the program to ensure proper functioning of the DMA operation. Memory for the + * provided address can be allocated using `mmap` on Unix-like systems or `VirtualAlloc` on Windows. + */ +HAILORTAPI hailo_status hailo_stream_read_raw_buffer_async(hailo_output_stream stream, void *buffer, size_t size, + hailo_stream_read_async_callback_t user_callback, void *opaque); + +/** + * Writes the contents of @a buffer to the stream asynchronously, initiating a deferred operation that will be + * completed later. + * - If the function call succeeds (i.e., ::hailo_stream_write_raw_buffer_async returns ::HAILO_SUCCESS), the deferred + * operation has been initiated. Until @a user_callback is called, the user cannot change or delete @a buffer. 
+ * - If the function call fails (i.e., ::hailo_stream_write_raw_buffer_async returns a status other than + * ::HAILO_SUCCESS), the deferred operation will not be initiated and @a user_callback will not be invoked. The user + * is free to change or delete @a buffer. + * - @a user_callback is triggered upon successful completion or failure of the deferred operation. The callback + * receives a ::hailo_stream_write_async_completion_info_t object containing a pointer to the transferred buffer + * (@a buffer_addr) and the transfer status (@a status). + * + * @param[in] stream A ::hailo_input_stream object. + * @param[in] buffer The buffer to be written. + * The buffer must be aligned to the system page size. + * @param[in] size The size of the given buffer, expected to be the result of + * ::hailo_get_input_stream_frame_size. + * @param[in] user_callback The callback that will be called when the transfer is complete + * or has failed. + * @param[in] opaque Optional pointer to user-defined context (may be NULL if not desired). + * + * @return Upon success, returns ::HAILO_SUCCESS. Otherwise: + * - If the stream queue is full, returns ::HAILO_QUEUE_IS_FULL. In this case please wait + * until @a user_callback is called on previous writes, or call ::hailo_stream_wait_for_async_input_ready. + * The size of the queue can be determined by calling ::hailo_input_stream_get_async_max_queue_size. + * - In any other error case, returns a ::hailo_status error. + * + * @note @a user_callback should run as quickly as possible. + * @note The input buffer format comes from the \e format field inside ::hailo_stream_info_t and the shape comes from + * the \e hw_shape field inside ::hailo_stream_info_t. + * @note The address provided must be aligned to the system's page size, and the rest of the page should not be in + * use by any other part of the program to ensure proper functioning of the DMA operation. 
Memory for the + * provided address can be allocated using `mmap` on Unix-like systems or `VirtualAlloc` on Windows. + */ +HAILORTAPI hailo_status hailo_stream_write_raw_buffer_async(hailo_input_stream stream, const void *buffer, size_t size, + hailo_stream_write_async_callback_t user_callback, void *opaque); + /** * Gets the size of a stream's frame on the host side in bytes * (the size could be affected by the format type - for example using UINT16, or by the data not being quantized yet) @@ -2937,7 +3217,7 @@ HAILORTAPI hailo_status hailo_create_demuxer_by_stream(hailo_output_stream strea HAILORTAPI hailo_status hailo_release_output_demuxer(hailo_output_demuxer demuxer); /** - * Demultiplexing an output frame pointed to by @a src directly to the buffer pointed to by @a dst. + * Demultiplexing an output frame pointed to by @a src directly to the buffers pointed to by @a raw_buffers. * * @param[in] demuxer A ::hailo_output_demuxer object used for the demuxing. * @param[in] src A pointer to a buffer to be demultiplexed. @@ -2947,11 +3227,29 @@ HAILORTAPI hailo_status hailo_release_output_demuxer(hailo_output_demuxer demuxe * demultiplexed data read from the @a stream. * @param[in] raw_buffers_count The number of ::hailo_stream_raw_buffer_t elements in the array pointed to by * @a raw_buffers. + * @note The order of @a raw_buffers should be the same as returned from the function 'hailo_get_mux_infos_by_output_demuxer()'. * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error. */ HAILORTAPI hailo_status hailo_demux_raw_frame_by_output_demuxer(hailo_output_demuxer demuxer, const void *src, size_t src_size, hailo_stream_raw_buffer_t *raw_buffers, size_t raw_buffers_count); +/** + * Demultiplexing an output frame pointed to by @a src directly to the buffers pointed to by @a raw_buffers_by_name. + * + * @param[in] demuxer A ::hailo_output_demuxer object used for the demuxing. 
+ * @param[in] src A pointer to a buffer to be demultiplexed. + * @param[in] src_size The number of bytes to be demultiplexed. This number must be equal to the + * hw_frame_size, and less than or equal to the size of @a src buffer. + * @param[in,out] raw_buffers_by_name A pointer to an array of ::hailo_stream_raw_buffer_by_name_t that receives the + * demultiplexed data read from the @a stream. hailo_stream_raw_buffer_by_name_t::name should + * be filled with the demuxes names. + * @param[in] raw_buffers_count The number of ::hailo_stream_raw_buffer_by_name_t elements in the array pointed to by + * @a raw_buffers_by_name. + * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error. + */ +HAILORTAPI hailo_status hailo_demux_by_name_raw_frame_by_output_demuxer(hailo_output_demuxer demuxer, const void *src, + size_t src_size, hailo_stream_raw_buffer_by_name_t *raw_buffers_by_name, size_t raw_buffers_count); + /** * Gets all multiplexed stream infos. * diff --git a/hailort/libhailort/include/hailo/hailort.hpp b/hailort/libhailort/include/hailo/hailort.hpp index d429a7d..7c503a1 100644 --- a/hailort/libhailort/include/hailo/hailort.hpp +++ b/hailort/libhailort/include/hailo/hailort.hpp @@ -28,7 +28,6 @@ #include "hailo/runtime_statistics.hpp" #include "hailo/network_rate_calculator.hpp" #include "hailo/quantization.hpp" -#include "hailo/dma_mapped_buffer.hpp" #include "hailo/hailort_defaults.hpp" #endif /* _HAILORT_HPP_ */ diff --git a/hailort/libhailort/include/hailo/hailort_common.hpp b/hailort/libhailort/include/hailo/hailort_common.hpp index d9ab7cf..996c6ab 100644 --- a/hailort/libhailort/include/hailo/hailort_common.hpp +++ b/hailort/libhailort/include/hailo/hailort_common.hpp @@ -19,6 +19,7 @@ #include +/** hailort namespace */ namespace hailort { @@ -35,8 +36,6 @@ public: static const uint32_t BBOX_PARAMS = sizeof(hailo_bbox_t) / sizeof(uint16_t); static const uint32_t MAX_DEFUSED_LAYER_COUNT = 9; static const size_t
HW_DATA_ALIGNMENT = 8; - static const uint64_t NMS_DELIMITER = 0xFFFFFFFFFFFFFFFF; - static const uint64_t NMS_DUMMY_DELIMITER = 0xFFFFFFFFFFFFFFFE; static const uint32_t MUX_INFO_COUNT = 32; static const uint32_t MAX_MUX_PREDECESSORS = 4; static const uint16_t ETH_INPUT_BASE_PORT = 32401; @@ -279,10 +278,11 @@ public: static constexpr uint32_t get_nms_hw_frame_size(const hailo_nms_info_t &nms_info) { const uint32_t size_per_class = static_cast(sizeof(nms_bbox_counter_t)) + - nms_info.bbox_size * nms_info.max_bboxes_per_class; + nms_info.bbox_size * std::max(nms_info.burst_size, nms_info.max_bboxes_per_class); const uint32_t size_per_chunk = nms_info.number_of_classes * size_per_class; - // 1 delimiter for an entire frame (since we are reading delimiters directly into the buffer and replacing them) - return nms_info.bbox_size + (nms_info.chunks_per_frame * size_per_chunk); + // Extra Burst size for frame (since may be reading bursts directly into the buffer and replacing them) + const uint32_t size_for_extra_burst = nms_info.bbox_size * nms_info.burst_size; + return (nms_info.chunks_per_frame * size_per_chunk) + size_for_extra_burst; } /** @@ -386,6 +386,22 @@ inline constexpr hailo_format_flags_t& operator|=(hailo_format_flags_t &a, hailo return a; } +inline constexpr hailo_format_flags_t operator&(hailo_format_flags_t a, hailo_format_flags_t b) +{ + return static_cast(static_cast(a) & static_cast(b)); +} + +inline constexpr hailo_format_flags_t& operator&=(hailo_format_flags_t &a, hailo_format_flags_t b) +{ + a = a & b; + return a; +} + +inline constexpr hailo_format_flags_t operator~(hailo_format_flags_t a) +{ + return static_cast(~(static_cast(a))); +} + inline constexpr hailo_vstream_stats_flags_t operator|(hailo_vstream_stats_flags_t a, hailo_vstream_stats_flags_t b) { return static_cast(static_cast(a) | static_cast(b)); diff --git a/hailort/libhailort/include/hailo/hailort_defaults.hpp b/hailort/libhailort/include/hailo/hailort_defaults.hpp index 
9577eec..c0edbac 100644 --- a/hailort/libhailort/include/hailo/hailort_defaults.hpp +++ b/hailort/libhailort/include/hailo/hailort_defaults.hpp @@ -14,7 +14,7 @@ #include "hailo/expected.hpp" #include "hailo/network_group.hpp" - +/** hailort namespace */ namespace hailort { diff --git a/hailort/libhailort/include/hailo/hef.hpp b/hailort/libhailort/include/hailo/hef.hpp index ad16c39..3a06ada 100644 --- a/hailort/libhailort/include/hailo/hef.hpp +++ b/hailort/libhailort/include/hailo/hef.hpp @@ -18,7 +18,7 @@ #include #include - +/** hailort namespace */ namespace hailort { @@ -452,7 +452,7 @@ public: */ std::string hash() const; - Expected get_hef_description(bool stream_infos, bool vstream_infos); + Expected get_description(bool stream_infos, bool vstream_infos); ~Hef(); Hef(Hef &&); diff --git a/hailort/libhailort/include/hailo/inference_pipeline.hpp b/hailort/libhailort/include/hailo/inference_pipeline.hpp index c4a3254..201d644 100644 --- a/hailort/libhailort/include/hailo/inference_pipeline.hpp +++ b/hailort/libhailort/include/hailo/inference_pipeline.hpp @@ -13,6 +13,7 @@ #include "hailo/vstream.hpp" +/** hailort namespace */ namespace hailort { diff --git a/hailort/libhailort/include/hailo/network_group.hpp b/hailort/libhailort/include/hailo/network_group.hpp index ae9ae8e..6b8d029 100644 --- a/hailort/libhailort/include/hailo/network_group.hpp +++ b/hailort/libhailort/include/hailo/network_group.hpp @@ -18,7 +18,7 @@ #include #include - +/** hailort namespace */ namespace hailort { @@ -44,6 +44,15 @@ using OutputStreamWithParamsVector = std::vector get_intermediate_buffer(const IntermediateBufferKey &key) = 0; - + // TODO HRT-10799: remove when enable batch switch flow for hailo15 virtual hailo_status set_keep_nn_config_during_reset(const bool keep_nn_config_during_reset) = 0; /** @@ -385,10 +394,18 @@ public: virtual Expected> create_input_vstreams(const std::map &inputs_params) = 0; virtual Expected> create_output_vstreams(const std::map &outputs_params) 
= 0; + virtual Expected run_hw_infer_estimator() = 0; + virtual hailo_status before_fork() { return HAILO_SUCCESS; } virtual hailo_status after_fork_in_parent() { return HAILO_SUCCESS; } virtual hailo_status after_fork_in_child() { return HAILO_SUCCESS; } + virtual Expected> get_sorted_output_names() = 0; + virtual Expected> get_stream_names_from_vstream_name(const std::string &vstream_name) = 0; + virtual Expected> get_vstream_names_from_stream_name(const std::string &stream_name) = 0; + + static Expected> duplicate_network_group_client(uint32_t handle, const std::string &network_group_name); + virtual Expected get_client_handle() const; protected: ConfiguredNetworkGroup() = default; diff --git a/hailort/libhailort/include/hailo/network_rate_calculator.hpp b/hailort/libhailort/include/hailo/network_rate_calculator.hpp index 047cb1c..6c964b2 100644 --- a/hailort/libhailort/include/hailo/network_rate_calculator.hpp +++ b/hailort/libhailort/include/hailo/network_rate_calculator.hpp @@ -21,6 +21,7 @@ #include +/** hailort namespace */ namespace hailort { @@ -77,6 +78,11 @@ public: Expected> get_udp_ports_rates_dict( std::vector> &udp_input_streams, uint32_t fps, uint32_t max_supported_bandwidth = HAILO_DEFAULT_MAX_ETHERNET_BANDWIDTH_BYTES_PER_SEC); + + // Undocumented, exported here for pyhailort usage + static hailo_status set_rate_limit(const std::string &ip, uint16_t port, uint32_t rate_bytes_per_sec); + static hailo_status reset_rate_limit(const std::string &ip, uint16_t port); + static Expected get_interface_name(const std::string &ip); }; } /* namespace hailort */ diff --git a/hailort/libhailort/include/hailo/quantization.hpp b/hailort/libhailort/include/hailo/quantization.hpp index d48b008..8d80c1c 100644 --- a/hailort/libhailort/include/hailo/quantization.hpp +++ b/hailort/libhailort/include/hailo/quantization.hpp @@ -16,10 +16,26 @@ #include #include +#ifdef _MSC_VER +#include +#endif +/** hailort namespace */ namespace hailort { +inline float 
bankers_round(float x) +{ +#ifdef _MSC_VER + // These instructions are intrinsics that the Microsoft C/C++ compiler supports when x86 is targeted + __m128 xmm = _mm_set_ss(x); + xmm = _mm_round_ss(xmm, xmm, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + return _mm_cvtss_f32(xmm); +#else + return rintf(x); +#endif +} + class RoundingToNearestGuard final { public: @@ -70,7 +86,6 @@ public: dst_ptr[i] = (T)(src_ptr[i]); } } else { - auto rounding_tonearest_guard = RoundingToNearestGuard(); for (uint32_t i = 0; i < buffer_elements_count; i++) { dst_ptr[i] = dequantize_output(src_ptr[i], quant_info); } @@ -79,7 +94,7 @@ public: /** * De-quantize in place the output buffer pointed by @a dst_ptr from data type @a Q to data type @a T. - * + * * @param[inout] dst_ptr A pointer to the buffer to be de-quantized. * @param[in] buffer_elements_count The number of elements in @a dst_ptr array. * @param[in] quant_info Quantization info. @@ -87,14 +102,28 @@ public: template static void dequantize_output_buffer_in_place(T *dst_ptr, uint32_t buffer_elements_count, hailo_quant_info_t quant_info) { - if (is_identity_qp(quant_info)) { + dequantize_output_buffer_in_place(dst_ptr, 0, buffer_elements_count, quant_info.qp_zp, quant_info.qp_scale); + } + + /** + * De-quantize in place the output buffer pointed by @a dst_ptr starting from @a offset from data type @a Q to data type @a T. + * + * @param[inout] dst_ptr A pointer to the buffer to be de-quantized. + * @param[in] offset The offset in @a dst_ptr array to start from. + * @param[in] buffer_elements_count The number of elements in @a dst_ptr array. + * @param[in] qp_zp Quantization zero point. + * @param[in] qp_scale Quantization scale. 
+ */ + template + static void dequantize_output_buffer_in_place(T *dst_ptr, uint32_t offset, uint32_t buffer_elements_count, float32_t qp_zp, float32_t qp_scale) + { + if (is_identity_qp(qp_zp, qp_scale)) { for (int32_t i = (int32_t)buffer_elements_count - 1; i >= 0; i--) { - dst_ptr[i] = (T)(*((Q*)dst_ptr + i)); + dst_ptr[offset + i] = (T)(*((Q*)dst_ptr + offset + i)); } } else { - auto rounding_tonearest_guard = RoundingToNearestGuard(); for (int32_t i = (int32_t)buffer_elements_count - 1; i >= 0; i--) { - dst_ptr[i] = dequantize_output(*((Q*)dst_ptr + i), quant_info); + dst_ptr[offset + i] = dequantize_output(*((Q*)dst_ptr + offset + i), qp_zp, qp_scale); } } } @@ -113,7 +142,7 @@ public: auto rounding_tonearest_guard = RoundingToNearestGuard(); if (is_identity_qp(quant_info)) { for (uint32_t i = 0; i < buffer_elements_count; i++) { - dst_ptr[i] = (Q)rintf(src_ptr[i]); + dst_ptr[i] = (Q)bankers_round(src_ptr[i]); } } else { for (uint32_t i = 0; i < buffer_elements_count; i++) { @@ -155,7 +184,16 @@ public: */ static inline bool is_identity_qp(const hailo_quant_info_t &quant_info) { - return ((1 == quant_info.qp_scale) && (0 == quant_info.qp_zp)); + return is_identity_qp(quant_info.qp_zp, quant_info.qp_scale); + } + + /** + * Indicates whether @a qp_zp and @a qp_scale are the identity scale. + * If true there is no need to fix the data's scale. + */ + static inline bool is_identity_qp(float32_t qp_zp, float32_t qp_scale) + { + return ((1 == qp_scale) && (0 == qp_zp)); } /** @@ -170,7 +208,23 @@ template static inline T dequantize_output(Q number, hailo_quant_info_t quant_info) { - return (T)((number - quant_info.qp_zp) * quant_info.qp_scale); + return dequantize_output(number, quant_info.qp_zp, quant_info.qp_scale); + } + + /** + * De-quantize @a number from data type @a Q to data type @a T and fix its scale according to @a qp_zp and @a qp_scale. + * + * @param[in] number The value to be de-quantized. + * @param[in] qp_zp Quantization zero point.
+ * @param[in] qp_scale Quantization scale. + * + * @return Returns the dequantized value of @a number. + * + */ + template + static inline T dequantize_output(Q number, float32_t qp_zp, float32_t qp_scale) + { + return (T)((number - qp_zp) * qp_scale); } static inline float32_t clip(float32_t n, float32_t limval_min, float32_t limval_max) @@ -191,7 +245,7 @@ private: static inline Q quantize_input(T number, hailo_quant_info_t quant_info) { float32_t clipped_number = clip((float32_t)number, quant_info.limvals_min, quant_info.limvals_max); - return (Q)rintf((clipped_number / quant_info.qp_scale) + quant_info.qp_zp); + return (Q)bankers_round((clipped_number / quant_info.qp_scale) + quant_info.qp_zp); } }; diff --git a/hailort/libhailort/include/hailo/runtime_statistics.hpp b/hailort/libhailort/include/hailo/runtime_statistics.hpp index 60a5b29..3de7422 100644 --- a/hailort/libhailort/include/hailo/runtime_statistics.hpp +++ b/hailort/libhailort/include/hailo/runtime_statistics.hpp @@ -16,7 +16,7 @@ #include #include - +/** hailort namespace */ namespace hailort { diff --git a/hailort/libhailort/include/hailo/stream.hpp b/hailort/libhailort/include/hailo/stream.hpp index 2c5ee19..d3252a0 100644 --- a/hailort/libhailort/include/hailo/stream.hpp +++ b/hailort/libhailort/include/hailo/stream.hpp @@ -20,16 +20,12 @@ #include +/** hailort namespace */ namespace hailort { // Forward declaration struct LayerInfo; -class DmaMappedBuffer; - -using TransferDoneCallback = std::function buffer, - const hailo_async_transfer_completion_info_t &status, - void *opaque)>; /*! Input (host to device) stream representation */ @@ -41,6 +37,24 @@ public: InputStream(const InputStream&) = delete; InputStream& operator=(const InputStream&) = delete; + /** Context passed to the \ref TransferDoneCallback after the async operation is done or has failed. */ + struct CompletionInfo + { + /** + * Status of the async transfer. + * - ::HAILO_SUCCESS - When transfer is complete successfully. 
+ * - ::HAILO_STREAM_ABORTED_BY_USER - The transfer was canceled (can happen after network deactivation). + * - Any other ::hailo_status on unexpected errors. + */ + hailo_status status; + + const void *buffer_addr; /* Points to the transferred buffer. */ + size_t buffer_size; /* Size of the transferred buffer. */ + }; + + /** Async transfer complete callback prototype. */ + using TransferDoneCallback = std::function; + /** * Set new timeout value to the input stream * @@ -61,21 +75,21 @@ public: /** * Aborting the stream. - * + * * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error. */ virtual hailo_status abort() = 0; /** * Clearing the aborted state of the stream. - * + * * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error. */ virtual hailo_status clear_abort() = 0; /** * Writes all pending data to the underlying stream. - * + * * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error. */ virtual hailo_status flush(); @@ -92,21 +106,117 @@ public: virtual bool is_scheduled() = 0; /** - * Writes the entire buffer to the stream without transformations + * Writes the entire buffer to the stream without transformations. * * @param[in] buffer The buffer to be written. * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns an ::hailo_status error. + * * @note @a buffer is expected to be in the format dictated by this.stream_info.format - * @note @a size is expected to be a product of this.stream_info.hw_frame_size (i.e. more than one frame may be written) + * @note @a buffer.size() is expected to be get_frame_size(). 
*/ virtual hailo_status write(const MemoryView &buffer); - // ******************************************** NOTE ******************************************** // - // Async Stream API and DmaMappedBuffer are currently not supported and are for internal use only // - // ********************************************************************************************** // - virtual hailo_status wait_for_ready(size_t transfer_size, std::chrono::milliseconds timeout); // Internal use only - virtual hailo_status write_async(std::shared_ptr buffer, const TransferDoneCallback &user_callback, - void *opaque = nullptr); // Internal use only + /** + * Writes the entire buffer to the stream without transformations. + * + * @param[in] buffer The buffer to be written. + * @param[in] size The size of the buffer given. + * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns an ::hailo_status error. + * + * @note @a buffer is expected to be in the format dictated by this.stream_info.format + * @note @a size is expected to be get_frame_size(). + */ + virtual hailo_status write(const void *buffer, size_t size); + + /** + * Waits until the stream is ready to launch a new write_async() operation. Each stream contains some limited sized + * queue for ongoing transfers. Calling get_async_max_queue_size() will return the queue size for current stream. + * + * @param[in] transfer_size Must be get_frame_size(). + * @param[in] timeout Amount of time to wait until the stream is ready in milliseconds. + * + * @return Upon success, returns ::HAILO_SUCCESS. Otherwise: + * - If @a timeout has passed and the stream is not ready, returns ::HAILO_TIMEOUT. + * - In any other error case, returns ::hailo_status error. 
+ */ + virtual hailo_status wait_for_async_ready(size_t transfer_size, std::chrono::milliseconds timeout); + + /** + * Returns the maximum amount of frames that can be simultaneously written to the stream (by write_async() calls) + * before any one of the write operations is complete, as signified by @a user_callback being called. + * + * @return Upon success, returns Expected of the queue size. + * Otherwise, returns Unexpected of ::hailo_status error. + */ + virtual Expected get_async_max_queue_size() const; + + /** + * Writes the contents of @a buffer to the stream asynchronously, initiating a deferred operation that will be + * completed later. + * - If the function call succeeds (i.e., write_async() returns ::HAILO_SUCCESS), the deferred operation has been + * initiated. Until @a user_callback is called, the user cannot change or delete @a buffer. + * - If the function call fails (i.e., write_async() returns a status other than ::HAILO_SUCCESS), the deferred + * operation will not be initiated and @a user_callback will not be invoked. The user is free to change or delete + * @a buffer. + * - @a user_callback is triggered upon successful completion or failure of the deferred operation. The callback + * receives a \ref CompletionInfo object containing a pointer to the transferred buffer (@a buffer_addr) and the + * transfer status (@a status). + * + * @param[in] buffer The buffer to be written. + * The buffer must be aligned to the system page size. + * @param[in] user_callback The callback that will be called when the transfer is complete + * or has failed. + * + * @return Upon success, returns ::HAILO_SUCCESS. Otherwise: + * - If the stream queue is full, returns ::HAILO_QUEUE_IS_FULL. In this case please wait + * until @a user_callback is called on previous writes, or call wait_for_async_ready(). + * The size of the queue can be determined by calling get_async_max_queue_size().
+ * + * @note @a user_callback should run as quickly as possible. + * @note The buffer's format comes from the @a format field inside get_info() and the shape comes from + * the @a hw_shape field inside get_info(). + * @note The address provided must be aligned to the system's page size, and the rest of the page should not be in + * use by any other part of the program to ensure proper functioning of the DMA operation. Memory for the + * provided address can be allocated using `mmap` on Unix-like systems or `VirtualAlloc` on Windows. + */ + virtual hailo_status write_async(const MemoryView &buffer, const TransferDoneCallback &user_callback) = 0; + + /** + * Writes the contents of @a buffer to the stream asynchronously, initiating a deferred operation that will be + * completed later. + * - If the function call succeeds (i.e., write_async() returns ::HAILO_SUCCESS), the deferred operation has been + * initiated. Until @a user_callback is called, the user cannot change or delete @a buffer. + * - If the function call fails (i.e., write_async() returns a status other than ::HAILO_SUCCESS), the deferred + * operation will not be initiated and @a user_callback will not be invoked. The user is free to change or delete + * @a buffer. + * - @a user_callback is triggered upon successful completion or failure of the deferred operation. The callback + * receives a \ref CompletionInfo object containing a pointer to the transferred buffer (@a buffer_addr) and the + * transfer status (@a status). + * + * @param[in] buffer The buffer to be written. + * The buffer must be aligned to the system page size. + * @param[in] size The size of the given buffer, expected to be get_frame_size(). + * @param[in] user_callback The callback that will be called when the transfer is complete + * or has failed. + * + * @return Upon success, returns ::HAILO_SUCCESS. Otherwise: + * - If the stream queue is full, returns ::HAILO_QUEUE_IS_FULL. 
In this case please wait + * until @a user_callback is called on previous writes, or call wait_for_async_ready(). + * The size of the queue can be determined by calling get_async_max_queue_size(). + * - In any other error case, returns a ::hailo_status error. + * + * @note @a user_callback should run as quickly as possible. + * @note The buffer's format comes from the @a format field inside get_info() and the shape comes from + * the @a hw_shape field inside get_info(). + * @note The address provided must be aligned to the system's page size, and the rest of the page should not be in + * use by any other part of the program to ensure proper functioning of the DMA operation. Memory for the + * provided address can be allocated using `mmap` on Unix-like systems or `VirtualAlloc` on Windows. + */ + virtual hailo_status write_async(const void *buffer, size_t size, const TransferDoneCallback &user_callback) = 0; + + // The usage of BufferPtr for async API isn't currently supported and is for internal use only. + virtual hailo_status write_async(BufferPtr buffer, const TransferDoneCallback &user_callback) = 0; /** * @returns A ::hailo_stream_info_t object containing the stream's info. 
@@ -139,18 +249,17 @@ public: // get_network_group_activated_event is same as this function virtual EventPtr &get_core_op_activated_event() = 0; + protected: InputStream() = default; InputStream(InputStream &&) = delete; - // Note: Implement sync_write_all_raw_buffer_no_transform_impl for the actual stream interaction in sub classes - virtual hailo_status sync_write_all_raw_buffer_no_transform_impl(void *buffer, size_t offset, size_t size) = 0; + // Note: Implement write_impl for the actual stream interaction in sub classes + virtual hailo_status write_impl(const MemoryView &buffer) = 0; virtual hailo_status activate_stream(uint16_t dynamic_batch_size, bool resume_pending_stream_transfers) = 0; virtual hailo_status deactivate_stream() = 0; - virtual Expected sync_write_raw_buffer(const MemoryView &buffer) = 0; - hailo_stream_info_t m_stream_info; uint8_t m_dataflow_manager_id; @@ -169,6 +278,24 @@ public: OutputStream(const OutputStream&) = delete; OutputStream& operator=(const OutputStream&) = delete; + /** Context passed to the \ref TransferDoneCallback after the async operation is done or has failed. */ + struct CompletionInfo + { + /** + * Status of the async transfer. + * - ::HAILO_SUCCESS - When transfer is complete successfully. + * - ::HAILO_STREAM_ABORTED_BY_USER - The transfer was canceled (can happen after network deactivation). + * - Any other ::hailo_status on unexpected errors. + */ + hailo_status status; + + void *buffer_addr; /* Points to the transferred buffer. */ + size_t buffer_size; /* Size of the transferred buffer. */ + }; + + /** Async transfer complete callback prototype. */ + using TransferDoneCallback = std::function; + /** * Set new timeout value to the output stream * @@ -181,7 +308,7 @@ public: * @return returns the output stream's timeout in milliseconds. */ virtual std::chrono::milliseconds get_timeout() const = 0; - + /** * @return returns the output stream's interface. */ @@ -189,14 +316,14 @@ public: /** * Aborting the stream. 
- * + * * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error. */ virtual hailo_status abort() = 0; /** * Clearing the abort flag of the stream. - * + * * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error. */ virtual hailo_status clear_abort() = 0; @@ -249,19 +376,115 @@ public: /** * Reads the entire buffer from the stream without transformations * - * @param[out] buffer A pointer to a buffer that receives the data read from the stream. + * @param[in] buffer A buffer that receives the data read from the stream. * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns an ::hailo_status error. - * @note Upon return, @a buffer is expected to be in the format dictated by this.stream_info.format - * @note @a size is expected to be a product of this.stream_info.hw_frame_size (i.e. more than one frame may be read) + * @note Upon return, @a buffer is expected to be in the format dictated by this.get_info().format + * @note @a size is expected to be get_frame_size(). */ virtual hailo_status read(MemoryView buffer); - // ******************************************** NOTE ******************************************** // - // Async Stream API and DmaMappedBuffer are currently not supported and are for internal use only // - // ********************************************************************************************** // - virtual hailo_status wait_for_ready(size_t transfer_size, std::chrono::milliseconds timeout); // Internal use only - virtual hailo_status read_async(std::shared_ptr buffer, const TransferDoneCallback &user_callback, - void *opaque = nullptr); // Internal use only + /** + * Reads the entire buffer from the stream without transformations + * + * @param[in] buffer A pointer to a buffer that receives the data read from the stream. + * @param[in] size The size of the given buffer. + * + * @return Upon success, returns ::HAILO_SUCCESS. 
Otherwise, returns an ::hailo_status error. + * + * @note Upon return, @a buffer is expected to be in the format dictated by this.get_info().format + * @note @a size is expected to be get_frame_size(). + */ + virtual hailo_status read(void *buffer, size_t size); + + /** + * Waits until the stream is ready to launch a new read_async() operation. Each stream contains some limited sized + * queue for ongoing transfers. Calling get_async_max_queue_size() will return the queue size for current stream. + * + * @param[in] transfer_size Must be get_frame_size(). + * @param[in] timeout Amount of time to wait until the stream is ready in milliseconds. + * + * @return Upon success, returns ::HAILO_SUCCESS. Otherwise: + * - If @a timeout has passed and the stream is not ready, returns ::HAILO_TIMEOUT. + * - In any other error case, returns ::hailo_status error. + */ + virtual hailo_status wait_for_async_ready(size_t transfer_size, std::chrono::milliseconds timeout); + + /** + * Returns the maximum amount of frames that can be simultaneously read from the stream (by read_async() calls) + * before any one of the read operations is complete, as signified by @a user_callback being called. + * + * @return Upon success, returns Expected of the queue size. + * Otherwise, returns Unexpected of ::hailo_status error. + */ + virtual Expected get_async_max_queue_size() const; + + /** + * Reads into @a buffer from the stream asynchronously, initiating a deferred operation that will be completed + * later. + * - If the function call succeeds (i.e., read_async() returns ::HAILO_SUCCESS), the deferred operation has been + * initiated. Until @a user_callback is called, the user cannot change or delete @a buffer. + * - If the function call fails (i.e., read_async() returns a status other than ::HAILO_SUCCESS), the deferred + * operation will not be initiated and @a user_callback will not be invoked. The user is free to change or + * delete @a buffer. 
+ * - @a user_callback is triggered upon successful completion or failure of the deferred operation. + * The callback receives a \ref CompletionInfo object containing a pointer to the transferred buffer + * (@a buffer_addr) and the transfer status (@a status). If the operation has completed successfully, the contents + * of @a buffer will have been updated by the read operation. + * + * @param[in] buffer The buffer to be read into. + * The buffer must be aligned to the system page size. + * @param[in] user_callback The callback that will be called when the transfer is complete or has failed. + * + * @return Upon success, returns ::HAILO_SUCCESS. Otherwise: + * - If the stream queue is full, returns ::HAILO_QUEUE_IS_FULL. + * In this case, please wait until @a user_callback is called on previous + * reads, or call wait_for_async_ready(). The size of the queue can be + * determined by calling get_async_max_queue_size(). + * - In any other error case, returns a ::hailo_status error. + * @note @a user_callback should execute as quickly as possible. + * @note The buffer's format is determined by the @a format field inside get_info(), + * and the shape is determined by the @a hw_shape field inside get_info(). + * @note The address provided must be aligned to the system's page size, and the rest of the page should not be in + * use by any other part of the program to ensure proper functioning of the DMA operation. Memory for the + * provided address can be allocated using `mmap` on Unix-like systems or `VirtualAlloc` on Windows. + */ + virtual hailo_status read_async(MemoryView buffer, const TransferDoneCallback &user_callback) = 0; + + /** + * Reads into @a buffer from the stream asynchronously, initiating a deferred operation that will be completed + * later. + * - If the function call succeeds (i.e., read_async() returns ::HAILO_SUCCESS), the deferred operation has been + * initiated. Until @a user_callback is called, the user cannot change or delete @a buffer. 
+ * - If the function call fails (i.e., read_async() returns a status other than ::HAILO_SUCCESS), the deferred + * operation will not be initiated and @a user_callback will not be invoked. The user is free to change or + * delete @a buffer. + * - @a user_callback is triggered upon successful completion or failure of the deferred operation. + * The callback receives a \ref CompletionInfo object containing a pointer to the transferred buffer + * (@a buffer_addr) and the transfer status (@a status). If the operation has completed successfully, the contents + * of @a buffer will have been updated by the read operation. + * + * @param[in] buffer The buffer to be read into. + * The buffer must be aligned to the system page size. + * @param[in] size The size of the given buffer, expected to be get_frame_size(). + * @param[in] user_callback The callback that will be called when the transfer is complete or has failed. + * + * @return Upon success, returns ::HAILO_SUCCESS. Otherwise: + * - If the stream queue is full, returns ::HAILO_QUEUE_IS_FULL. + * In this case, please wait until @a user_callback is called on previous + * reads, or call wait_for_async_ready(). The size of the queue can be + * determined by calling get_async_max_queue_size(). + * - In any other error case, returns a ::hailo_status error. + * @note @a user_callback should execute as quickly as possible. + * @note The buffer's format is determined by the @a format field inside get_info(), + * and the shape is determined by the @a hw_shape field inside get_info() + * @note The address provided must be aligned to the system's page size, and the rest of the page should not be in + * use by any other part of the program to ensure proper functioning of the DMA operation. Memory for the + * provided address can be allocated using `mmap` on Unix-like systems or `VirtualAlloc` on Windows. 
+ */ + virtual hailo_status read_async(void *buffer, size_t size, const TransferDoneCallback &user_callback) = 0; + + // The usage of BufferPtr for async API isn't currently supported and is for internal use only. + virtual hailo_status read_async(BufferPtr buffer, const TransferDoneCallback &user_callback) = 0; // get_network_group_activated_event is same as this function virtual EventPtr &get_core_op_activated_event() = 0; @@ -271,17 +494,17 @@ protected: virtual hailo_status activate_stream(uint16_t dynamic_batch_size, bool resume_pending_stream_transfers) = 0; virtual hailo_status deactivate_stream() = 0; - virtual hailo_status read_all(MemoryView &buffer) = 0; - - virtual Expected sync_read_raw_buffer(MemoryView &buffer) = 0; + virtual hailo_status read_impl(MemoryView &buffer) = 0; hailo_stream_info_t m_stream_info; uint8_t m_dataflow_manager_id; std::atomic m_invalid_frames_count; +protected: + hailo_status read_nms(void *buffer, size_t offset, size_t size); + private: virtual const LayerInfo& get_layer_info() = 0; - hailo_status read_nms(void *buffer, size_t offset, size_t size); void increase_invalid_frames_count(uint32_t value); friend class HefConfigurator; @@ -289,6 +512,7 @@ private: friend class HwReadElement; friend class OutputDemuxer; friend class CoreOp; + friend class NMSStreamReader; }; } /* namespace hailort */ diff --git a/hailort/libhailort/include/hailo/transform.hpp b/hailort/libhailort/include/hailo/transform.hpp index 883dbca..db1d7cf 100644 --- a/hailort/libhailort/include/hailo/transform.hpp +++ b/hailort/libhailort/include/hailo/transform.hpp @@ -20,7 +20,7 @@ #include #include - +/** hailort namespace */ namespace hailort { @@ -284,6 +284,7 @@ public: * @param[in] src A buffer to be demultiplexed. * @param[out] raw_buffers A vector of buffers that receives the demultiplexed data read from the stream. * The order of @a raw_buffers vector will remain as is. 
+ * @note The order of @a raw_buffers should be the same as returned from the function 'get_edges_stream_info()'. * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error. */ virtual hailo_status transform_demux(const MemoryView src, std::vector &raw_buffers) = 0; diff --git a/hailort/libhailort/include/hailo/vdevice.hpp b/hailort/libhailort/include/hailo/vdevice.hpp index 9dd592a..0aa7bd3 100644 --- a/hailort/libhailort/include/hailo/vdevice.hpp +++ b/hailort/libhailort/include/hailo/vdevice.hpp @@ -17,6 +17,7 @@ #include "hailo/device.hpp" +/** hailort namespace */ namespace hailort { diff --git a/hailort/libhailort/include/hailo/vstream.hpp b/hailort/libhailort/include/hailo/vstream.hpp index faf2bf2..0cfe563 100644 --- a/hailort/libhailort/include/hailo/vstream.hpp +++ b/hailort/libhailort/include/hailo/vstream.hpp @@ -13,6 +13,7 @@ #include "hailo/network_group.hpp" #include "hailo/runtime_statistics.hpp" +/** hailort namespace */ namespace hailort { @@ -159,6 +160,10 @@ public: hailo_status before_fork(); hailo_status after_fork_in_parent(); hailo_status after_fork_in_child(); + bool is_aborted(); + + // Added to match the same API as InputStream. Will be filled when async API will be implemented for vstreams. + using TransferDoneCallback = void(*); protected: explicit InputVStream(std::shared_ptr vstream); @@ -171,6 +176,7 @@ protected: std::shared_ptr m_vstream; friend class VStreamsBuilderUtils; + friend class HailoRtRpcService; }; class HAILORTAPI OutputVStream @@ -304,6 +310,10 @@ public: hailo_status before_fork(); hailo_status after_fork_in_parent(); hailo_status after_fork_in_child(); + bool is_aborted(); + + // Added to match the same API as InputStream. Will be filled when async API will be implemented for vstreams. 
+ using TransferDoneCallback = void(*); protected: explicit OutputVStream(std::shared_ptr vstream); @@ -317,6 +327,7 @@ protected: friend class VStreamsBuilderUtils; friend class VDeviceCoreOp; + friend class HailoRtRpcService; }; /*! Contains the virtual streams creation functions */ diff --git a/hailort/libhailort/src/CMakeLists.txt b/hailort/libhailort/src/CMakeLists.txt index 6dc846a..1c44fd0 100644 --- a/hailort/libhailort/src/CMakeLists.txt +++ b/hailort/libhailort/src/CMakeLists.txt @@ -34,9 +34,6 @@ add_subdirectory(network_group) add_subdirectory(core_op) add_subdirectory(net_flow) - -set(HAILORT_CPP_SOURCES "${HAILORT_CPP_SOURCES}" "${HAILORT_OPS_CPP_SOURCES}") - if(HAILO_BUILD_SERVICE) add_subdirectory(service) endif() @@ -57,10 +54,8 @@ relative_to_absolute_paths(HAILO_FULL_OS_DIR ${HAILO_FULL_OS_DIR}) set(HAILO_OS_DIR ${HAILO_OS_DIR} CACHE INTERNAL "Absolute path of os-dir") set(HAILO_FULL_OS_DIR ${HAILO_FULL_OS_DIR} CACHE INTERNAL "Absolute Full path of os-dir") set(HAILORT_CPP_SOURCES ${HAILORT_CPP_SOURCES} CACHE INTERNAL "Absolute paths of hailort's cpp source files") -set(HAILORT_CPP_OS_SOURCES ${HAILORT_CPP_OS_SOURCES} CACHE INTERNAL "Absolute paths of os-related source files") set(COMMON_C_SOURCES ${COMMON_C_SOURCES} CACHE INTERNAL "Absolute paths of common source files") -set(HAILORT_SRCS_ABS ${HAILORT_CPP_SOURCES} ${HAILORT_CPP_OS_SOURCES} ${HAILORT_COMMON_CPP_SOURCES} ${COMMON_C_SOURCES} CACHE INTERNAL "All absolute paths of hailort's source files") -set(HAILORT_OPS_CPP_SOURCES ${HAILORT_OPS_CPP_SOURCES} PARENT_SCOPE) +set(HAILORT_SRCS_ABS ${HAILORT_CPP_SOURCES} ${HAILORT_COMMON_CPP_SOURCES} ${COMMON_C_SOURCES} CACHE INTERNAL "All absolute paths of hailort's source files") SET_SOURCE_FILES_PROPERTIES(${C_SOURCES} PROPERTIES LANGUAGE CXX) add_library(libhailort SHARED ${HAILORT_SRCS_ABS}) @@ -102,6 +97,7 @@ set(HAILORT_PUBLIC_HEADERS ${HAILORT_INC_DIR}/hailo/platform.h ${HAILORT_INC_DIR}/hailo/hailort.hpp + 
${HAILORT_INC_DIR}/hailo/buffer_storage.hpp ${HAILORT_INC_DIR}/hailo/buffer.hpp ${HAILORT_INC_DIR}/hailo/device.hpp ${HAILORT_INC_DIR}/hailo/event.hpp @@ -117,7 +113,6 @@ set(HAILORT_PUBLIC_HEADERS ${HAILORT_INC_DIR}/hailo/network_rate_calculator.hpp ${HAILORT_INC_DIR}/hailo/vdevice.hpp ${HAILORT_INC_DIR}/hailo/quantization.hpp - ${HAILORT_INC_DIR}/hailo/dma_mapped_buffer.hpp ${HAILORT_INC_DIR}/hailo/hailort_defaults.hpp ) diff --git a/hailort/libhailort/src/core_op/CMakeLists.txt b/hailort/libhailort/src/core_op/CMakeLists.txt index d70f0ee..a8d7a54 100644 --- a/hailort/libhailort/src/core_op/CMakeLists.txt +++ b/hailort/libhailort/src/core_op/CMakeLists.txt @@ -2,14 +2,13 @@ cmake_minimum_required(VERSION 3.0.0) set(SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/core_op.cpp - + ${CMAKE_CURRENT_SOURCE_DIR}/resource_manager/resource_manager.cpp ${CMAKE_CURRENT_SOURCE_DIR}/resource_manager/resource_manager_builder.cpp ${CMAKE_CURRENT_SOURCE_DIR}/resource_manager/config_buffer.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/resource_manager/inter_context_buffer.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/resource_manager/ddr_channels_pair.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/resource_manager/intermediate_buffer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/resource_manager/channel_allocator.cpp ${CMAKE_CURRENT_SOURCE_DIR}/resource_manager/context_switch_buffer_builder.cpp ) -set(HAILORT_CPP_SOURCES ${HAILORT_CPP_SOURCES} ${SRC_FILES} ${HAILORT_OPS_CPP_SOURCES} PARENT_SCOPE) +set(HAILORT_CPP_SOURCES ${HAILORT_CPP_SOURCES} ${SRC_FILES} PARENT_SCOPE) diff --git a/hailort/libhailort/src/core_op/core_op.cpp b/hailort/libhailort/src/core_op/core_op.cpp index 0fb260e..0967e78 100644 --- a/hailort/libhailort/src/core_op/core_op.cpp +++ b/hailort/libhailort/src/core_op/core_op.cpp @@ -150,85 +150,6 @@ Expected CoreOp::get_latency_measurement(const std::st return result; } -Expected CoreOp::get_output_streams_from_vstream_names( - const std::map &outputs_params) -{ - OutputStreamWithParamsVector results; - std::unordered_map 
outputs_edges_params; - for (auto &name_params_pair : outputs_params) { - auto stream_names = m_metadata->get_stream_names_from_vstream_name(name_params_pair.first); - CHECK_EXPECTED(stream_names); - - for (auto &stream_name : stream_names.value()) { - CHECK_AS_EXPECTED(contains(m_output_streams, stream_name), HAILO_NOT_FOUND); - auto output_stream = m_output_streams.at(stream_name); - if (output_stream->get_info().is_mux) { - outputs_edges_params.emplace(name_params_pair); - } - else { - NameToVStreamParamsMap name_to_params = {name_params_pair}; - results.emplace_back(output_stream, name_to_params); - } - } - } - // Add non mux streams to result - hailo_status status = add_mux_streams_by_edges_names(results, outputs_edges_params); - CHECK_SUCCESS_AS_EXPECTED(status); - - return results; -} - -// This function adds to results the OutputStreams that correspond to the edges in outputs_edges_params. -// If an edge name appears in outputs_edges_params then all of its predecessors must appear in outputs_edges_params as well, Otherwise, an error is returned. -// We use the set seen_edges in order to mark the edges already evaluated by one of its' predecessor. 
-hailo_status CoreOp::add_mux_streams_by_edges_names(OutputStreamWithParamsVector &results, - const std::unordered_map &outputs_edges_params) -{ - std::unordered_set seen_edges; - for (auto &name_params_pair : outputs_edges_params) { - if (seen_edges.end() != seen_edges.find(name_params_pair.first)) { - // Edge has already been seen by one of its predecessors - continue; - } - auto output_streams = get_output_streams_by_vstream_name(name_params_pair.first); - CHECK_EXPECTED_AS_STATUS(output_streams); - CHECK(output_streams->size() == 1, HAILO_INVALID_ARGUMENT, - "mux streams cannot be separated into multiple streams"); - auto output_stream = output_streams.release()[0]; - - // TODO: Find a better way to get the mux edges without creating OutputDemuxer - auto expected_demuxer = OutputDemuxer::create(*output_stream); - CHECK_EXPECTED_AS_STATUS(expected_demuxer); - - NameToVStreamParamsMap name_to_params; - for (auto &edge : expected_demuxer.value()->get_edges_stream_info()) { - auto edge_name_params_pair = outputs_edges_params.find(edge.name); - CHECK(edge_name_params_pair != outputs_edges_params.end(), HAILO_INVALID_ARGUMENT, - "All edges of stream {} must be in output vstream params. 
edge {} is missing.", - name_params_pair.first, edge.name); - seen_edges.insert(edge.name); - name_to_params.insert(*edge_name_params_pair); - } - results.emplace_back(output_stream, name_to_params); - } - return HAILO_SUCCESS; -} - -Expected CoreOp::get_output_streams_by_vstream_name(const std::string &name) -{ - auto stream_names = m_metadata->get_stream_names_from_vstream_name(name); - CHECK_EXPECTED(stream_names); - - OutputStreamPtrVector output_streams; - output_streams.reserve(stream_names->size()); - for (const auto &stream_name : stream_names.value()) { - CHECK_AS_EXPECTED(contains(m_output_streams, stream_name), HAILO_NOT_FOUND); - output_streams.emplace_back(m_output_streams.at(stream_name)); - } - - return output_streams; -} - Expected CoreOp::get_layer_info(const std::string &stream_name) { for (auto layer_info : m_metadata->get_all_layer_infos()) { @@ -311,11 +232,6 @@ hailo_status CoreOp::deactivate_low_level_streams() return status; } -Expected> CoreOp::get_vstream_names_from_stream_name(const std::string &stream_name) -{ - return m_metadata->get_vstream_names_from_stream_name(stream_name); -} - const SupportedFeatures &CoreOp::get_supported_features() { return m_metadata->supported_features(); @@ -587,99 +503,12 @@ hailo_status CoreOp::wait_for_activation(const std::chrono::milliseconds &timeou return m_core_op_activated_event->wait(timeout); } -Expected>> CoreOp::get_output_vstream_groups() -{ - std::vector> results; - - for (auto output_stream : get_output_streams()) { - auto vstreams_group = get_vstream_names_from_stream_name(output_stream.get().name()); - CHECK_EXPECTED(vstreams_group); - results.push_back(vstreams_group.release()); - } - - return results; -} - -Expected>> CoreOp::make_output_vstream_params_groups( - bool quantized, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size) -{ - auto params = make_output_vstream_params(quantized, format_type, timeout_ms, queue_size); - CHECK_EXPECTED(params); - - auto groups = 
get_output_vstream_groups(); - CHECK_EXPECTED(groups); - - std::vector> results(groups->size(), std::map()); - - size_t pipeline_group_index = 0; - for (const auto &group : groups.release()) { - for (const auto &name_pair : params.value()) { - if (contains(group, name_pair.first)) { - results[pipeline_group_index].insert(name_pair); - } - } - pipeline_group_index++; - } - - return results; -} - -Expected> CoreOp::make_input_vstream_params( - bool quantized, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size, - const std::string &network_name) -{ - auto input_vstream_infos = m_metadata->get_input_vstream_infos(network_name); - CHECK_EXPECTED(input_vstream_infos); - - std::map res; - auto status = Hef::Impl::fill_missing_vstream_params_with_default(res, input_vstream_infos.value(), quantized, - format_type, timeout_ms, queue_size); - CHECK_SUCCESS_AS_EXPECTED(status); - return res; -} - -Expected> CoreOp::make_output_vstream_params( - bool quantized, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size, - const std::string &network_name) -{ - auto output_vstream_infos = m_metadata->get_output_vstream_infos(network_name); - CHECK_EXPECTED(output_vstream_infos); - std::map res; - auto status = Hef::Impl::fill_missing_vstream_params_with_default(res, output_vstream_infos.value(), quantized, - format_type, timeout_ms, queue_size); - CHECK_SUCCESS_AS_EXPECTED(status); - return res; -} - -Expected> CoreOp::get_network_infos() const -{ - return m_metadata->get_network_infos(); -} - Expected> CoreOp::get_all_stream_infos( const std::string &network_name) const { return m_metadata->get_all_stream_infos(network_name); } -Expected> CoreOp::get_input_vstream_infos( - const std::string &network_name) const -{ - return m_metadata->get_input_vstream_infos(network_name); -} - -Expected> CoreOp::get_output_vstream_infos( - const std::string &network_name) const -{ - return m_metadata->get_output_vstream_infos(network_name); -} - -Expected> 
CoreOp::get_all_vstream_infos( - const std::string &network_name) const -{ - return m_metadata->get_all_vstream_infos(network_name); -} - AccumulatorPtr CoreOp::get_activation_time_accumulator() const { return m_activation_time_accumulator; diff --git a/hailort/libhailort/src/core_op/core_op.hpp b/hailort/libhailort/src/core_op/core_op.hpp index f5ba474..3c00c19 100644 --- a/hailort/libhailort/src/core_op/core_op.hpp +++ b/hailort/libhailort/src/core_op/core_op.hpp @@ -112,43 +112,21 @@ public: virtual std::vector> get_output_streams_by_interface(hailo_stream_interface_t stream_interface); virtual ExpectedRef get_input_stream_by_name(const std::string& name); virtual ExpectedRef get_output_stream_by_name(const std::string& name); - virtual Expected get_output_streams_from_vstream_names( - const std::map &outputs_params); virtual Expected get_latency_measurement(const std::string &network_name=""); - // TODO: HRT-9546 - Remove func, should be only in CNG - virtual Expected> make_input_vstream_params( - bool quantized, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size, - const std::string &network_name=""); - // TODO: HRT-9546 - Remove func, should be only in CNG - virtual Expected> make_output_vstream_params( - bool quantized, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size, - const std::string &network_name=""); - // TODO: HRT-9546 - Remove func, should be only in CNG - virtual Expected>> make_output_vstream_params_groups( - bool quantized, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size); - - // TODO: HRT-9546 - Remove func, should be only in CNG - virtual Expected>> get_output_vstream_groups(); - - // TODO: HRT-9546 - Remove func, should be only in CNG - Expected> get_vstream_names_from_stream_name(const std::string &stream_name); virtual hailo_status activate_impl(uint16_t dynamic_batch_size, bool resume_pending_stream_transfers = false) = 0; virtual hailo_status deactivate_impl(bool 
keep_nn_config_during_reset = false) = 0; - virtual Expected> get_network_infos() const; virtual Expected> get_all_stream_infos(const std::string &network_name="") const; - virtual Expected> get_input_vstream_infos(const std::string &network_name="") const; - virtual Expected> get_output_vstream_infos(const std::string &network_name="") const; - virtual Expected> get_all_vstream_infos(const std::string &network_name="") const; + virtual AccumulatorPtr get_activation_time_accumulator() const; virtual AccumulatorPtr get_deactivation_time_accumulator() const; hailo_status create_streams_from_config_params(Device &device); virtual bool is_multi_context() const; virtual const ConfigureNetworkParams get_config_params() const; - + virtual Expected run_hw_infer_estimator() = 0; const SupportedFeatures &get_supported_features(); Expected get_stream_batch_size(const std::string &stream_name); @@ -173,9 +151,6 @@ protected: const hailo_stream_parameters_t &stream_params, const std::string &stream_name); hailo_status create_input_stream_from_config_params(Device &device, const hailo_stream_parameters_t &stream_params, const std::string &stream_name); - hailo_status add_mux_streams_by_edges_names(OutputStreamWithParamsVector &result, - const std::unordered_map &outputs_edges_params); - Expected get_output_streams_by_vstream_name(const std::string &name); hailo_status activate_low_level_streams(uint16_t dynamic_batch_size, bool resume_pending_stream_transfers); hailo_status deactivate_low_level_streams(); diff --git a/hailort/libhailort/src/core_op/resource_manager/config_buffer.cpp b/hailort/libhailort/src/core_op/resource_manager/config_buffer.cpp index d15f487..6256911 100644 --- a/hailort/libhailort/src/core_op/resource_manager/config_buffer.cpp +++ b/hailort/libhailort/src/core_op/resource_manager/config_buffer.cpp @@ -11,20 +11,33 @@ #include "core_op/resource_manager/config_buffer.hpp" #include "vdma/memory/sg_buffer.hpp" #include "vdma/memory/continuous_buffer.hpp" 
+#include "vdma/memory/buffer_requirements.hpp" #include namespace hailort { -Expected ConfigBuffer::create(HailoRTDriver &driver, vdma::ChannelId channel_id, - const std::vector &cfg_sizes) +Expected> ConfigBuffer::create_buffer(HailoRTDriver &driver, vdma::ChannelId channel_id, + const std::vector &cfg_sizes, const uint32_t buffer_size) { - const auto buffer_size = std::accumulate(cfg_sizes.begin(), cfg_sizes.end(), 0); - auto buffer_ptr = should_use_ccb(driver) ? create_ccb_buffer(driver, buffer_size) : create_sg_buffer(driver, channel_id, cfg_sizes); + if (should_use_ccb(driver) && (HAILO_OUT_OF_HOST_CMA_MEMORY == buffer_ptr.status())) { + /* Try to use sg buffer instead */ + return create_sg_buffer(driver, channel_id, cfg_sizes); + } else { + return buffer_ptr; + } +} + +Expected ConfigBuffer::create(HailoRTDriver &driver, vdma::ChannelId channel_id, + const std::vector &cfg_sizes) +{ + const auto buffer_size = std::accumulate(cfg_sizes.begin(), cfg_sizes.end(), 0); + CHECK_AS_EXPECTED(IS_FIT_IN_UINT32(buffer_size), HAILO_INTERNAL_FAILURE, "config buffer size exceeded UINT32 range limit"); + auto buffer_ptr = create_buffer(driver, channel_id, cfg_sizes, static_cast(buffer_size)); CHECK_EXPECTED(buffer_ptr); return ConfigBuffer(buffer_ptr.release(), channel_id, buffer_size); @@ -42,7 +55,7 @@ Expected ConfigBuffer::program_descriptors() { // TODO HRT-9657: remove DEVICE interrupts auto descriptors_count = - m_buffer->program_descriptors(m_acc_buffer_offset, vdma::InterruptsDomain::DEVICE, m_acc_desc_count, false); + m_buffer->program_descriptors(m_acc_buffer_offset, vdma::InterruptsDomain::DEVICE, m_acc_desc_count); CHECK_EXPECTED(descriptors_count); m_acc_desc_count += descriptors_count.value(); @@ -125,19 +138,19 @@ hailo_status ConfigBuffer::write_inner(const MemoryView &data) Expected> ConfigBuffer::create_sg_buffer(HailoRTDriver &driver, vdma::ChannelId channel_id, const std::vector &cfg_sizes) { - auto desc_sizes_pair = 
vdma::DescriptorList::get_desc_buffer_sizes_for_multiple_transfers(driver, 1, cfg_sizes); - CHECK_EXPECTED(desc_sizes_pair); - const auto page_size = desc_sizes_pair->first; - const auto descs_count = desc_sizes_pair->second; - - size_t buffer_size = 0; - for (const auto cfg_size : cfg_sizes) { - const auto descs_count_for_cfg = DIV_ROUND_UP(cfg_size, page_size); - buffer_size += descs_count_for_cfg * page_size; - } - - auto buffer = vdma::SgBuffer::create(driver, buffer_size, descs_count, page_size, HailoRTDriver::DmaDirection::H2D, - channel_id); + static const bool NOT_CIRCULAR = false; + // For config channels (In Hailo15), the page size must be a multiplication of host default page size. + // Therefore we use the flag force_default_page_size for those types of buffers. + auto const FORCE_DEFAULT_PAGE_SIZE = true; + auto buffer_size_requirements = vdma::BufferSizesRequirements::get_sg_buffer_requirements_multiple_transfers( + driver.desc_max_page_size(), 1, cfg_sizes, NOT_CIRCULAR, FORCE_DEFAULT_PAGE_SIZE); + CHECK_EXPECTED(buffer_size_requirements); + const auto page_size = buffer_size_requirements->desc_page_size(); + const auto descs_count = buffer_size_requirements->descs_count(); + const auto buffer_size = buffer_size_requirements->buffer_size(); + + auto buffer = vdma::SgBuffer::create(driver, buffer_size, descs_count, page_size, NOT_CIRCULAR, + HailoRTDriver::DmaDirection::H2D, channel_id); CHECK_EXPECTED(buffer); auto buffer_ptr = make_unique_nothrow(buffer.release()); @@ -149,9 +162,20 @@ Expected> ConfigBuffer::create_sg_buffer(Hailo Expected> ConfigBuffer::create_ccb_buffer(HailoRTDriver &driver, uint32_t buffer_size) { - buffer_size = vdma::ContinuousBuffer::get_buffer_size(buffer_size); - auto buffer = vdma::ContinuousBuffer::create(buffer_size, driver); - CHECK_EXPECTED(buffer); + static const bool NOT_CIRCULAR = false; + static const uint16_t SINGLE_TRANSFER = 1; + auto buffer_size_requirements = 
vdma::BufferSizesRequirements::get_ccb_buffer_requirements_single_transfer( + SINGLE_TRANSFER, buffer_size, NOT_CIRCULAR); + CHECK_EXPECTED(buffer_size_requirements); + + auto buffer = vdma::ContinuousBuffer::create(buffer_size_requirements->buffer_size(), driver); + /* Don't print error here since this might be expected error that the libhailoRT can recover from + (out of host memory). If it's not the case, there is a print in hailort_driver.cpp file */ + if (HAILO_OUT_OF_HOST_CMA_MEMORY == buffer.status()) { + return make_unexpected(buffer.status()); + } else { + CHECK_EXPECTED(buffer); + } auto buffer_ptr = make_unique_nothrow(buffer.release()); CHECK_NOT_NULL_AS_EXPECTED(buffer_ptr, HAILO_OUT_OF_HOST_MEMORY); @@ -165,7 +189,7 @@ bool ConfigBuffer::should_use_ccb(HailoRTDriver &driver) case HailoRTDriver::DmaType::PCIE: return false; case HailoRTDriver::DmaType::DRAM: - if (std::getenv("HAILO_FORCE_CONF_CHANNEL_OVER_DESC") != nullptr) { + if (nullptr != std::getenv("HAILO_FORCE_CONF_CHANNEL_OVER_DESC")) { LOGGER__WARNING("Using desc instead of CCB for config channel is not optimal for performance.\n"); return false; } diff --git a/hailort/libhailort/src/core_op/resource_manager/config_buffer.hpp b/hailort/libhailort/src/core_op/resource_manager/config_buffer.hpp index b27611e..534bab5 100644 --- a/hailort/libhailort/src/core_op/resource_manager/config_buffer.hpp +++ b/hailort/libhailort/src/core_op/resource_manager/config_buffer.hpp @@ -22,7 +22,6 @@ namespace hailort { #define CCW_DATA_OFFSET (CCW_BYTES_IN_WORD * 2) #define CCW_HEADER_SIZE (CCW_DATA_OFFSET) - class ConfigBuffer final { public: @@ -57,6 +56,8 @@ private: vdma::ChannelId channel_id, const std::vector &cfg_sizes); static Expected> create_ccb_buffer(HailoRTDriver &driver, uint32_t buffer_size); + static Expected> create_buffer(HailoRTDriver &driver, vdma::ChannelId channel_id, + const std::vector &cfg_sizes, const uint32_t buffer_size); static bool should_use_ccb(HailoRTDriver &driver); diff --git 
a/hailort/libhailort/src/core_op/resource_manager/context_switch_buffer_builder.cpp b/hailort/libhailort/src/core_op/resource_manager/context_switch_buffer_builder.cpp index e7c60b3..5684abc 100644 --- a/hailort/libhailort/src/core_op/resource_manager/context_switch_buffer_builder.cpp +++ b/hailort/libhailort/src/core_op/resource_manager/context_switch_buffer_builder.cpp @@ -39,6 +39,11 @@ const std::vector &get_controls() const; + const CONTROL_PROTOCOL__context_switch_context_type_t &get_context_type() const; private: CONTROL_PROTOCOL__context_switch_context_info_single_control_t ¤t_control(); diff --git a/hailort/libhailort/src/core_op/resource_manager/ddr_channels_pair.cpp b/hailort/libhailort/src/core_op/resource_manager/ddr_channels_pair.cpp deleted file mode 100644 index b170300..0000000 --- a/hailort/libhailort/src/core_op/resource_manager/ddr_channels_pair.cpp +++ /dev/null @@ -1,140 +0,0 @@ -/** - * Copyright (c) 2022 Hailo Technologies Ltd. All rights reserved. - * Distributed under the MIT license (https://opensource.org/licenses/MIT) -**/ -/** - * @file ddr_channels_pair.cpp - **/ - -#include "common/utils.hpp" - -#include "core_op/resource_manager/ddr_channels_pair.hpp" -#include "vdma/memory/continuous_buffer.hpp" -#include "vdma/memory/sg_buffer.hpp" - - -namespace hailort -{ - - -Expected DdrChannelsPair::create(HailoRTDriver &driver, const DdrChannelsInfo &ddr_channels_info) -{ - auto buffer_exp = should_use_ccb(driver) ? 
- create_ccb_buffer(driver, ddr_channels_info.row_size, ddr_channels_info.min_buffered_rows) : - create_sg_buffer(driver, ddr_channels_info.row_size, ddr_channels_info.min_buffered_rows, ddr_channels_info.d2h_channel_id); - CHECK_EXPECTED(buffer_exp); - auto buffer_ptr = buffer_exp.release(); - - CHECK_AS_EXPECTED(0 == (ddr_channels_info.row_size % buffer_ptr->desc_page_size()), HAILO_INTERNAL_FAILURE, - "DDR channel buffer row size must be a multiple of descriptor page size"); - - const auto interrupts_domain = vdma::InterruptsDomain::NONE; - const auto total_size = buffer_ptr->descs_count() * buffer_ptr->desc_page_size(); - auto desc_count_local = buffer_ptr->program_descriptors(total_size, interrupts_domain, 0, true); - CHECK_EXPECTED(desc_count_local); - - return DdrChannelsPair(std::move(buffer_ptr), ddr_channels_info); -} - -uint16_t DdrChannelsPair::descs_count() const -{ - assert(IS_FIT_IN_UINT16(m_buffer->descs_count())); - return static_cast(m_buffer->descs_count()); -} - -uint32_t DdrChannelsPair::descriptors_per_frame() const -{ - return (m_info.row_size / m_buffer->desc_page_size()) * m_info.total_buffers_per_frame; -} - -Expected DdrChannelsPair::read() const -{ - const auto size = m_buffer->size(); - auto res = Buffer::create(size); - CHECK_EXPECTED(res); - - auto status = m_buffer->read(res->data(), size, 0); - CHECK_SUCCESS_AS_EXPECTED(status); - - return res.release(); -} - -const DdrChannelsInfo& DdrChannelsPair::info() const -{ - return m_info; -} - - -bool DdrChannelsPair::need_manual_credit_management() const -{ - // On scatter gather manual credit management is needed - return m_buffer->type() == vdma::VdmaBuffer::Type::SCATTER_GATHER; -} - -CONTROL_PROTOCOL__host_buffer_info_t DdrChannelsPair::get_host_buffer_info() const -{ - return m_buffer->get_host_buffer_info(m_info.row_size); -} - -Expected> DdrChannelsPair::create_sg_buffer(HailoRTDriver &driver, - uint32_t row_size, uint16_t buffered_rows, vdma::ChannelId d2h_channel_id) -{ - auto 
desc_sizes_pair = vdma::DescriptorList::get_desc_buffer_sizes_for_single_transfer(driver, - buffered_rows, buffered_rows, row_size); - CHECK_EXPECTED(desc_sizes_pair); - const auto desc_page_size = desc_sizes_pair->first; - const auto descs_count = desc_sizes_pair->second; - // DdrChannels are circular so we need to allocate the full descriptors list. - const auto buffer_size = desc_page_size * descs_count; - - auto buffer = vdma::SgBuffer::create(driver, buffer_size, descs_count, desc_page_size, - HailoRTDriver::DmaDirection::BOTH, d2h_channel_id); - CHECK_EXPECTED(buffer); - - auto buffer_ptr = make_unique_nothrow(buffer.release()); - CHECK_NOT_NULL_AS_EXPECTED(buffer_ptr, HAILO_OUT_OF_HOST_MEMORY); - - return std::unique_ptr(std::move(buffer_ptr)); -} - -DdrChannelsPair::DdrChannelsPair(std::unique_ptr &&buffer, const DdrChannelsInfo &ddr_channels_info) : - m_buffer(std::move(buffer)), - m_info(ddr_channels_info) -{} - -Expected> DdrChannelsPair::create_ccb_buffer(HailoRTDriver &driver, - uint32_t row_size, uint16_t buffered_rows) -{ - // The first 12 channels in D2H CCB ("regular channels") requires that the amount of descriptors will be a power - // of 2. Altough the 4 last channels ("enhanced channels") don't have this requirements, we keep the code the same. 
- auto buffer_size = vdma::ContinuousBuffer::get_buffer_size_desc_power2(row_size * buffered_rows); - auto buffer = vdma::ContinuousBuffer::create(buffer_size, driver); - CHECK_EXPECTED(buffer); - - auto buffer_ptr = make_unique_nothrow(buffer.release()); - CHECK_NOT_NULL_AS_EXPECTED(buffer_ptr, HAILO_OUT_OF_HOST_MEMORY); - - return std::unique_ptr(std::move(buffer_ptr)); -} - -bool DdrChannelsPair::should_use_ccb(HailoRTDriver &driver) -{ - switch (driver.dma_type()) { - case HailoRTDriver::DmaType::PCIE: - return false; - case HailoRTDriver::DmaType::DRAM: - if (std::getenv("HAILO_FORCE_DDR_CHANNEL_OVER_DESC") != nullptr) { - LOGGER__WARNING("Using desc instead of CCB for ddr channel is not optimal for performance.\n"); - return false; - } - else { - return true; - } - } - - // Shouldn't reach here - assert(false); - return false; -} - -} /* namespace hailort */ diff --git a/hailort/libhailort/src/core_op/resource_manager/ddr_channels_pair.hpp b/hailort/libhailort/src/core_op/resource_manager/ddr_channels_pair.hpp deleted file mode 100644 index 4caadc3..0000000 --- a/hailort/libhailort/src/core_op/resource_manager/ddr_channels_pair.hpp +++ /dev/null @@ -1,68 +0,0 @@ -/** - * Copyright (c) 2022 Hailo Technologies Ltd. All rights reserved. - * Distributed under the MIT license (https://opensource.org/licenses/MIT) -**/ -/** - * @file ddr_channels_pair.hpp - * @brief DDR channel pairs are pair of vdma channels used in the same context for skip-connection. - **/ - -#ifndef _HAILO_DDR_CHANNELS_PAIR_HPP_ -#define _HAILO_DDR_CHANNELS_PAIR_HPP_ - -#include "hailo/hailort.h" -#include "hailo/buffer.hpp" - -#include "vdma/memory/vdma_buffer.hpp" - - -namespace hailort -{ - -struct DdrChannelsInfo -{ - vdma::ChannelId d2h_channel_id; - uint8_t d2h_stream_index; - vdma::ChannelId h2d_channel_id; - uint8_t h2d_stream_index; - uint8_t network_index; - uint16_t row_size; - uint16_t min_buffered_rows; - // total_buffers_per_frame not same as core_buffer_per frame. 
- //(In DDR core buffer per frame is 1). Used to calc total host descriptors_per_frame. - uint16_t total_buffers_per_frame; -}; - -class DdrChannelsPair final -{ -public: - static Expected create(HailoRTDriver &driver, const DdrChannelsInfo &ddr_channels_info); - - uint16_t descs_count() const; - uint32_t descriptors_per_frame() const; - Expected read() const; - const DdrChannelsInfo & info() const; - - // Checks if the credits are automaticaly going from d2h channel to its h2d channel, or it needs to be done manually - // (Using a fw task). - bool need_manual_credit_management() const; - - CONTROL_PROTOCOL__host_buffer_info_t get_host_buffer_info() const; - -private: - DdrChannelsPair(std::unique_ptr &&buffer, const DdrChannelsInfo &ddr_channels_info); - - static Expected> create_sg_buffer(HailoRTDriver &driver, - uint32_t row_size, uint16_t buffered_rows, vdma::ChannelId d2h_channel_id); - static Expected> create_ccb_buffer(HailoRTDriver &driver, - uint32_t row_size, uint16_t buffered_rows); - - static bool should_use_ccb(HailoRTDriver &driver); - - std::unique_ptr m_buffer; - DdrChannelsInfo m_info; -}; - -} /* namespace hailort */ - -#endif /* _HAILO_DDR_CHANNELS_PAIR_HPP_ */ diff --git a/hailort/libhailort/src/core_op/resource_manager/inter_context_buffer.cpp b/hailort/libhailort/src/core_op/resource_manager/inter_context_buffer.cpp deleted file mode 100644 index 5bac263..0000000 --- a/hailort/libhailort/src/core_op/resource_manager/inter_context_buffer.cpp +++ /dev/null @@ -1,166 +0,0 @@ -/** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. - * Distributed under the MIT license (https://opensource.org/licenses/MIT) - **/ -/** - * @file inter_context_buffer.cpp - * @brief Manages inter-context buffer. 
- */ - -#include "core_op/resource_manager/resource_manager.hpp" -#include "core_op/resource_manager/inter_context_buffer.hpp" -#include "vdma/memory/sg_buffer.hpp" -#include "vdma/memory/continuous_buffer.hpp" - - -namespace hailort -{ - -Expected InterContextBuffer::create(HailoRTDriver &driver, uint32_t transfer_size, - uint16_t max_batch_size, vdma::ChannelId d2h_channel_id) -{ - auto buffer_exp = should_use_ccb(driver) ? - create_ccb_buffer(driver, transfer_size, max_batch_size) : - create_sg_buffer(driver, transfer_size, max_batch_size, d2h_channel_id); - CHECK_EXPECTED(buffer_exp); - auto buffer_ptr = buffer_exp.release(); - - size_t acc_offset = 0; - for (uint16_t i = 0; i < max_batch_size; i++) { - const auto last_desc_interrupts_domain = ((max_batch_size - 1) == i) ? - vdma::InterruptsDomain::DEVICE : vdma::InterruptsDomain::NONE; - static const auto BUFFER_NOT_CIRCULAR = false; - auto desc_count_local = buffer_ptr->program_descriptors(transfer_size, last_desc_interrupts_domain, acc_offset, - BUFFER_NOT_CIRCULAR); - CHECK_EXPECTED(desc_count_local, "Failed to program descs for inter context channels. 
Given max_batch_size is too big."); - acc_offset += desc_count_local.value(); - } - - return InterContextBuffer(std::move(buffer_ptr), transfer_size, max_batch_size); -} - -hailo_status InterContextBuffer::reprogram(uint16_t batch_size) -{ - const auto prev_batch_size = m_dynamic_batch_size; - auto status = set_dynamic_batch_size(batch_size); - CHECK_SUCCESS(status); - - if (prev_batch_size == m_dynamic_batch_size) { - LOGGER__TRACE("Batch size hasn't changed ({}); nothing to be done.", batch_size); - return HAILO_SUCCESS; - } - - status = m_buffer->reprogram_device_interrupts_for_end_of_batch(m_transfer_size, prev_batch_size, - vdma::InterruptsDomain::NONE); - CHECK_SUCCESS(status, "Failed reprogramming device interrupts for the end of the previous batch (size {})", - prev_batch_size); - status = m_buffer->reprogram_device_interrupts_for_end_of_batch(m_transfer_size, m_dynamic_batch_size, - vdma::InterruptsDomain::DEVICE); - CHECK_SUCCESS(status, "Failed reprogramming device interrupts for the end of the current batch (size {})", - m_dynamic_batch_size); - - return HAILO_SUCCESS; -} - -Expected InterContextBuffer::read() -{ - const auto size = m_transfer_size * m_dynamic_batch_size; - assert(size <= m_buffer->size()); - - auto res = Buffer::create(size); - CHECK_EXPECTED(res); - - auto status = m_buffer->read(res->data(), size, 0); - CHECK_SUCCESS_AS_EXPECTED(status); - - return res.release(); -} - -CONTROL_PROTOCOL__host_buffer_info_t InterContextBuffer::get_host_buffer_info() const -{ - return m_buffer->get_host_buffer_info(m_transfer_size); -} - -InterContextBuffer::InterContextBuffer(std::unique_ptr &&buffer, uint32_t transfer_size, - uint16_t batch_size) : - m_buffer(std::move(buffer)), - m_transfer_size(transfer_size), - m_max_batch_size(batch_size), - m_dynamic_batch_size(batch_size) -{} - -hailo_status InterContextBuffer::set_dynamic_batch_size(uint16_t batch_size) -{ - if (CONTROL_PROTOCOL__IGNORE_DYNAMIC_BATCH_SIZE == batch_size) { - 
LOGGER__TRACE("Received CONTROL_PROTOCOL__IGNORE_DYNAMIC_BATCH_SIZE == batch_size; " - "Leaving previously set value of {}", m_dynamic_batch_size); - } else { - CHECK(batch_size <= m_max_batch_size, HAILO_INVALID_ARGUMENT, - "batch_size ({}) must be <= than m_max_batch_size ({})", - batch_size, m_max_batch_size); - - LOGGER__TRACE("Setting intermediate buffer's batch_size to {}", batch_size); - m_dynamic_batch_size = batch_size; - } - - return HAILO_SUCCESS; -} - -Expected> InterContextBuffer::create_sg_buffer(HailoRTDriver &driver, - uint32_t transfer_size, uint16_t batch_size, vdma::ChannelId d2h_channel_id) -{ - auto desc_sizes_pair = vdma::DescriptorList::get_desc_buffer_sizes_for_single_transfer(driver, - batch_size, batch_size, transfer_size); - CHECK_EXPECTED(desc_sizes_pair); - const auto desc_page_size = desc_sizes_pair->first; - const auto descs_count = desc_sizes_pair->second; - - // TODO: HRT-9914 - Instead of using aligned descriptor for each transfer, we should do it for the all frame. - const size_t desc_per_transfer = DIV_ROUND_UP(transfer_size, desc_page_size); - const size_t buffer_size = desc_per_transfer * desc_page_size * batch_size; - auto buffer = vdma::SgBuffer::create(driver, buffer_size, descs_count, desc_page_size, - HailoRTDriver::DmaDirection::BOTH, d2h_channel_id); - CHECK_EXPECTED(buffer); - - auto buffer_ptr = make_unique_nothrow(buffer.release()); - CHECK_NOT_NULL_AS_EXPECTED(buffer_ptr, HAILO_OUT_OF_HOST_MEMORY); - - return std::unique_ptr(std::move(buffer_ptr)); -} - -Expected> InterContextBuffer::create_ccb_buffer(HailoRTDriver &driver, - uint32_t transfer_size, uint16_t batch_size) -{ - // The first 12 channels in D2H CCB ("regular channels") requires that the amount of descriptors will be a power - // of 2. Altough the 4 last channels ("enhanced channels") don't have this requirements, we keep the code the same. 
- auto buffer_size = vdma::ContinuousBuffer::get_buffer_size_desc_power2(transfer_size * batch_size); - auto buffer = vdma::ContinuousBuffer::create(buffer_size, driver); - CHECK_EXPECTED(buffer); - - auto buffer_ptr = make_unique_nothrow(buffer.release()); - CHECK_NOT_NULL_AS_EXPECTED(buffer_ptr, HAILO_OUT_OF_HOST_MEMORY); - - return std::unique_ptr(std::move(buffer_ptr)); -} - -bool InterContextBuffer::should_use_ccb(HailoRTDriver &driver) -{ - switch (driver.dma_type()) { - case HailoRTDriver::DmaType::PCIE: - return false; - case HailoRTDriver::DmaType::DRAM: - if (nullptr == std::getenv("HAILO_FORCE_INFER_CONTEXT_CHANNEL_OVER_DESC")) { - return false; - } - else { - LOGGER__INFO("Using (non default mode) CCB for inter context channels.\n"); - return true; - } - } - - // Shouldn't reach here - assert(false); - return false; -} - -} /* namespace hailort */ diff --git a/hailort/libhailort/src/core_op/resource_manager/inter_context_buffer.hpp b/hailort/libhailort/src/core_op/resource_manager/inter_context_buffer.hpp deleted file mode 100644 index 912501e..0000000 --- a/hailort/libhailort/src/core_op/resource_manager/inter_context_buffer.hpp +++ /dev/null @@ -1,54 +0,0 @@ -/** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. - * Distributed under the MIT license (https://opensource.org/licenses/MIT) - **/ -/** - * @file inter_context_buffer.hpp - * @brief Manages inter-context buffer. 
- */ - -#ifndef _HAILO_INTER_CONTEXT_BUFFER_HPP_ -#define _HAILO_INTER_CONTEXT_BUFFER_HPP_ - -#include "hailo/expected.hpp" -#include "hailo/buffer.hpp" - -#include "os/hailort_driver.hpp" -#include "vdma/memory/vdma_buffer.hpp" - -#include "control_protocol.h" - - -namespace hailort -{ - -class InterContextBuffer final { -public: - static Expected create(HailoRTDriver &driver, uint32_t transfer_size, - uint16_t max_batch_size, vdma::ChannelId d2h_channel_id); - - hailo_status reprogram(uint16_t batch_size); - Expected read(); - - CONTROL_PROTOCOL__host_buffer_info_t get_host_buffer_info() const; - -private: - InterContextBuffer(std::unique_ptr &&buffer, uint32_t transfer_size, uint16_t batch_size); - hailo_status set_dynamic_batch_size(uint16_t batch_size); - - static Expected> create_sg_buffer(HailoRTDriver &driver, - uint32_t transfer_size, uint16_t batch_size, vdma::ChannelId d2h_channel_id); - static Expected> create_ccb_buffer(HailoRTDriver &driver, - uint32_t transfer_size, uint16_t batch_size); - - static bool should_use_ccb(HailoRTDriver &driver); - - std::unique_ptr m_buffer; - const uint32_t m_transfer_size; - const uint16_t m_max_batch_size; - uint16_t m_dynamic_batch_size; -}; - -} /* namespace hailort */ - -#endif /* _HAILO_INTER_CONTEXT_BUFFER_HPP_ */ \ No newline at end of file diff --git a/hailort/libhailort/src/core_op/resource_manager/intermediate_buffer.cpp b/hailort/libhailort/src/core_op/resource_manager/intermediate_buffer.cpp new file mode 100644 index 0000000..2023de1 --- /dev/null +++ b/hailort/libhailort/src/core_op/resource_manager/intermediate_buffer.cpp @@ -0,0 +1,196 @@ +/** + * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file intermediate_buffer.cpp + * @brief Manages intermediate buffers, including inter-context and ddr buffers. 
+ */ + +#include "intermediate_buffer.hpp" + +#include "core_op/resource_manager/resource_manager.hpp" +#include "vdma/memory/sg_buffer.hpp" +#include "vdma/memory/continuous_buffer.hpp" +#include "vdma/memory/buffer_requirements.hpp" + + +namespace hailort +{ +Expected> IntermediateBuffer::create_buffer(HailoRTDriver &driver, uint32_t transfer_size, + uint16_t max_batch_size, vdma::ChannelId d2h_channel_id, StreamingType streaming_type) +{ + const bool is_circular = (streaming_type == StreamingType::CIRCULAR_CONTINUOS); + auto buffer_exp = should_use_ccb(driver, streaming_type) ? + create_ccb_buffer(driver, transfer_size, max_batch_size, is_circular) : + create_sg_buffer(driver, transfer_size, max_batch_size, d2h_channel_id, is_circular); + + if (should_use_ccb(driver, streaming_type) && (HAILO_OUT_OF_HOST_CMA_MEMORY == buffer_exp.status())) { + /* Try to use sg buffer instead */ + return create_sg_buffer(driver, transfer_size, max_batch_size, d2h_channel_id, is_circular); + } else { + return buffer_exp; + } +} + +Expected IntermediateBuffer::create(HailoRTDriver &driver, uint32_t transfer_size, + uint16_t max_batch_size, vdma::ChannelId d2h_channel_id, StreamingType streaming_type) +{ + auto buffer_exp = create_buffer(driver, transfer_size, max_batch_size, d2h_channel_id, streaming_type); + CHECK_EXPECTED(buffer_exp); + auto buffer_ptr = buffer_exp.release(); + + if (streaming_type == StreamingType::BURST) { + // We have max_batch_size transfers, so we program them one by one. The last transfer should report interrupt + // to the device. + size_t acc_offset = 0; + for (uint16_t i = 0; i < max_batch_size; i++) { + const auto last_desc_interrupts_domain = ((max_batch_size - 1) == i) ? + vdma::InterruptsDomain::DEVICE : vdma::InterruptsDomain::NONE; + auto desc_count_local = buffer_ptr->program_descriptors(transfer_size, last_desc_interrupts_domain, acc_offset); + CHECK_EXPECTED(desc_count_local, "Failed to program descs for inter context channels. 
Given max_batch_size is too big."); + acc_offset += desc_count_local.value(); + } + } else { + // Program all descriptors, no need for interrupt. + const auto interrupts_domain = vdma::InterruptsDomain::NONE; + const auto total_size = buffer_ptr->descs_count() * buffer_ptr->desc_page_size(); + auto desc_count_local = buffer_ptr->program_descriptors(total_size, interrupts_domain, 0); + CHECK_EXPECTED(desc_count_local); + } + + return IntermediateBuffer(std::move(buffer_ptr), transfer_size, max_batch_size, streaming_type); +} + +hailo_status IntermediateBuffer::set_dynamic_batch_size(uint16_t batch_size) +{ + if (m_streaming_type == StreamingType::CIRCULAR_CONTINUOS) { + // The buffer pattern does not depend on the batch for circular continuous buffers. + return HAILO_SUCCESS; + } + + CHECK(batch_size <= m_max_batch_size, HAILO_INVALID_ARGUMENT, + "batch_size ({}) must be <= than m_max_batch_size ({})", + batch_size, m_max_batch_size); + + LOGGER__TRACE("Setting intermediate buffer's batch_size to {}", batch_size); + const auto prev_batch_size = m_dynamic_batch_size; + m_dynamic_batch_size = batch_size; + + auto status = m_buffer->reprogram_device_interrupts_for_end_of_batch(m_transfer_size, prev_batch_size, + vdma::InterruptsDomain::NONE); + CHECK_SUCCESS(status, "Failed reprogramming device interrupts for the end of the previous batch (size {})", + prev_batch_size); + status = m_buffer->reprogram_device_interrupts_for_end_of_batch(m_transfer_size, m_dynamic_batch_size, + vdma::InterruptsDomain::DEVICE); + CHECK_SUCCESS(status, "Failed reprogramming device interrupts for the end of the current batch (size {})", + m_dynamic_batch_size); + + return HAILO_SUCCESS; +} + +Expected IntermediateBuffer::read() +{ + const auto size = m_transfer_size * m_dynamic_batch_size; + assert(size <= m_buffer->size()); + + auto res = Buffer::create(size); + CHECK_EXPECTED(res); + + auto status = m_buffer->read(res->data(), size, 0); + CHECK_SUCCESS_AS_EXPECTED(status); + + return 
res.release(); +} + +CONTROL_PROTOCOL__host_buffer_info_t IntermediateBuffer::get_host_buffer_info() const +{ + return m_buffer->get_host_buffer_info(m_transfer_size); +} + +IntermediateBuffer::IntermediateBuffer(std::unique_ptr &&buffer, uint32_t transfer_size, + uint16_t batch_size, StreamingType streaming_type) : + m_buffer(std::move(buffer)), + m_transfer_size(transfer_size), + m_max_batch_size(batch_size), + m_streaming_type(streaming_type), + m_dynamic_batch_size(batch_size) +{} + +Expected> IntermediateBuffer::create_sg_buffer(HailoRTDriver &driver, + uint32_t transfer_size, uint16_t batch_size, vdma::ChannelId d2h_channel_id, bool is_circular) +{ + auto const DONT_FORCE_DEFAULT_PAGE_SIZE = false; + auto buffer_requirements = vdma::BufferSizesRequirements::get_sg_buffer_requirements_single_transfer( + driver.desc_max_page_size(), batch_size, batch_size, transfer_size, is_circular, DONT_FORCE_DEFAULT_PAGE_SIZE); + CHECK_EXPECTED(buffer_requirements); + const auto desc_page_size = buffer_requirements->desc_page_size(); + const auto descs_count = buffer_requirements->descs_count(); + const auto buffer_size = buffer_requirements->buffer_size(); + + auto buffer = vdma::SgBuffer::create(driver, buffer_size, descs_count, desc_page_size, is_circular, + HailoRTDriver::DmaDirection::BOTH, d2h_channel_id); + CHECK_EXPECTED(buffer); + + auto buffer_ptr = make_unique_nothrow(buffer.release()); + CHECK_NOT_NULL_AS_EXPECTED(buffer_ptr, HAILO_OUT_OF_HOST_MEMORY); + + return std::unique_ptr(std::move(buffer_ptr)); +} + +Expected> IntermediateBuffer::create_ccb_buffer(HailoRTDriver &driver, + uint32_t transfer_size, uint16_t batch_size, bool is_circular) +{ + auto buffer_size_requirements = vdma::BufferSizesRequirements::get_ccb_buffer_requirements_single_transfer( + batch_size, transfer_size, is_circular); + CHECK_EXPECTED(buffer_size_requirements); + + auto buffer = vdma::ContinuousBuffer::create(buffer_size_requirements->buffer_size(), driver); + /* Don't print error here 
since this might be expected error that the libhailoRT can recover from + (out of host memory). If it's not the case, there is a print in hailort_driver.cpp file */ + if (HAILO_OUT_OF_HOST_CMA_MEMORY == buffer.status()) { + return make_unexpected(buffer.status()); + } else { + CHECK_EXPECTED(buffer); + } + + auto buffer_ptr = make_unique_nothrow(buffer.release()); + CHECK_NOT_NULL_AS_EXPECTED(buffer_ptr, HAILO_OUT_OF_HOST_MEMORY); + + return std::unique_ptr(std::move(buffer_ptr)); +} + +bool IntermediateBuffer::should_use_ccb(HailoRTDriver &driver, StreamingType streaming_type) +{ + if (driver.dma_type() == HailoRTDriver::DmaType::PCIE) { + // CCB not supported on PCIe + return false; + } + + switch (streaming_type) { + case StreamingType::BURST: + // On burst (aka inter-context), because the buffers are big (And depends on the max_batch_size), we currently + // don't want to use CCB by default. + if (nullptr != std::getenv("HAILO_FORCE_INFER_CONTEXT_CHANNEL_OVER_DESC")) { + LOGGER__WARNING("Using desc instead of CCB for inter context channels is not optimal for performance.\n"); + return false; + } else { + return true; + } + case StreamingType::CIRCULAR_CONTINUOS: + // On circular_continuous (aka ddr), the buffers are relatively small and we want to verify the C2C mechanism, + // therefore the CCB is the default behaviour. 
+ if (nullptr != std::getenv("HAILO_FORCE_DDR_CHANNEL_OVER_DESC")) { + LOGGER__WARNING("Using desc instead of CCB for ddr channel is not optimal for performance.\n"); + return false; + } else { + return true; + } + } + + // Shouldn't reach here + assert(false); + return false; +} + +} /* namespace hailort */ diff --git a/hailort/libhailort/src/core_op/resource_manager/intermediate_buffer.hpp b/hailort/libhailort/src/core_op/resource_manager/intermediate_buffer.hpp new file mode 100644 index 0000000..0d4deca --- /dev/null +++ b/hailort/libhailort/src/core_op/resource_manager/intermediate_buffer.hpp @@ -0,0 +1,65 @@ +/** + * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file intermediate_buffer.hpp + * @brief Manages intermediate buffers, including inter-context and ddr buffers. + */ + +#ifndef _HAILO_INTERMEDIATE_BUFFER_HPP_ +#define _HAILO_INTERMEDIATE_BUFFER_HPP_ + +#include "hailo/expected.hpp" +#include "hailo/buffer.hpp" + +#include "os/hailort_driver.hpp" +#include "vdma/memory/vdma_buffer.hpp" + +#include "control_protocol.h" + + +namespace hailort +{ + +class IntermediateBuffer final { +public: + + enum class StreamingType { + // Used for inter-context buffer. The buffer is not circular and the data is fetched in bursts. + BURST, + + // Used for ddr-channel buffers. The buffer is circular and fetched continuously. 
+ CIRCULAR_CONTINUOS, + }; + + static Expected create(HailoRTDriver &driver, uint32_t transfer_size, + uint16_t max_batch_size, vdma::ChannelId d2h_channel_id, StreamingType streaming_type); + + hailo_status set_dynamic_batch_size(uint16_t batch_size); + Expected read(); + CONTROL_PROTOCOL__host_buffer_info_t get_host_buffer_info() const; + +private: + IntermediateBuffer(std::unique_ptr &&buffer, uint32_t transfer_size, uint16_t batch_size, + StreamingType streaming_type); + + static Expected> create_sg_buffer(HailoRTDriver &driver, + uint32_t transfer_size, uint16_t batch_size, vdma::ChannelId d2h_channel_id, bool is_circular); + static Expected> create_ccb_buffer(HailoRTDriver &driver, + uint32_t transfer_size, uint16_t batch_size, bool is_circular); + static Expected> create_buffer(HailoRTDriver &driver, uint32_t transfer_size, + uint16_t max_batch_size, vdma::ChannelId d2h_channel_id, StreamingType streaming_type); + + static bool should_use_ccb(HailoRTDriver &driver, StreamingType streaming_type); + + std::unique_ptr m_buffer; + const uint32_t m_transfer_size; + const uint16_t m_max_batch_size; + const StreamingType m_streaming_type; + uint16_t m_dynamic_batch_size; +}; + +} /* namespace hailort */ + +#endif /* _HAILO_INTERMEDIATE_BUFFER_HPP_ */ \ No newline at end of file diff --git a/hailort/libhailort/src/core_op/resource_manager/resource_manager.cpp b/hailort/libhailort/src/core_op/resource_manager/resource_manager.cpp index 64b97e9..be3ac4c 100644 --- a/hailort/libhailort/src/core_op/resource_manager/resource_manager.cpp +++ b/hailort/libhailort/src/core_op/resource_manager/resource_manager.cpp @@ -2,6 +2,7 @@ #include "core_op/resource_manager/resource_manager.hpp" #include "vdma/channel/boundary_channel.hpp" +#include "vdma/memory/buffer_requirements.hpp" #include "device_common/control.hpp" #include @@ -42,14 +43,24 @@ ContextSwitchBufferBuilder &ContextResources::builder() return m_builder; } -void ContextResources::add_edge_layer(const LayerInfo 
&layer_info, vdma::ChannelId channel_id, - const CONTROL_PROTOCOL__host_buffer_info_t &buffer_info) +hailo_status ContextResources::add_edge_layer(const LayerInfo &layer_info, vdma::ChannelId channel_id, + const CONTROL_PROTOCOL__host_buffer_info_t &buffer_info, const SupportedFeatures &supported_features) { + auto status = validate_edge_layer(layer_info, channel_id, supported_features); + CHECK_SUCCESS(status); + m_edge_layers.emplace_back(EdgeLayer{ layer_info, channel_id, buffer_info }); + + return HAILO_SUCCESS; +} + +void ContextResources::add_ddr_channels_info(const DdrChannelsInfo &ddr_info) +{ + m_ddr_channels_infos.emplace_back(ddr_info); } std::vector ContextResources::get_edge_layers() const @@ -80,10 +91,11 @@ std::vector ContextResources::get_edge_layers(LayerType layer_type, h return edge_layers; } -Expected ContextResources::get_edge_layer_by_stream_index(uint8_t stream_index) const +Expected ContextResources::get_edge_layer_by_stream_index(const uint8_t stream_index, + const hailo_stream_direction_t direction) const { for (const auto &edge_layer : m_edge_layers) { - if (edge_layer.layer_info.stream_index == stream_index) { + if ((stream_index == edge_layer.layer_info.stream_index) && (direction == edge_layer.layer_info.direction)) { return EdgeLayer(edge_layer); } } @@ -92,21 +104,11 @@ Expected ContextResources::get_edge_layer_by_stream_index(uint8_t str return make_unexpected(HAILO_INTERNAL_FAILURE); } - -ExpectedRef ContextResources::create_ddr_channels_pair(const DdrChannelsInfo &ddr_info) -{ - auto buffer = DdrChannelsPair::create(m_driver, ddr_info); - CHECK_EXPECTED(buffer); - - m_ddr_channels_pairs.emplace_back(buffer.release()); - return std::ref(m_ddr_channels_pairs.back()); -} - -ExpectedRef ContextResources::get_ddr_channels_pair(uint8_t d2h_stream_index) const +Expected ContextResources::get_ddr_channels_info(uint8_t d2h_stream_index) const { - for (auto &ddr_channels_pair : m_ddr_channels_pairs) { - if 
(ddr_channels_pair.info().d2h_stream_index == d2h_stream_index) { - return std::ref(ddr_channels_pair); + for (const auto &ddr_channels_info : m_ddr_channels_infos) { + if (ddr_channels_info.d2h_stream_index == d2h_stream_index) { + return DdrChannelsInfo{ddr_channels_info}; } } @@ -114,18 +116,39 @@ ExpectedRef ContextResources::get_ddr_channels_pair(uint8 return make_unexpected(HAILO_INTERNAL_FAILURE); } -const std::vector &ContextResources::get_ddr_channels_pairs() const +const std::vector &ContextResources::get_ddr_channels_infos() const { - return m_ddr_channels_pairs; + return m_ddr_channels_infos; } -hailo_status ContextResources::validate_edge_layers() +hailo_status ContextResources::validate_edge_layer(const LayerInfo &layer_info, vdma::ChannelId channel_id, + const SupportedFeatures &supported_features) { - std::set used_channel_ids; + bool stream_index_already_used = false; + for (const auto &edge_layer : m_edge_layers) { - CHECK(used_channel_ids.find(edge_layer.channel_id) == used_channel_ids.end(), HAILO_INTERNAL_FAILURE, - "Same stream use the same channel id {}", edge_layer.channel_id); - used_channel_ids.insert(edge_layer.channel_id); + CHECK(!(edge_layer.channel_id == channel_id), HAILO_INTERNAL_FAILURE, + "Same stream use the same channel id {}", channel_id); + + // In Activation Context it is ok to have multiple edge layers with same stream index seeing as they could be for + // Different contexts etc... 
+ if (CONTROL_PROTOCOL__CONTEXT_SWITCH_CONTEXT_TYPE_ACTIVATION != m_builder.get_context_type()) { + if (edge_layer.layer_info.stream_index == layer_info.stream_index) { + // Validate that the amount of edge layers with the same stream index per context is 2 (And with opposite directions) + // In the case of dual direction supported feature - otherwise 1 + if (supported_features.dual_direction_stream_index) { + CHECK(!stream_index_already_used, HAILO_INTERNAL_FAILURE, + "Stream Index {} used for too many edge layers in one context", edge_layer.layer_info.stream_index); + CHECK(layer_info.direction != edge_layer.layer_info.direction, HAILO_INTERNAL_FAILURE, + "Stream Index {} used for other edge layer in same direction", edge_layer.layer_info.stream_index); + stream_index_already_used = true; + } else { + LOGGER__ERROR("Stream Index {} used for too many edge layers in one context", + edge_layer.layer_info.stream_index); + return HAILO_INTERNAL_FAILURE; + } + } + } } return HAILO_SUCCESS; @@ -169,7 +192,7 @@ static Expected create_latency_meters_from_config_params( LatencyMetersMap latency_meters_map; if ((config_params.latency & HAILO_LATENCY_MEASURE) == HAILO_LATENCY_MEASURE) { - // Best affort for starting latency meter. + // Best effort for starting latency meter. 
auto networks_names = core_op_metadata->get_network_names(); for (auto &network_name : networks_names) { auto layer_infos = core_op_metadata->get_all_layer_infos(network_name); @@ -196,7 +219,7 @@ Expected ResourcesManager::create(VdmaDevice &vdma_device, Hai const auto &config_channels_info = core_op_metadata->config_channels_info(); config_channels_ids.reserve(config_channels_info.size()); for (uint8_t cfg_index = 0; cfg_index < config_channels_info.size(); cfg_index++) { - const auto layer_identifier = std::make_tuple(LayerType::CFG, "", cfg_index); + const auto layer_identifier = std::make_tuple(LayerType::CFG, HAILO_H2D_STREAM, "", cfg_index); const auto engine_index = config_channels_info[cfg_index].engine_index; auto channel_id = allocator.get_available_channel_id(layer_identifier, HailoRTDriver::DmaDirection::H2D, engine_index); CHECK_EXPECTED(channel_id); @@ -225,7 +248,7 @@ ResourcesManager::ResourcesManager(VdmaDevice &vdma_device, HailoRTDriver &drive m_vdma_device(vdma_device), m_driver(driver), m_config_params(config_params), - m_inter_context_buffers(), + m_intermediate_buffers(), m_core_op_metadata(std::move(core_op_metadata)), m_core_op_index(core_op_index), m_dynamic_context_count(0), @@ -244,7 +267,7 @@ ResourcesManager::ResourcesManager(ResourcesManager &&other) noexcept : m_vdma_device(other.m_vdma_device), m_driver(other.m_driver), m_config_params(other.m_config_params), - m_inter_context_buffers(std::move(other.m_inter_context_buffers)), + m_intermediate_buffers(std::move(other.m_intermediate_buffers)), m_core_op_metadata(std::move(other.m_core_op_metadata)), m_core_op_index(other.m_core_op_index), m_dynamic_context_count(std::exchange(other.m_dynamic_context_count, static_cast(0))), @@ -340,6 +363,24 @@ void ResourcesManager::process_interrupts(IrqData &&irq_data) } } +// TODO: after adding NMS single int, we can create an async channel for async nms output stream (HRT-10553) +Expected ResourcesManager::get_boundary_vdma_channel_type(const 
LayerInfo &layer_info) +{ + CHECK_AS_EXPECTED(contains(m_config_params.stream_params_by_name, layer_info.name), HAILO_INVALID_ARGUMENT, + "Can't find stream params for layer {}", layer_info.name); + const auto async_stream = (0 != (m_config_params.stream_params_by_name.at(layer_info.name).flags & HAILO_STREAM_FLAGS_ASYNC)); + if (async_stream) { + // NMS async streams use buffered channels + if (layer_info.format.order == HAILO_FORMAT_ORDER_HAILO_NMS) { + return vdma::BoundaryChannel::Type::BUFFERED; + } + // Non-nms async streams use async channels + return vdma::BoundaryChannel::Type::ASYNC; + } + // Buffered streams => buffered channels + return vdma::BoundaryChannel::Type::BUFFERED; +} + hailo_status ResourcesManager::create_boundary_vdma_channel(const LayerInfo &layer_info) { // TODO: put in layer info @@ -349,34 +390,46 @@ hailo_status ResourcesManager::create_boundary_vdma_channel(const LayerInfo &lay channel_direction, layer_info.dma_engine_index); CHECK_EXPECTED_AS_STATUS(channel_id); - auto network_batch_size = get_network_batch_size(layer_info.network_name); + const auto network_batch_size = get_network_batch_size(layer_info.network_name); CHECK_EXPECTED_AS_STATUS(network_batch_size); - uint32_t min_active_trans = MIN_ACTIVE_TRANSFERS_SCALE * network_batch_size.value(); - uint32_t max_active_trans = MAX_ACTIVE_TRANSFERS_SCALE * network_batch_size.value(); + const auto nms_max_detections_per_frame = + layer_info.nms_info.number_of_classes * layer_info.nms_info.max_bboxes_per_class * layer_info.nms_info.chunks_per_frame; + + const auto max_active_transfers_scale = (layer_info.format.order == HAILO_FORMAT_ORDER_HAILO_NMS) ? + (nms_max_detections_per_frame * MAX_ACTIVE_TRANSFERS_SCALE) : MAX_ACTIVE_TRANSFERS_SCALE; + + const auto min_active_trans = MIN_ACTIVE_TRANSFERS_SCALE * network_batch_size.value(); + const auto max_active_trans = (layer_info.format.order == HAILO_FORMAT_ORDER_HAILO_NMS) ? + /* NMS Case - Value be be higher than UINT16_MAX. 
in this case we only limit to UART16_MAX with no error */ + std::min(static_cast(UINT16_MAX), max_active_transfers_scale * network_batch_size.value()) : + max_active_transfers_scale * network_batch_size.value(); - CHECK(IS_FIT_IN_UINT16(min_active_trans), HAILO_INVALID_ARGUMENT, + CHECK(IS_FIT_IN_UINT16(min_active_trans), HAILO_INVALID_ARGUMENT, "calculated min_active_trans for vdma descriptor list is out of UINT16 range"); - CHECK(IS_FIT_IN_UINT16(max_active_trans), HAILO_INVALID_ARGUMENT, + CHECK(IS_FIT_IN_UINT16(max_active_trans), HAILO_INVALID_ARGUMENT, "calculated min_active_trans for vdma descriptor list is out of UINT16 range"); auto latency_meter = (contains(m_latency_meters, layer_info.network_name)) ? m_latency_meters.at(layer_info.network_name) : nullptr; /* TODO - HRT-6829- page_size should be calculated inside the vDMA channel class create function */ - const auto transfer_size = (layer_info.nn_stream_config.periph_bytes_per_buffer * - layer_info.nn_stream_config.core_buffers_per_frame); - auto desc_sizes_pair = vdma::DescriptorList::get_desc_buffer_sizes_for_single_transfer(m_driver, - static_cast(min_active_trans), static_cast(max_active_trans), transfer_size); - CHECK_EXPECTED_AS_STATUS(desc_sizes_pair); - - const auto page_size = desc_sizes_pair->first; + static const bool IS_CIRCULAR = true; + const auto transfer_size = LayerInfoUtils::get_layer_transfer_size(layer_info); + + auto const DONT_FORCE_DEFAULT_PAGE_SIZE = false; + auto buffer_sizes_requirements = vdma::BufferSizesRequirements::get_sg_buffer_requirements_single_transfer( + m_driver.desc_max_page_size(), static_cast(min_active_trans), static_cast(max_active_trans), + transfer_size, IS_CIRCULAR, DONT_FORCE_DEFAULT_PAGE_SIZE); + CHECK_EXPECTED_AS_STATUS(buffer_sizes_requirements); + + const auto page_size = buffer_sizes_requirements->desc_page_size(); const auto descs_count = (nullptr != std::getenv("HAILO_CONFIGURE_FOR_HW_INFER")) ? 
- MAX_DESCS_COUNT : desc_sizes_pair->second; + MAX_DESCS_COUNT : buffer_sizes_requirements->descs_count(); - const auto channel_type = (0 == (m_config_params.stream_params_by_name.at(layer_info.name).flags & HAILO_STREAM_FLAGS_ASYNC)) ? - vdma::BoundaryChannel::Type::BUFFERED : vdma::BoundaryChannel::Type::ASYNC; + auto channel_type = get_boundary_vdma_channel_type(layer_info); + CHECK_EXPECTED_AS_STATUS(channel_type); auto channel = vdma::BoundaryChannel::create(channel_id.value(), channel_direction, m_driver, descs_count, page_size, - layer_info.name, latency_meter, network_batch_size.value(), channel_type); + layer_info.name, latency_meter, network_batch_size.value(), channel_type.release()); CHECK_EXPECTED_AS_STATUS(channel); m_boundary_channels.emplace(channel_id.value(), channel.release()); @@ -410,25 +463,23 @@ hailo_power_mode_t ResourcesManager::get_power_mode() const return m_config_params.power_mode; } -ExpectedRef ResourcesManager::create_inter_context_buffer(uint32_t transfer_size, - uint8_t src_stream_index, uint8_t src_context_index, const std::string &network_name, vdma::ChannelId d2h_channel_id) +ExpectedRef ResourcesManager::create_intermediate_buffer(uint32_t transfer_size, + uint16_t batch_size, uint8_t src_stream_index, uint8_t src_context_index, + vdma::ChannelId d2h_channel_id, IntermediateBuffer::StreamingType streaming_type) { - auto network_batch_size_exp = get_network_batch_size(network_name); - CHECK_EXPECTED(network_batch_size_exp); - auto network_batch_size = network_batch_size_exp.value(); - - auto buffer = InterContextBuffer::create(m_driver, transfer_size, network_batch_size, d2h_channel_id); + auto buffer = IntermediateBuffer::create(m_driver, transfer_size, batch_size, d2h_channel_id, + streaming_type); CHECK_EXPECTED(buffer); const auto key = std::make_pair(src_context_index, src_stream_index); - auto emplace_res = m_inter_context_buffers.emplace(key, buffer.release()); + auto emplace_res = m_intermediate_buffers.emplace(key, 
buffer.release()); return std::ref(emplace_res.first->second); } -ExpectedRef ResourcesManager::get_inter_context_buffer(const IntermediateBufferKey &key) +ExpectedRef ResourcesManager::get_intermediate_buffer(const IntermediateBufferKey &key) { - auto buffer_it = m_inter_context_buffers.find(key); - if (std::end(m_inter_context_buffers) == buffer_it) { + auto buffer_it = m_intermediate_buffers.find(key); + if (std::end(m_intermediate_buffers) == buffer_it) { return make_unexpected(HAILO_NOT_FOUND); } @@ -490,10 +541,15 @@ Expected ResourcesManager::get_default_streams_interfa return m_vdma_device.get_default_streams_interface(); } -hailo_status ResourcesManager::set_inter_context_channels_dynamic_batch_size(uint16_t dynamic_batch_size) +hailo_status ResourcesManager::set_dynamic_batch_size(uint16_t dynamic_batch_size) { - for (auto &key_buff_pair : m_inter_context_buffers) { - const auto status = key_buff_pair.second.reprogram(dynamic_batch_size); + if (CONTROL_PROTOCOL__IGNORE_DYNAMIC_BATCH_SIZE == dynamic_batch_size) { + LOGGER__TRACE("Received CONTROL_PROTOCOL__IGNORE_DYNAMIC_BATCH_SIZE == batch_size"); + return HAILO_SUCCESS; + } + + for (auto &key_buff_pair : m_intermediate_buffers) { + const auto status = key_buff_pair.second.set_dynamic_batch_size(dynamic_batch_size); CHECK_SUCCESS(status); } @@ -520,24 +576,11 @@ Expected ResourcesManager::get_network_batch_size(const std::string &n Expected ResourcesManager::read_intermediate_buffer(const IntermediateBufferKey &key) { - auto inter_context_buffer_it = m_inter_context_buffers.find(key); - if (std::end(m_inter_context_buffers) != inter_context_buffer_it) { - return inter_context_buffer_it->second.read(); - } - - const auto dynamic_context_index = key.first; - const size_t context_index = dynamic_context_index + CONTROL_PROTOCOL__CONTEXT_SWITCH_NUMBER_OF_NON_DYNAMIC_CONTEXTS; - CHECK_AS_EXPECTED(context_index < m_contexts_resources.size(), HAILO_NOT_FOUND, "Context index {} out of range", - 
dynamic_context_index); - const auto d2h_stream_index = key.second; - if (auto ddr_channels_pair = m_contexts_resources[context_index].get_ddr_channels_pair(d2h_stream_index)) { - return ddr_channels_pair->get().read(); - } - - LOGGER__ERROR("Failed to find intermediate buffer for src_context {}, src_stream_index {}", key.first, + auto intermediate_buffer_it = m_intermediate_buffers.find(key); + CHECK_AS_EXPECTED(std::end(m_intermediate_buffers) != intermediate_buffer_it, + HAILO_NOT_FOUND, "Failed to find intermediate buffer for src_context {}, src_stream_index {}", key.first, key.second); - return make_unexpected(HAILO_NOT_FOUND); - + return intermediate_buffer_it->second.read(); } hailo_status ResourcesManager::configure() @@ -559,9 +602,9 @@ hailo_status ResourcesManager::configure() return HAILO_SUCCESS; } -hailo_status ResourcesManager::enable_state_machine(uint16_t dynamic_batch_size) +hailo_status ResourcesManager::enable_state_machine(uint16_t dynamic_batch_size, uint16_t batch_count) { - return Control::enable_core_op(m_vdma_device, m_core_op_index, dynamic_batch_size); + return Control::enable_core_op(m_vdma_device, m_core_op_index, dynamic_batch_size, batch_count); } hailo_status ResourcesManager::reset_state_machine(bool keep_nn_config_during_reset) @@ -627,9 +670,8 @@ Expected ResourcesManager::program_desc_for_hw_only_flow(std::shared_p for (uint16_t transfer_index = 0; transfer_index < dynamic_batch_size; transfer_index++) { const auto last_desc_interrupts_domain = ((dynamic_batch_size - 1) == transfer_index) ? vdma::InterruptsDomain::DEVICE : vdma::InterruptsDomain::NONE; - static const auto BUFFER_NOT_CIRCULAR = false; auto desc_count_local = desc_list->program_last_descriptor(single_transfer_size, - last_desc_interrupts_domain, acc_desc_offset, BUFFER_NOT_CIRCULAR); + last_desc_interrupts_domain, acc_desc_offset); CHECK_EXPECTED(desc_count_local, "Failed to program descs for inter context channels. 
Given max_batch_size is too big."); acc_desc_offset += desc_count_local.value(); } @@ -640,7 +682,7 @@ Expected ResourcesManager::program_desc_for_hw_only_flow(std::shared_p } Expected> ResourcesManager::create_mapped_buffer_for_hw_only_infer( - vdma::BoundaryChannelPtr boundary_channel_ptr, const hailo_vdma_buffer_direction_flags_t direction, + vdma::BoundaryChannelPtr boundary_channel_ptr, const HailoRTDriver::DmaDirection direction, const uint32_t single_transfer_size, const uint16_t dynamic_batch_size, const uint16_t batch_count) { auto total_frames_per_run = dynamic_batch_size * batch_count; @@ -652,15 +694,12 @@ Expected> ResourcesManager::create_mapped_b CHECK_AS_EXPECTED(IS_FIT_IN_UINT16(total_desc_count), HAILO_INVALID_ARGUMENT, "calculated total_desc_count for vdma descriptor list is out of UINT16 range"); - auto mapped_buffer_exp = DmaMappedBuffer::create(total_desc_count * desc_list->desc_page_size(), direction, m_vdma_device); - CHECK_EXPECTED(mapped_buffer_exp); - - auto mapped_buffer = make_shared_nothrow(mapped_buffer_exp.release()); - CHECK_NOT_NULL_AS_EXPECTED(mapped_buffer, HAILO_OUT_OF_HOST_MEMORY); - m_hw_only_boundary_buffers.push_back(mapped_buffer); + auto mapped_buffer = vdma::MappedBuffer::create_shared(m_driver, direction, total_desc_count * desc_list->desc_page_size()); + CHECK_EXPECTED(mapped_buffer); + m_hw_only_boundary_buffers.emplace_back(mapped_buffer.release()); uint32_t STARTING_DESC = 0; - auto status = desc_list->configure_to_use_buffer(*mapped_buffer, boundary_channel_ptr->get_channel_id(), STARTING_DESC); + auto status = desc_list->configure_to_use_buffer(*m_hw_only_boundary_buffers.back(), boundary_channel_ptr->get_channel_id(), STARTING_DESC); CHECK_SUCCESS_AS_EXPECTED(status); auto desc_programed = program_desc_for_hw_only_flow(desc_list, single_transfer_size, dynamic_batch_size, batch_count); @@ -684,13 +723,32 @@ void ResourcesManager::add_channel_to_hw_infer_channel_info(std::pair(opaque)->notify_one(); + return; + }; + 
+ auto status = get_device().set_notification_callback(callback, + HAILO_NOTIFICATION_ID_HW_INFER_MANAGER_INFER_DONE, static_cast(&infer_done_cond)); + CHECK_SUCCESS(status); + + return HAILO_SUCCESS; +} + Expected ResourcesManager::calc_hw_infer_batch_count(uint16_t dynamic_batch_size) { uint16_t batch_count = UINT16_MAX; for (const auto &layer_info : m_core_op_metadata->get_all_layer_infos()) { const auto stream_info = LayerInfoUtils::get_stream_info_from_layer_info(layer_info); - const auto single_transfer_size = (HAILO_FORMAT_ORDER_HAILO_NMS == stream_info.format.order) ? - stream_info.nms_info.bbox_size : stream_info.hw_frame_size; + uint32_t single_transfer_size = LayerInfoUtils::get_stream_transfer_size(stream_info, layer_info); auto boundary_channel_ptr_exp = get_boundary_vdma_channel_by_stream_name(layer_info.name); CHECK_EXPECTED(boundary_channel_ptr_exp); auto boundary_channel_ptr = boundary_channel_ptr_exp.release(); @@ -701,33 +759,40 @@ Expected ResourcesManager::calc_hw_infer_batch_count(uint16_t dynamic_ return batch_count; } -void ResourcesManager::hw_infer_calc_stats(uint16_t batch_count, uint16_t dynamic_batch_size, +HwInferResults ResourcesManager::hw_infer_calc_stats(uint16_t batch_count, uint16_t dynamic_batch_size, size_t single_frame_transfer_size, uint32_t infer_cycles) { - const auto total_transfer_size = single_frame_transfer_size * dynamic_batch_size * batch_count; - const auto total_frames = dynamic_batch_size * batch_count; + HwInferResults hw_infer_results{}; + const size_t total_transfer_size = single_frame_transfer_size * dynamic_batch_size * batch_count; + const size_t total_frames_passed = dynamic_batch_size * batch_count; // TODO - get clock rate from Chip (still not supported in VPU mode) const float32_t CPU_CLOCK_RATE = static_cast(5.0 / (1000 * 1000 * 1000)); const float32_t time_sec = static_cast(infer_cycles) * CPU_CLOCK_RATE; - const float32_t fps = static_cast(total_frames) / time_sec; + const float32_t fps = 
static_cast(total_frames_passed) / time_sec; const float32_t BYTE_TO_BIT = 8.0; const float32_t BITS_TO_GBIT = static_cast(1.0 * 1000 * 1000 * 1000); const float32_t BW_Gbps = static_cast(total_transfer_size) * BYTE_TO_BIT / time_sec / BITS_TO_GBIT; - LOGGER__ERROR("\nBatch count - {}\nTotal transfer size: {}\ntotal_frames - {}\ntime_sec - {}\nfps - {}\nBW_Gbps - {}", - batch_count, total_transfer_size, total_frames, time_sec, fps, BW_Gbps); + + /* Prepare results */ + hw_infer_results.batch_count = batch_count; + hw_infer_results.total_transfer_size = total_transfer_size; + hw_infer_results.total_frames_passed = total_frames_passed; + hw_infer_results.time_sec = time_sec; + hw_infer_results.fps = fps; + hw_infer_results.BW_Gbps = BW_Gbps; + + return hw_infer_results; } -Expected ResourcesManager::run_hw_only_infer(uint16_t dynamic_batch_size) +Expected ResourcesManager::run_hw_only_infer() { - CONTROL_PROTOCOL__hw_only_infer_results_t infer_results = {}; - CONTROL_PROTOCOL__hw_infer_channels_info_t channels_info = {}; + CONTROL_PROTOCOL__hw_only_infer_results_t fw_infer_results{}; + CONTROL_PROTOCOL__hw_infer_channels_info_t channels_info{}; channels_info.channel_count = 0; + static constexpr auto INFER_TIMEOUT = std::chrono::milliseconds(120000); - CHECK_AS_EXPECTED(dynamic_batch_size <= m_config_params.batch_size, HAILO_INVALID_ARGUMENT, - "Dynamic batch size must be up to configured batch size"); - - auto batch_count = calc_hw_infer_batch_count(dynamic_batch_size); + auto batch_count = calc_hw_infer_batch_count(m_config_params.batch_size); CHECK_EXPECTED(batch_count); for (const auto &layer_info : m_core_op_metadata->get_all_layer_infos()) { @@ -737,31 +802,36 @@ Expected ResourcesManager::run_hw_onl auto single_transfer_size = (HAILO_FORMAT_ORDER_HAILO_NMS == stream_info.format.order) ? stream_info.nms_info.bbox_size : stream_info.hw_frame_size; const auto direction = (layer_info.direction == HAILO_H2D_STREAM) ? 
- HAILO_VDMA_BUFFER_DIRECTION_FLAGS_H2D : HAILO_VDMA_BUFFER_DIRECTION_FLAGS_D2H; + HailoRTDriver::DmaDirection::H2D : HailoRTDriver::DmaDirection::D2H; auto channel_info_pair = create_mapped_buffer_for_hw_only_infer(boundary_channel_ptr.release(), direction, - single_transfer_size, dynamic_batch_size, batch_count.value()); + single_transfer_size, m_config_params.batch_size, batch_count.value()); CHECK_EXPECTED(channel_info_pair); add_channel_to_hw_infer_channel_info(channel_info_pair.release(), channels_info); } - auto status = Control::start_hw_only_infer(m_vdma_device, m_core_op_index, dynamic_batch_size, &channels_info); + std::condition_variable infer_done_cond; + auto status = set_hw_infer_done_notification(infer_done_cond); + CHECK_SUCCESS_AS_EXPECTED(status); + + std::mutex mutex; + std::unique_lock lock(mutex); + + status = Control::start_hw_only_infer(m_vdma_device, m_core_op_index, m_config_params.batch_size, + batch_count.value(), &channels_info); CHECK_SUCCESS_AS_EXPECTED(status); - // Delay until infer ends - // TODO HRT-9829 - chagne to notification from FW - std::this_thread::sleep_for(std::chrono::milliseconds(20000)); + infer_done_cond.wait_for(lock, INFER_TIMEOUT); - status = Control::stop_hw_only_infer(m_vdma_device, &infer_results); + status = Control::stop_hw_only_infer(m_vdma_device, &fw_infer_results); CHECK_SUCCESS_AS_EXPECTED(status); auto single_frame_transfer_size = m_core_op_metadata->get_total_transfer_size(); CHECK_EXPECTED(single_frame_transfer_size); - hw_infer_calc_stats(batch_count.value(), dynamic_batch_size, single_frame_transfer_size.release(), infer_results.infer_cycles); - - return infer_results; + return hw_infer_calc_stats(batch_count.value(), m_config_params.batch_size, single_frame_transfer_size.release(), + fw_infer_results.infer_cycles); } } /* namespace hailort */ diff --git a/hailort/libhailort/src/core_op/resource_manager/resource_manager.hpp b/hailort/libhailort/src/core_op/resource_manager/resource_manager.hpp index 
9417167..3a6b4db 100644 --- a/hailort/libhailort/src/core_op/resource_manager/resource_manager.hpp +++ b/hailort/libhailort/src/core_op/resource_manager/resource_manager.hpp @@ -28,8 +28,7 @@ #include "hailo/hailort.h" -#include "core_op/resource_manager/inter_context_buffer.hpp" -#include "core_op/resource_manager/ddr_channels_pair.hpp" +#include "core_op/resource_manager/intermediate_buffer.hpp" #include "core_op/resource_manager/config_buffer.hpp" #include "core_op/resource_manager/channel_allocator.hpp" #include "core_op/resource_manager/context_switch_buffer_builder.hpp" @@ -42,6 +41,7 @@ namespace hailort { #define DEFAULT_ACTUAL_BATCH_SIZE (1) +#define MAX_NUMBER_DATA_STREAM_INDEX (20) struct EdgeLayer { @@ -50,6 +50,39 @@ struct EdgeLayer { CONTROL_PROTOCOL__host_buffer_info_t buffer_info; }; +struct DdrChannelsInfo +{ + vdma::ChannelId d2h_channel_id; + uint8_t d2h_stream_index; + vdma::ChannelId h2d_channel_id; + uint8_t h2d_stream_index; + CONTROL_PROTOCOL__host_buffer_info_t host_buffer_info; + uint8_t network_index; + uint16_t row_size; + uint16_t min_buffered_rows; + // total_buffers_per_frame not same as core_buffer_per frame. + //(In DDR core buffer per frame is 1). Used to calc total host descriptors_per_frame. + uint16_t total_buffers_per_frame; + + // Checks if the credits are automaticaly going from d2h channel to its h2d channel, or it needs to be done manually + // (Using a fw task). 
+ bool need_manual_credit_management() const + { + return host_buffer_info.buffer_type == CONTROL_PROTOCOL__HOST_BUFFER_TYPE_EXTERNAL_DESC; + } + + uint16_t descs_count() const + { + assert(IS_FIT_IN_UINT16(host_buffer_info.total_desc_count)); + return static_cast(host_buffer_info.total_desc_count); + } + + uint32_t descriptors_per_frame() const + { + return (row_size / host_buffer_info.desc_page_size) * total_buffers_per_frame; + } +}; + class ContextResources final { public: static Expected create(HailoRTDriver &driver, CONTROL_PROTOCOL__context_switch_context_type_t context_type, @@ -58,21 +91,23 @@ public: const std::vector &get_controls() const; ContextSwitchBufferBuilder &builder(); - void add_edge_layer(const LayerInfo &layer_info, vdma::ChannelId channel_id, - const CONTROL_PROTOCOL__host_buffer_info_t &buffer_info); + hailo_status add_edge_layer(const LayerInfo &layer_info, vdma::ChannelId channel_id, + const CONTROL_PROTOCOL__host_buffer_info_t &buffer_info, const SupportedFeatures &supported_features); + void add_ddr_channels_info(const DdrChannelsInfo &ddr_info); std::vector get_edge_layers() const; std::vector get_edge_layers(LayerType layer_type) const; std::vector get_edge_layers(hailo_stream_direction_t direction) const; std::vector get_edge_layers(LayerType layer_type, hailo_stream_direction_t direction) const; - Expected get_edge_layer_by_stream_index(uint8_t stream_index) const; + Expected get_edge_layer_by_stream_index(const uint8_t stream_index, + const hailo_stream_direction_t direction) const; - ExpectedRef create_ddr_channels_pair(const DdrChannelsInfo &ddr_info); - ExpectedRef get_ddr_channels_pair(uint8_t d2h_stream_index) const; - const std::vector &get_ddr_channels_pairs() const; + Expected get_ddr_channels_info(uint8_t d2h_stream_index) const; + const std::vector &get_ddr_channels_infos() const; - hailo_status validate_edge_layers(); + hailo_status validate_edge_layer(const LayerInfo &layer_info, vdma::ChannelId channel_id, + const 
SupportedFeatures &supported_features); std::vector &get_config_buffers(); @@ -87,9 +122,9 @@ private: std::reference_wrapper m_driver; ContextSwitchBufferBuilder m_builder; std::vector m_config_buffers; - std::vector m_ddr_channels_pairs; std::vector m_edge_layers; + std::vector m_ddr_channels_infos; }; class ResourcesManager final @@ -106,9 +141,11 @@ public: ResourcesManager &operator=(ResourcesManager &&other) = delete; ResourcesManager(ResourcesManager &&other) noexcept; - ExpectedRef create_inter_context_buffer(uint32_t transfer_size, uint8_t src_stream_index, - uint8_t src_context_index, const std::string &network_name, vdma::ChannelId d2h_channel_id); - ExpectedRef get_inter_context_buffer(const IntermediateBufferKey &key); + ExpectedRef create_intermediate_buffer(uint32_t transfer_size, uint16_t batch_size, + uint8_t src_stream_index, uint8_t src_context_index, vdma::ChannelId d2h_channel_id, + IntermediateBuffer::StreamingType streaming_type); + ExpectedRef get_intermediate_buffer(const IntermediateBufferKey &key); + Expected get_boundary_vdma_channel_type(const LayerInfo &layer_info); hailo_status create_boundary_vdma_channel(const LayerInfo &layer_info); Expected get_control_core_op_header(); @@ -149,9 +186,10 @@ public: Expected read_intermediate_buffer(const IntermediateBufferKey &key); - hailo_status set_inter_context_channels_dynamic_batch_size(uint16_t dynamic_batch_size); + hailo_status set_dynamic_batch_size(uint16_t dynamic_batch_size); hailo_status configure(); - hailo_status enable_state_machine(uint16_t dynamic_batch_size); + hailo_status enable_state_machine(uint16_t dynamic_batch_size, + uint16_t batch_count = CONTROL_PROTOCOL__INIFINITE_BATCH_COUNT); hailo_status reset_state_machine(bool keep_nn_config_during_reset = false); hailo_status cancel_pending_async_transfers(); hailo_status start_vdma_interrupts_dispatcher(); @@ -163,14 +201,15 @@ public: Expected program_desc_for_hw_only_flow(std::shared_ptr desc_list, const uint32_t 
single_transfer_size, const uint16_t dynamic_batch_size, const uint16_t batch_count); Expected> create_mapped_buffer_for_hw_only_infer( - vdma::BoundaryChannelPtr boundary_channel_ptr, const hailo_vdma_buffer_direction_flags_t direction, + vdma::BoundaryChannelPtr boundary_channel_ptr, const HailoRTDriver::DmaDirection direction, const uint32_t single_transfer_size, const uint16_t dynamic_batch_size, const uint16_t batch_count); void add_channel_to_hw_infer_channel_info(std::pair channel_info, CONTROL_PROTOCOL__hw_infer_channels_info_t &channels_info); Expected calc_hw_infer_batch_count(uint16_t dynamic_batch_size); - void hw_infer_calc_stats(uint16_t batch_count, uint16_t dynamic_batch_size, + HwInferResults hw_infer_calc_stats(uint16_t batch_count, uint16_t dynamic_batch_size, size_t single_frame_transfer_size, uint32_t infer_cycles); - Expected run_hw_only_infer(uint16_t dynamic_batch_size); + hailo_status set_hw_infer_done_notification(std::condition_variable &infer_done_cond); + Expected run_hw_only_infer(); private: hailo_status fill_infer_features(CONTROL_PROTOCOL__application_header_t &app_header); @@ -184,7 +223,7 @@ private: VdmaDevice &m_vdma_device; HailoRTDriver &m_driver; const ConfigureNetworkParams m_config_params; - std::map m_inter_context_buffers; + std::map m_intermediate_buffers; std::shared_ptr m_core_op_metadata; uint8_t m_core_op_index; uint8_t m_dynamic_context_count; @@ -198,7 +237,7 @@ private: // config_stream_index. 
std::vector m_config_channels_ids; // Mapped buffers would be used only in hw only flow - std::vector> m_hw_only_boundary_buffers; + std::vector> m_hw_only_boundary_buffers; ResourcesManager(VdmaDevice &vdma_device, HailoRTDriver &driver, ChannelAllocator &&channel_allocator, const ConfigureNetworkParams config_params, diff --git a/hailort/libhailort/src/core_op/resource_manager/resource_manager_builder.cpp b/hailort/libhailort/src/core_op/resource_manager/resource_manager_builder.cpp index 0218b10..b05b833 100644 --- a/hailort/libhailort/src/core_op/resource_manager/resource_manager_builder.cpp +++ b/hailort/libhailort/src/core_op/resource_manager/resource_manager_builder.cpp @@ -15,7 +15,7 @@ namespace hailort { -static uint16_t calculate_periph_buffers_per_frame(const CONTROL_PROTOCOL__hw_consts_t &hw_consts, +static uint16_t calculate_power_optimized_periph_buffers_per_frame(const CONTROL_PROTOCOL__hw_consts_t &hw_consts, uint16_t min_periph_buffers_per_frame, uint32_t frame_size, uint16_t periph_buffers_per_frame) { const auto max_periph_buffers_per_frame = MIN(frame_size, static_cast(hw_consts.max_periph_buffers_per_frame)); @@ -37,78 +37,165 @@ static uint16_t calculate_periph_buffers_per_frame(const CONTROL_PROTOCOL__hw_co } } -static hailo_status calculate_credit_params(const CONTROL_PROTOCOL__hw_consts_t &hw_consts, uint16_t desc_page_size, - hailo_stream_direction_t direction, bool should_optimize_credits, uint16_t *periph_bytes_per_buffer, - uint16_t *periph_buffers_per_frame) +static Expected calculate_credit_params(const CONTROL_PROTOCOL__hw_consts_t &hw_consts, uint16_t desc_page_size, + bool should_optimize_credits, const LayerInfo &layer_info) { // Next parameters differ between RX and TX - auto local_periph_bytes_per_buffer = (*periph_bytes_per_buffer); - auto local_periph_buffers_per_frame = (*periph_buffers_per_frame); - uint32_t periph_frame_size = (*periph_bytes_per_buffer) * (*periph_buffers_per_frame); - const auto max_bytes_per_buffer = 
MAX(hw_consts.max_acceptable_bytes_per_buffer, (*periph_bytes_per_buffer)); + auto local_periph_bytes_per_buffer = layer_info.nn_stream_config.periph_bytes_per_buffer; + auto local_periph_buffers_per_frame = layer_info.nn_stream_config.periph_buffers_per_frame; + uint32_t periph_frame_size = local_periph_bytes_per_buffer * local_periph_buffers_per_frame; + const auto max_bytes_per_buffer = MAX(hw_consts.max_acceptable_bytes_per_buffer, local_periph_bytes_per_buffer); - if (0 != (local_periph_bytes_per_buffer % hw_consts.fifo_word_granularity_bytes)) { - return HAILO_INTERNAL_FAILURE; - } + CHECK_AS_EXPECTED(0 == (local_periph_bytes_per_buffer % hw_consts.fifo_word_granularity_bytes), HAILO_INTERNAL_FAILURE, + "Error, Invalid periph bytes ber puffer value {} must divide by {} with no remainder", + local_periph_bytes_per_buffer, hw_consts.fifo_word_granularity_bytes); if (should_optimize_credits) { // If credits optimizations flag is on, assuming periph_buffers_per_frame * periph_bytes_per_buffer == periph_frame_size // Find the lowest periph_buffers_per_frame that divides periph_frame_size and is bigger than periph_frame_size / max_bytes_per_buffer // Also, periph_bytes_per_buffer must be a multiple of 8 const auto min_periph_buffers_per_frame = DIV_ROUND_UP(periph_frame_size, max_bytes_per_buffer); - local_periph_buffers_per_frame = calculate_periph_buffers_per_frame(hw_consts, static_cast(min_periph_buffers_per_frame), - periph_frame_size, local_periph_buffers_per_frame); + local_periph_buffers_per_frame = calculate_power_optimized_periph_buffers_per_frame(hw_consts, + static_cast(min_periph_buffers_per_frame), periph_frame_size, local_periph_buffers_per_frame); assert(IS_FIT_IN_UINT16(periph_frame_size / local_periph_buffers_per_frame)); local_periph_bytes_per_buffer = static_cast(periph_frame_size / local_periph_buffers_per_frame); // Must be integer according to last function } // Periph credits size must be lower than the following value to make sure that the 
credit size allows // for at least desc_page_size bytes left in the FIFO for the last descriptor in the pattern - if ((direction == HAILO_D2H_STREAM) && - (static_cast(local_periph_bytes_per_buffer) > (hw_consts.outbound_data_stream_size - 8 - desc_page_size))) { - LOGGER__ERROR("Current periph_bytes_per_buffer is {} which is too high. Exiting.", local_periph_bytes_per_buffer); - return HAILO_INTERNAL_FAILURE; + const bool space_left_in_fifo = ((layer_info.direction != HAILO_D2H_STREAM) || + (static_cast(local_periph_bytes_per_buffer) <= (hw_consts.outbound_data_stream_size - 8 - desc_page_size))); + CHECK_AS_EXPECTED(space_left_in_fifo, HAILO_INTERNAL_FAILURE, + "Current periph_bytes_per_buffer is {} which is too high. Exiting.", local_periph_bytes_per_buffer); + + auto updated_layer_info = layer_info; + updated_layer_info.nn_stream_config.periph_bytes_per_buffer = local_periph_bytes_per_buffer; + updated_layer_info.nn_stream_config.periph_buffers_per_frame = local_periph_buffers_per_frame; + + return updated_layer_info; +} + +// NOTE: in case of ddr where periph is aligned to PERIPH_BYTES_PER_BUFFER_DDR_ALIGNMENT_SIZE we cant force that +// periph_bytes_per_buffer * periph_buffers_per_frame will equal exactly hw_frame_size. 
+static bool is_logical_periph_bytes_per_buffer(const uint32_t periph_bytes_per_buffer, const size_t hw_frame_size, const bool is_ddr, + const uint32_t max_shmifo_size, const uint32_t desc_page_size, const uint32_t max_periph_bytes_value, + const uint16_t core_bytes_per_buffer) +{ + if (is_ddr) { + // In DDR there is no residue of descriptor - but has to divide with no remainder by core_bytes_per_buffer + // Calculated by DFC + return (periph_bytes_per_buffer < max_shmifo_size) && (periph_bytes_per_buffer <= max_periph_bytes_value) && + (0 == (core_bytes_per_buffer % periph_bytes_per_buffer)); } + return ((periph_bytes_per_buffer < (max_shmifo_size - desc_page_size)) && + (0 == (hw_frame_size % periph_bytes_per_buffer)) && (periph_bytes_per_buffer <= max_periph_bytes_value)); +} - *periph_bytes_per_buffer = local_periph_bytes_per_buffer; - *periph_buffers_per_frame = local_periph_buffers_per_frame; - return HAILO_SUCCESS; +static Expected> calculate_periph_requirements(const LayerInfo &layer_info, const uint32_t desc_page_size, + const bool is_periph_calculated_in_hailort, const uint32_t max_periph_bytes_value) +{ + // If extension for calculating periph values in hailort is false - copy values from core registers, otherwise + // If extension is true - calculate them according to shape and other layer information + if (!is_periph_calculated_in_hailort) { + return std::make_tuple(static_cast(layer_info.nn_stream_config.core_bytes_per_buffer), + static_cast(layer_info.nn_stream_config.core_buffers_per_frame)); + } + + if (HAILO_FORMAT_ORDER_HAILO_NMS == layer_info.format.order) { + CHECK_AS_EXPECTED(IS_FIT_IN_UINT16(layer_info.nms_info.bbox_size * layer_info.nms_info.burst_size), + HAILO_INVALID_HEF, "Invalid burst size"); + return std::make_tuple(static_cast(layer_info.nms_info.bbox_size * layer_info.nms_info.burst_size), + static_cast(1)); + } + + CHECK_AS_EXPECTED(IS_FIT_IN_UINT32(layer_info.hw_shape.width * layer_info.hw_shape.features *
layer_info.hw_shape.height * layer_info.hw_data_bytes), HAILO_INVALID_HEF, "Invalid core frame size"); + + const auto is_ddr = (LayerType::DDR == layer_info.type); + const uint32_t alignment = is_ddr ? PERIPH_BYTES_PER_BUFFER_DDR_ALIGNMENT_SIZE : PERIPH_BYTES_PER_BUFFER_ALIGNMENT_SIZE; + const auto row_size = static_cast(layer_info.hw_shape.width * layer_info.hw_shape.features * + layer_info.hw_data_bytes); + const auto core_frame_size = layer_info.hw_shape.height * row_size; + + // Currently takes the largest periph_bytes_per_buffer that is possible with shmifo size and desc page size + // TODO HRT-10961 : calculate optimal periph size + auto periph_bytes_per_buffer = HailoRTCommon::align_to(row_size, alignment); + while (!is_logical_periph_bytes_per_buffer(periph_bytes_per_buffer, core_frame_size, is_ddr, layer_info.max_shmifo_size, + desc_page_size, max_periph_bytes_value, layer_info.nn_stream_config.core_bytes_per_buffer) && (0 < periph_bytes_per_buffer)) { + periph_bytes_per_buffer -= alignment; + } + + CHECK_AS_EXPECTED(0 != periph_bytes_per_buffer, HAILO_INVALID_ARGUMENT, "Error, Could not find logical periph bytes per buffer value"); + + uint32_t periph_buffers_per_frame = (core_frame_size / periph_bytes_per_buffer); + // In ddr if we get a periph bytes per buffer so small that the periph buffers per frame can't fit in uint16 + // put uint16_t max - seeing as this value doesn't really affect anything and we should not fail in that case.
+ if (is_ddr && !IS_FIT_IN_UINT16(periph_buffers_per_frame)) { + LOGGER__DEBUG("periph buffers per frame in ddr too large for 16 bit register - putting uint16_t max"); + periph_buffers_per_frame = UINT16_MAX; + } + CHECK_AS_EXPECTED(IS_FIT_IN_UINT16(periph_buffers_per_frame), HAILO_INVALID_ARGUMENT); + + return std::make_tuple(static_cast(periph_bytes_per_buffer), static_cast(periph_buffers_per_frame)); } static Expected update_layer_info(const LayerInfo &original_layer_info, const CONTROL_PROTOCOL__host_buffer_info_t &buffer_info, - const CONTROL_PROTOCOL__hw_consts_t &hw_consts, bool should_optimize_credits) + const CONTROL_PROTOCOL__hw_consts_t &hw_consts, const ProtoHEFHwArch &hw_arch, const bool should_optimize_credits, + const bool is_periph_calculated_in_hailort) { LayerInfo local_layer_info = original_layer_info; - auto status = calculate_credit_params(hw_consts, buffer_info.desc_page_size, local_layer_info.direction, - should_optimize_credits, &local_layer_info.nn_stream_config.periph_bytes_per_buffer, - &local_layer_info.nn_stream_config.periph_buffers_per_frame); - CHECK_SUCCESS_AS_EXPECTED(status); - if (local_layer_info.max_shmifo_size == 0) { local_layer_info.max_shmifo_size = hw_consts.default_initial_credit_size; } - return local_layer_info; + // If Hw padding supported dont update periph registers because they were updated in get_hw_padding + // TODO HRT-11006 : currently check is_hw_padding_supported and the feature_padding_payload because in MIPI Input stream + // Even if is_hw_padding_supported is true we will not use hw padding. 
+ auto max_periph_bytes_from_hef = HefConfigurator::max_periph_bytes_value(DeviceBase::hef_arch_to_device_arch(hw_arch)); + CHECK_EXPECTED(max_periph_bytes_from_hef); + const auto max_periph_bytes = MIN(max_periph_bytes_from_hef.value(), local_layer_info.max_shmifo_size); + + const bool hw_padding_supported = HefConfigurator::is_hw_padding_supported(local_layer_info, + max_periph_bytes) && (0 != original_layer_info.nn_stream_config.feature_padding_payload); + if (!hw_padding_supported) { + // Update periph values + const auto periph_requirements = calculate_periph_requirements(local_layer_info, buffer_info.desc_page_size, + is_periph_calculated_in_hailort, max_periph_bytes); + CHECK_EXPECTED(periph_requirements); + + // Calculate and update value of periph bytes per buffer and periph buffers per frame + local_layer_info.nn_stream_config.periph_bytes_per_buffer = std::get<0>(periph_requirements.value()); + local_layer_info.nn_stream_config.periph_buffers_per_frame = std::get<1>(periph_requirements.value()); + } + + auto updated_local_layer_info = calculate_credit_params(hw_consts, buffer_info.desc_page_size, should_optimize_credits, + local_layer_info); + CHECK_EXPECTED(updated_local_layer_info); + + return updated_local_layer_info; } static hailo_status fill_boundary_input_layer(ContextResources &context_resources, ResourcesManager &resources_manager, const LayerInfo layer_info, const CONTROL_PROTOCOL__hw_consts_t &hw_consts, - bool should_optimize_credits) + const ProtoHEFHwArch &hw_arch, bool should_optimize_credits) { - const auto transfer_size = (layer_info.nn_stream_config.periph_bytes_per_buffer * - layer_info.nn_stream_config.core_buffers_per_frame); + const auto transfer_size = LayerInfoUtils::get_layer_transfer_size(layer_info); auto vdma_channel = resources_manager.get_boundary_vdma_channel_by_stream_name(layer_info.name); CHECK_EXPECTED_AS_STATUS(vdma_channel); const auto buffer_info = vdma_channel.value()->get_boundary_buffer_info(transfer_size); - auto 
local_layer_info = update_layer_info(layer_info, buffer_info, hw_consts, should_optimize_credits); + const bool is_periph_calculated_in_hailort = resources_manager.get_supported_features().periph_calculation_in_hailort; + auto local_layer_info = update_layer_info(layer_info, buffer_info, hw_consts, hw_arch, should_optimize_credits, + is_periph_calculated_in_hailort); CHECK_EXPECTED_AS_STATUS(local_layer_info); const auto channel_id = vdma_channel.value()->get_channel_id(); - context_resources.add_edge_layer(local_layer_info.value(), channel_id, buffer_info); + auto status = context_resources.add_edge_layer(local_layer_info.value(), channel_id, buffer_info, + resources_manager.get_supported_features()); + CHECK_SUCCESS(status); LOGGER__DEBUG("Boundary input stream: {} h2d_channel: {}.", layer_info.stream_index, channel_id); return HAILO_SUCCESS; @@ -116,7 +203,7 @@ static hailo_status fill_boundary_input_layer(ContextResources &context_resource static hailo_status fill_inter_context_input_layer(ContextResources &context_resources, ResourcesManager &resources_manager, const LayerInfo &layer_info, const CONTROL_PROTOCOL__hw_consts_t &hw_consts, - bool should_optimize_credits) + const ProtoHEFHwArch &hw_arch, bool should_optimize_credits) { const auto channel_id = resources_manager.get_available_channel_id(to_layer_identifier(layer_info), HailoRTDriver::DmaDirection::H2D, layer_info.dma_engine_index); @@ -125,17 +212,19 @@ static hailo_status fill_inter_context_input_layer(ContextResources &context_res /* Get inter context buffer previously created */ const auto &connected_context = layer_info.connected_context_info; auto intermediate_buffer_key = std::make_pair(connected_context.context_index, connected_context.stream_index); - auto inter_context_buffer_exp = resources_manager.get_inter_context_buffer(intermediate_buffer_key); + auto inter_context_buffer_exp = resources_manager.get_intermediate_buffer(intermediate_buffer_key); 
CHECK_EXPECTED_AS_STATUS(inter_context_buffer_exp, "Failed to find inter context buffer for src context {}, src_stream_index {}", connected_context.context_index, connected_context.stream_index); auto &inter_context_buffer = inter_context_buffer_exp->get(); + const bool is_periph_calculated_in_hailort = resources_manager.get_supported_features().periph_calculation_in_hailort; auto local_layer_info = update_layer_info(layer_info, inter_context_buffer.get_host_buffer_info(), hw_consts, - should_optimize_credits); + hw_arch, should_optimize_credits, is_periph_calculated_in_hailort); CHECK_EXPECTED_AS_STATUS(local_layer_info); - context_resources.add_edge_layer(local_layer_info.value(), channel_id.value(), - inter_context_buffer.get_host_buffer_info()); + auto status = context_resources.add_edge_layer(local_layer_info.value(), channel_id.value(), + inter_context_buffer.get_host_buffer_info(), resources_manager.get_supported_features()); + CHECK_SUCCESS(status); LOGGER__DEBUG("Intermediate input stream {}, src_context:{}, dst_context: {}, h2d_channel {}.", layer_info.stream_index, layer_info.context_index, layer_info.connected_context_info.context_index, @@ -146,20 +235,23 @@ static hailo_status fill_inter_context_input_layer(ContextResources &context_res static hailo_status fill_boundary_output_layer(ContextResources &context_resources, ResourcesManager &resources_manager, const LayerInfo &layer_info, const CONTROL_PROTOCOL__hw_consts_t &hw_consts, - bool should_optimize_credits) + const ProtoHEFHwArch &hw_arch, bool should_optimize_credits) { - const auto transfer_size = (layer_info.nn_stream_config.periph_bytes_per_buffer * - layer_info.nn_stream_config.core_buffers_per_frame); + const auto transfer_size = LayerInfoUtils::get_layer_transfer_size(layer_info); auto vdma_channel = resources_manager.get_boundary_vdma_channel_by_stream_name(layer_info.name); CHECK_EXPECTED_AS_STATUS(vdma_channel); const auto buffer_info = 
vdma_channel.value()->get_boundary_buffer_info(transfer_size); - auto local_layer_info = update_layer_info(layer_info, buffer_info, hw_consts, should_optimize_credits); + const bool is_periph_calculated_in_hailort = resources_manager.get_supported_features().periph_calculation_in_hailort; + auto local_layer_info = update_layer_info(layer_info, buffer_info, hw_consts, hw_arch, should_optimize_credits, + is_periph_calculated_in_hailort); CHECK_EXPECTED_AS_STATUS(local_layer_info); const auto channel_id = vdma_channel.value()->get_channel_id(); - context_resources.add_edge_layer(local_layer_info.value(), channel_id, buffer_info); + auto status = context_resources.add_edge_layer(local_layer_info.value(), channel_id, buffer_info, + resources_manager.get_supported_features()); + CHECK_SUCCESS(status); LOGGER__DEBUG("Boundary output stream: {} d2h_channel: {}.", layer_info.stream_index, channel_id); return HAILO_SUCCESS; @@ -167,26 +259,31 @@ static hailo_status fill_boundary_output_layer(ContextResources &context_resourc static hailo_status fill_inter_context_output_layer(ContextResources &context_resources, ResourcesManager &resources_manager, const LayerInfo &layer_info, - const CONTROL_PROTOCOL__hw_consts_t &hw_consts, bool should_optimize_credits) + const CONTROL_PROTOCOL__hw_consts_t &hw_consts, const ProtoHEFHwArch &hw_arch, bool should_optimize_credits) { const auto channel_id = resources_manager.get_available_channel_id(to_layer_identifier(layer_info), HailoRTDriver::DmaDirection::D2H, layer_info.dma_engine_index); CHECK_EXPECTED_AS_STATUS(channel_id); - const auto frame_credits_in_bytes = (layer_info.nn_stream_config.periph_bytes_per_buffer * - layer_info.nn_stream_config.core_buffers_per_frame); + const auto frame_credits_in_bytes = LayerInfoUtils::get_layer_transfer_size(layer_info); + + auto network_batch_size = resources_manager.get_network_batch_size(layer_info.network_name); + CHECK_EXPECTED_AS_STATUS(network_batch_size); - auto inter_context_buffer_exp = 
resources_manager.create_inter_context_buffer(frame_credits_in_bytes, - layer_info.stream_index, layer_info.context_index, layer_info.network_name, channel_id.value()); + auto inter_context_buffer_exp = resources_manager.create_intermediate_buffer(frame_credits_in_bytes, + network_batch_size.value(), layer_info.stream_index, layer_info.context_index, channel_id.value(), + IntermediateBuffer::StreamingType::BURST); CHECK_EXPECTED_AS_STATUS(inter_context_buffer_exp); auto &inter_context_buffer = inter_context_buffer_exp->get(); + const bool is_periph_calculated_in_hailort = resources_manager.get_supported_features().periph_calculation_in_hailort; auto local_layer_info = update_layer_info(layer_info, inter_context_buffer.get_host_buffer_info(), hw_consts, - should_optimize_credits); + hw_arch, should_optimize_credits, is_periph_calculated_in_hailort); CHECK_EXPECTED_AS_STATUS(local_layer_info); - context_resources.add_edge_layer(local_layer_info.value(), channel_id.value(), - inter_context_buffer.get_host_buffer_info()); + auto status = context_resources.add_edge_layer(local_layer_info.value(), channel_id.value(), + inter_context_buffer.get_host_buffer_info(), resources_manager.get_supported_features()); + CHECK_SUCCESS(status); LOGGER__DEBUG("Inter-context output stream {}, src_context:{}, d2h_channel {}.", layer_info.stream_index, layer_info.context_index, channel_id.value()); @@ -195,78 +292,103 @@ static hailo_status fill_inter_context_output_layer(ContextResources &context_re static hailo_status fill_ddr_output_layer(ContextResources &context_resources, ResourcesManager &resources_manager, const LayerInfo &layer_info, - const CONTROL_PROTOCOL__hw_consts_t &hw_consts) + const CONTROL_PROTOCOL__hw_consts_t &hw_consts, const ProtoHEFHwArch &hw_arch) { CHECK(resources_manager.get_supported_features().padded_ddr_buffers, HAILO_INVALID_HEF, "Failed opening non-compatible HEF that uses the following deprecated features: host-managed DDR buffers." 
"Please re-compile the HEF using a newer Dataflow Compiler version (v3.11.0 or newer)"); - // Allocate resources and prepare ddr_info - - DdrChannelsInfo ddr_pair_info = {}; - ddr_pair_info.h2d_stream_index = layer_info.connected_context_info.stream_index; - ddr_pair_info.d2h_stream_index = layer_info.stream_index; - ddr_pair_info.network_index = layer_info.network_index; - // It is assumed that output channels are parsed before input channels. + // It is assumed that output channels are parsed before input channels. // Allocate vdma channel index for both edges - const auto h2d_layer_identifier = std::make_tuple(LayerType::DDR, layer_info.name, ddr_pair_info.h2d_stream_index); + const auto h2d_stream_index = layer_info.connected_context_info.stream_index; + const auto h2d_layer_identifier = std::make_tuple(LayerType::DDR, HAILO_H2D_STREAM, + layer_info.name, h2d_stream_index); const auto h2d_channel_id = resources_manager.get_available_channel_id(h2d_layer_identifier, HailoRTDriver::DmaDirection::H2D, layer_info.connected_context_info.dma_engine_index); CHECK_EXPECTED_AS_STATUS(h2d_channel_id); - ddr_pair_info.h2d_channel_id = h2d_channel_id.value(); - const auto d2h_layer_identifier = std::make_tuple(LayerType::DDR, layer_info.name, ddr_pair_info.d2h_stream_index); + const auto d2h_stream_index = layer_info.stream_index; + const auto d2h_layer_identifier = std::make_tuple(LayerType::DDR, HAILO_D2H_STREAM, + layer_info.name, d2h_stream_index); const auto d2h_channel_id = resources_manager.get_available_channel_id(d2h_layer_identifier, HailoRTDriver::DmaDirection::D2H, layer_info.dma_engine_index); CHECK_EXPECTED_AS_STATUS(d2h_channel_id); - ddr_pair_info.d2h_channel_id = d2h_channel_id.value(); - ddr_pair_info.row_size = layer_info.nn_stream_config.core_bytes_per_buffer; - ddr_pair_info.min_buffered_rows = layer_info.ddr_info.min_buffered_rows; - ddr_pair_info.total_buffers_per_frame = layer_info.ddr_info.total_buffers_per_frame; + // In DDR layer there is no 
residue - so can ignore descriptor size + const auto IGNORE_DESCRIPTOR_SIZE = 0; + // Send layer info with updated shmifo size + auto layer_info_updated_shmifo = layer_info; + if (layer_info_updated_shmifo.max_shmifo_size == 0) { + layer_info_updated_shmifo.max_shmifo_size = hw_consts.default_initial_credit_size; + } + + auto max_periph_bytes = HefConfigurator::max_periph_bytes_value(DeviceBase::hef_arch_to_device_arch(hw_arch)); + CHECK_EXPECTED_AS_STATUS(max_periph_bytes, "Error calculating max periph bytes per buffer"); + const auto periph_values = calculate_periph_requirements(layer_info_updated_shmifo, IGNORE_DESCRIPTOR_SIZE, + resources_manager.get_supported_features().periph_calculation_in_hailort, max_periph_bytes.value()); + CHECK_EXPECTED_AS_STATUS(periph_values); + + const auto row_size = std::get<0>(periph_values.value()); + const auto min_buffered_rows = layer_info.ddr_info.min_buffered_rows; - // Create the ddr buffer - auto ddr_channels_pair = context_resources.create_ddr_channels_pair(ddr_pair_info); - CHECK_EXPECTED_AS_STATUS(ddr_channels_pair); + // Allocate the ddr buffer + auto ddr_buffer = resources_manager.create_intermediate_buffer(row_size, min_buffered_rows, + d2h_stream_index, layer_info.context_index, d2h_channel_id.value(), + IntermediateBuffer::StreamingType::CIRCULAR_CONTINUOS); + CHECK_EXPECTED_AS_STATUS(ddr_buffer); + + DdrChannelsInfo ddr_pair_info{}; + ddr_pair_info.h2d_stream_index = h2d_stream_index; + ddr_pair_info.d2h_stream_index = d2h_stream_index; + ddr_pair_info.network_index = layer_info.network_index; + ddr_pair_info.h2d_channel_id = h2d_channel_id.value(); + ddr_pair_info.d2h_channel_id = d2h_channel_id.value(); + ddr_pair_info.row_size = row_size; + ddr_pair_info.min_buffered_rows = min_buffered_rows; + ddr_pair_info.total_buffers_per_frame = layer_info.ddr_info.total_buffers_per_frame; + ddr_pair_info.host_buffer_info = ddr_buffer->get().get_host_buffer_info(); + context_resources.add_ddr_channels_info(ddr_pair_info); 
// On ddr layers, we assume the periph credit size is aligned to the size of descriptor, so we don't want to // optimize the credits. const bool should_optimize_credits = false; - auto local_layer_info = update_layer_info(layer_info, ddr_channels_pair->get().get_host_buffer_info(), hw_consts, - should_optimize_credits); + const bool is_periph_calculated_in_hailort = resources_manager.get_supported_features().periph_calculation_in_hailort; + auto local_layer_info = update_layer_info(layer_info, ddr_buffer->get().get_host_buffer_info(), hw_consts, + hw_arch, should_optimize_credits, is_periph_calculated_in_hailort); CHECK_EXPECTED_AS_STATUS(local_layer_info); - context_resources.add_edge_layer(local_layer_info.value(), ddr_pair_info.d2h_channel_id, - ddr_channels_pair->get().get_host_buffer_info()); + auto status = context_resources.add_edge_layer(local_layer_info.value(), ddr_pair_info.d2h_channel_id, + ddr_buffer->get().get_host_buffer_info(), resources_manager.get_supported_features()); + CHECK_SUCCESS(status); return HAILO_SUCCESS; } -static hailo_status fill_ddr_input_layer(ContextResources &context_resources, - const LayerInfo &layer_info, const CONTROL_PROTOCOL__hw_consts_t &hw_consts) +static hailo_status fill_ddr_input_layer(ContextResources &context_resources, ResourcesManager &resources_manager, + const LayerInfo &layer_info, const CONTROL_PROTOCOL__hw_consts_t &hw_consts, const ProtoHEFHwArch &hw_arch) { auto connected_stream_index = layer_info.connected_context_info.stream_index; - auto ddr_channels_pair = context_resources.get_ddr_channels_pair(connected_stream_index); - CHECK(ddr_channels_pair, HAILO_INVALID_HEF, "Matching DDR layer as not found for context {} src stream {}", + auto ddr_info = context_resources.get_ddr_channels_info(connected_stream_index); + CHECK_EXPECTED_AS_STATUS(ddr_info, "Matching DDR layer as not found for context {} src stream {}", layer_info.context_index, connected_stream_index); - - const auto ddr_info = 
ddr_channels_pair->get().info(); LOGGER__DEBUG("DDR layer: input stream_index: {}, output stream_index: {}, h2d_channel {}, d2h_channel: {}.", - ddr_info.h2d_stream_index, ddr_info.d2h_stream_index, ddr_info.h2d_channel_id, ddr_info.d2h_channel_id); + ddr_info->h2d_stream_index, ddr_info->d2h_stream_index, ddr_info->h2d_channel_id, ddr_info->d2h_channel_id); - CHECK(layer_info.stream_index == ddr_info.h2d_stream_index, HAILO_INVALID_HEF, "DDR channel pair mismatch in h2d channel"); - CHECK(layer_info.connected_context_info.stream_index == ddr_info.d2h_stream_index, HAILO_INVALID_HEF, "DDR channel pair mismatch in d2h channel"); - CHECK(layer_info.network_index == ddr_info.network_index, HAILO_INVALID_HEF, "DDR channel pair mismatch network_index"); + CHECK(layer_info.stream_index == ddr_info->h2d_stream_index, HAILO_INVALID_HEF, "DDR channel pair mismatch in h2d channel"); + CHECK(layer_info.connected_context_info.stream_index == ddr_info->d2h_stream_index, HAILO_INVALID_HEF, "DDR channel pair mismatch in d2h channel"); + CHECK(layer_info.network_index == ddr_info->network_index, HAILO_INVALID_HEF, "DDR channel pair mismatch network_index"); // On ddr layers, we assume the periph credit size is aligned to the size of descriptor, so we don't want to // optimize the credits. 
const bool should_optimize_credits = false; - auto local_layer_info = update_layer_info(layer_info, ddr_channels_pair->get().get_host_buffer_info(), hw_consts, - should_optimize_credits); + const bool is_periph_calculated_in_hailort = resources_manager.get_supported_features().periph_calculation_in_hailort; + auto local_layer_info = update_layer_info(layer_info, ddr_info->host_buffer_info, hw_consts, + hw_arch, should_optimize_credits, is_periph_calculated_in_hailort); CHECK_EXPECTED_AS_STATUS(local_layer_info); - context_resources.add_edge_layer(local_layer_info.value(), ddr_channels_pair->get().info().h2d_channel_id, - ddr_channels_pair->get().get_host_buffer_info()); + auto status = context_resources.add_edge_layer(local_layer_info.value(), ddr_info->h2d_channel_id, + ddr_info->host_buffer_info, resources_manager.get_supported_features()); + CHECK_SUCCESS(status); return HAILO_SUCCESS; } @@ -275,11 +397,10 @@ static hailo_status add_ddr_buffers_info(std::vector find_dummy_stream(const LayerInfo &layer_info, const ContextResources &context_resources) +// TODO HRT-10073: change to supported features list +static bool is_hailo15_device_type(const hailo_device_architecture_t dev_arch) { - const auto other_direction = (HAILO_H2D_STREAM == layer_info.direction) ? 
HAILO_D2H_STREAM : HAILO_H2D_STREAM; - const auto other_direction_edge_layers = context_resources.get_edge_layers(other_direction); - CHECK_AS_EXPECTED(!other_direction_edge_layers.empty(), HAILO_INTERNAL_FAILURE, "Couldn't find dummy stream"); - return Expected(other_direction_edge_layers.front().layer_info.stream_index); + // Compare with HAILO15 device arch + return (HAILO_ARCH_HAILO15 == dev_arch); } -static hailo_status add_change_vdma_to_stream_mapping( +static Expected find_dummy_stream(const LayerInfo &layer_info, const ContextResources &context_resources, + const bool is_null_shmifo_supported) +{ + if (is_null_shmifo_supported) { + static const uint8_t DUMMY_STREAM_INDEX = 31; + return Expected(DUMMY_STREAM_INDEX); + } else { + const auto other_direction = (HAILO_H2D_STREAM == layer_info.direction) ? HAILO_D2H_STREAM : HAILO_H2D_STREAM; + const auto other_direction_edge_layers = context_resources.get_edge_layers(other_direction); + CHECK_AS_EXPECTED(!other_direction_edge_layers.empty(), HAILO_INTERNAL_FAILURE, "Couldn't find dummy stream"); + return Expected(other_direction_edge_layers.front().layer_info.stream_index); + } +} + +static hailo_status add_change_vdma_to_stream_mapping(const ProtoHEFHwArch &hw_arch, const CoreOpMetadata &core_op_metadata, const ResourcesManager &resources_manager, ContextResources &context_resources, uint8_t context_index, std::vector &processed_configuration_actions) @@ -557,7 +688,8 @@ static hailo_status add_change_vdma_to_stream_mapping( const bool is_dummy_stream = layer_info.context_index != context_index; uint8_t stream_index = layer_info.stream_index; if (is_dummy_stream) { - auto dummy_stream_index = find_dummy_stream(layer_info, context_resources); + auto dummy_stream_index = find_dummy_stream(layer_info, context_resources, + is_hailo15_device_type(DeviceBase::hef_arch_to_device_arch(hw_arch))); CHECK_EXPECTED_AS_STATUS(dummy_stream_index); stream_index = *dummy_stream_index; } @@ -603,9 +735,9 @@ static hailo_status 
push_edge_layer_activation_actions( for (const auto &edge_layer : context_resources.get_edge_layers(LayerType::DDR, HAILO_H2D_STREAM)) { const auto d2h_stream_index = edge_layer.layer_info.connected_context_info.stream_index; - auto pair = context_resources.get_ddr_channels_pair(d2h_stream_index); - CHECK_EXPECTED_AS_STATUS(pair); - const auto d2h_channel_id = pair->get().info().d2h_channel_id; + auto ddr_channels_info = context_resources.get_ddr_channels_info(d2h_stream_index); + CHECK_EXPECTED_AS_STATUS(ddr_channels_info); + const auto d2h_channel_id = ddr_channels_info->d2h_channel_id; auto activate_action = ActivateDdrInputChannelAction::create(edge_layer.channel_id, edge_layer.layer_info.stream_index, edge_layer.layer_info.nn_stream_config, edge_layer.buffer_info, @@ -633,7 +765,8 @@ static hailo_status push_edge_layer_activation_actions( return HAILO_SUCCESS; } -static hailo_status proccess_trigger_new_data_input_action(const ContextSwitchConfigActionPtr &configuration_action, +static hailo_status proccess_trigger_new_data_input_action(const ProtoHEFHwArch &hw_arch, + const ContextSwitchConfigActionPtr &configuration_action, uint32_t trigger_new_data_from_input_group_start, uint32_t trigger_new_data_from_input_group_end, const uint32_t &action_index, @@ -648,7 +781,7 @@ static hailo_status proccess_trigger_new_data_input_action(const ContextSwitchCo CHECK_SUCCESS(status); if (!is_single_context) { - status = add_change_vdma_to_stream_mapping(core_op_metadata, resources_manager, + status = add_change_vdma_to_stream_mapping(hw_arch, core_op_metadata, resources_manager, context_resources, context_index, processed_configuration_actions); CHECK_SUCCESS(status); } @@ -734,8 +867,8 @@ static hailo_status add_config_channel_activation_actions(std::vector &configuration_actions, - const CoreOpMetadata &core_op_metadata, +static hailo_status handle_edge_layer_activation_actions(const ProtoHEFHwArch &hw_arch, + std::vector &configuration_actions, const CoreOpMetadata 
&core_op_metadata, const ResourcesManager &resources_manager, ContextResources &context_resources, uint8_t context_index, bool is_single_context) { @@ -751,7 +884,7 @@ static hailo_status handle_edge_layer_activation_actions(std::vectorget_type()) { - auto status = proccess_trigger_new_data_input_action(configuration_action, + auto status = proccess_trigger_new_data_input_action(hw_arch, configuration_action, trigger_new_data_from_input_group_start, trigger_new_data_from_input_group_end, action_index, core_op_metadata, resources_manager, context_resources, context_index, processed_configuration_actions, is_single_context); CHECK_SUCCESS(status); @@ -809,13 +942,6 @@ static hailo_status handle_repeated_actions(std::vector &actions) { @@ -854,17 +980,17 @@ static hailo_status fill_context_recipes_for_multi_context(const ProtoHEFHwArch hailo_status status = HAILO_UNINITIALIZED; // Add edge layers mapping - status = parse_and_fill_edge_layers_mapping(context_resources, context_metadata, resources_manager); + status = parse_and_fill_edge_layers_mapping(context_resources, context_metadata, resources_manager, hw_arch); CHECK_SUCCESS(status); // Parse context std::vector actions = context_metadata.get_actions(); - const auto support_pre_fetch = is_hailo15_device_type(hw_arch); + const auto support_pre_fetch = is_hailo15_device_type(DeviceBase::hef_arch_to_device_arch(hw_arch)); status = add_fetch_config_actions(actions, context_resources.get_config_buffers(), support_pre_fetch); CHECK_SUCCESS(status); - status = handle_edge_layer_activation_actions(actions, core_op_metadata, resources_manager, + status = handle_edge_layer_activation_actions(hw_arch, actions, core_op_metadata, resources_manager, context_resources, context_index, is_single_context); CHECK_SUCCESS(status); @@ -899,7 +1025,7 @@ static hailo_status create_boundary_channels(ResourcesManager &resources_manager static hailo_status fill_activation_config_recepies_for_multi_context( ContextResources 
&context_resources, ResourcesManager &resources_manager, - std::shared_ptr core_op_metadata) + std::shared_ptr core_op_metadata, const ProtoHEFHwArch &hw_arch) { auto hw_consts = Control::get_hw_consts(resources_manager.get_device()); CHECK_EXPECTED_AS_STATUS(hw_consts); @@ -908,19 +1034,16 @@ static hailo_status fill_activation_config_recepies_for_multi_context( for (const auto &layer_info : core_op_metadata->get_output_layer_infos()){ auto status = fill_boundary_output_layer(context_resources, resources_manager, layer_info, *hw_consts, - should_optimize_credits); + hw_arch, should_optimize_credits); CHECK_SUCCESS(status); } for (const auto &layer_info : core_op_metadata->get_input_layer_infos()) { auto status = fill_boundary_input_layer(context_resources, resources_manager, layer_info, *hw_consts, - should_optimize_credits); + hw_arch, should_optimize_credits); CHECK_SUCCESS(status); } - auto status = context_resources.validate_edge_layers(); - CHECK_SUCCESS(status); - std::vector actions; for (const auto &edge_layer : context_resources.get_edge_layers(LayerType::BOUNDARY)) { auto action = edge_layer.layer_info.direction == HAILO_H2D_STREAM ? 
@@ -933,6 +1056,38 @@ static hailo_status fill_activation_config_recepies_for_multi_context( return write_action_list(context_resources, context_resources.builder(), actions); } +static Expected create_switch_lcu_batch_action(const ContextSwitchConfigActionPtr action, + ContextResources &context_resources) +{ + uint8_t cluster_index = 0; + uint8_t lcu_index = 0; + uint8_t network_index = 0; + uint32_t kernel_done_count = 0; + + CHECK_AS_EXPECTED((ContextSwitchConfigAction::Type::EnableLcuDefault == action->get_type()) || + (ContextSwitchConfigAction::Type::EnableLcuNonDefault == action->get_type()), HAILO_INVALID_ARGUMENT, + "Invalid action type - must be enable lcu (default or non default), Received type {}", action->get_type()); + + const auto params_buffer = action->serialize_params(context_resources); + CHECK_EXPECTED(params_buffer); + + if (ContextSwitchConfigAction::Type::EnableLcuDefault == action->get_type()) { + const auto params = reinterpret_cast(params_buffer.value().data()); + cluster_index = CONTEXT_SWITCH_DEFS__PACKED_LCU_ID_CLUSTER_INDEX_READ(params->packed_lcu_id); + lcu_index = CONTEXT_SWITCH_DEFS__PACKED_LCU_ID_LCU_INDEX_READ(params->packed_lcu_id); + network_index = params->network_index; + kernel_done_count = CONTEXT_SWITCH_DEFS__ENABLE_LCU_DEFAULT_KERNEL_COUNT; + } else { + const auto params = reinterpret_cast(params_buffer.value().data()); + cluster_index = CONTEXT_SWITCH_DEFS__PACKED_LCU_ID_CLUSTER_INDEX_READ(params->packed_lcu_id); + lcu_index = CONTEXT_SWITCH_DEFS__PACKED_LCU_ID_LCU_INDEX_READ(params->packed_lcu_id); + network_index = params->network_index; + kernel_done_count = params->kernel_done_count; + } + + return SwitchLcuBatchAction::create(cluster_index, lcu_index, network_index, kernel_done_count); +} + static hailo_status fill_batch_switching_context_config_recepies_for_multi_context( ContextResources &context_resources, const CoreOpMetadata &core_op_metadata) { @@ -943,14 +1098,19 @@ static hailo_status 
fill_batch_switching_context_config_recepies_for_multi_conte CHECK_EXPECTED_AS_STATUS(reset_ddr_action); actions.emplace_back(reset_ddr_action.release()); - // We need to re-enable all the lcus of the first context since some of their config regs are batch dependent. - // => We'll filter out all of the "enable lcu" actions from the preliminary context - static const std::set BATCH_SWITCHING_ACTIONS = { + // Find all the enabled lcus from the preliminary context in order to create coresponding switch lcu batch actions to run + // In the batch switch context + static const std::set ENABLE_LCU_ACTIONS = { ContextSwitchConfigAction::Type::EnableLcuDefault, ContextSwitchConfigAction::Type::EnableLcuNonDefault }; - const auto batch_switch_actions = core_op_metadata.preliminary_context().get_actions_of_type(BATCH_SWITCHING_ACTIONS); - actions.insert(actions.end(), batch_switch_actions.begin(), batch_switch_actions.end()); + + const auto batch_switch_actions = core_op_metadata.preliminary_context().get_actions_of_type(ENABLE_LCU_ACTIONS); + for (const auto &action : batch_switch_actions) { + auto switch_lcu_batch_action = create_switch_lcu_batch_action(action, context_resources); + CHECK_EXPECTED_AS_STATUS(switch_lcu_batch_action); + actions.insert(actions.end(), switch_lcu_batch_action.release()); + } auto status = handle_repeated_actions(actions); CHECK_SUCCESS(status); @@ -969,19 +1129,19 @@ static hailo_status fill_preliminary_config_recepies_for_multi_context(const Pro // Add edge layers mapping (only preliminary_run_asap networks have edge layers in the preliminary context) assert(PRELIMINARY_CONTEXT_INDEX < core_op_metadata->dynamic_contexts().size()); auto status = parse_and_fill_edge_layers_mapping(context_resources, - core_op_metadata->dynamic_contexts()[PRELIMINARY_CONTEXT_INDEX], resources_manager); + core_op_metadata->dynamic_contexts()[PRELIMINARY_CONTEXT_INDEX], resources_manager, hw_arch); CHECK_SUCCESS(status); } // Parse preliminary config std::vector 
actions = preliminary_context.get_actions(); - const auto support_pre_fetch = is_hailo15_device_type(hw_arch); + const auto support_pre_fetch = is_hailo15_device_type(DeviceBase::hef_arch_to_device_arch(hw_arch)); auto status = add_fetch_config_actions(actions, context_resources.get_config_buffers(), support_pre_fetch); CHECK_SUCCESS(status); if (resources_manager.get_supported_features().preliminary_run_asap) { - status = handle_edge_layer_activation_actions(actions, *core_op_metadata, resources_manager, + status = handle_edge_layer_activation_actions(hw_arch, actions, *core_op_metadata, resources_manager, context_resources, PRELIMINARY_CONTEXT_INDEX, is_single_context); CHECK_SUCCESS(status); } @@ -1026,7 +1186,7 @@ Expected> ResourcesManagerBuilder::build(uint8 auto activation_context = resources_manager->add_new_context(CONTROL_PROTOCOL__CONTEXT_SWITCH_CONTEXT_TYPE_ACTIVATION); CHECK_EXPECTED(activation_context); status = fill_activation_config_recepies_for_multi_context(activation_context.value().get(), - resources_manager.value(), core_op_metadata); + resources_manager.value(), core_op_metadata, hw_arch); CHECK_SUCCESS_AS_EXPECTED(status); auto batch_switching_context = resources_manager->add_new_context(CONTROL_PROTOCOL__CONTEXT_SWITCH_CONTEXT_TYPE_BATCH_SWITCHING); diff --git a/hailort/libhailort/src/device_common/control.cpp b/hailort/libhailort/src/device_common/control.cpp index 19946b9..9c72ebb 100644 --- a/hailort/libhailort/src/device_common/control.cpp +++ b/hailort/libhailort/src/device_common/control.cpp @@ -197,22 +197,9 @@ hailo_status control__parse_core_identify_results(CONTROL_PROTOCOL__core_identif return HAILO_SUCCESS; } -hailo_status Control::validate_arch_supported(Device &device, const std::vector &supported_archs) -{ - auto dev_arch = device.get_architecture(); - CHECK_EXPECTED_AS_STATUS(dev_arch); - for (const auto &arch : supported_archs) { - if (*dev_arch == arch) { - return HAILO_SUCCESS; - } - } - LOGGER__ERROR("Control is not 
supported for this device architecture - {}", HailoRTCommon::get_device_arch_str(*dev_arch)); - return HAILO_NOT_SUPPORTED; -} - hailo_status Control::parse_and_validate_response(uint8_t *message, uint32_t message_size, CONTROL_PROTOCOL__response_header_t **header, CONTROL_PROTOCOL__payload_t **payload, - CONTROL_PROTOCOL__request_t *request) + CONTROL_PROTOCOL__request_t *request, Device &device) { hailo_status status = HAILO_UNINITIALIZED; HAILO_COMMON_STATUS_t common_status = HAILO_COMMON_STATUS__UNINITIALIZED; @@ -251,12 +238,29 @@ hailo_status Control::parse_and_validate_response(uint8_t *message, uint32_t mes (FIRMWARE_STATUS_t)fw_status.minor_status, common_status); } + if ((CONTROL_PROTOCOL_STATUS_CONTROL_UNSUPPORTED == fw_status.minor_status) || + (CONTROL_PROTOCOL_STATUS_CONTROL_UNSUPPORTED == fw_status.major_status)) { + auto device_arch = device.get_architecture(); + auto dev_arch_str = (device_arch) ? HailoRTCommon::get_device_arch_str(*device_arch) : "Unable to parse arch"; + LOGGER__ERROR("Opcode {} is not supported on the device." 
\ + " This error usually occurs when the control is not supported for the device arch - ({}), or not compiled to the FW", + CONTROL_PROTOCOL__get_textual_opcode((CONTROL_PROTOCOL__OPCODE_t)BYTE_ORDER__ntohl(request->header.common_header.opcode)), + dev_arch_str); + } + + if ((CONTROL_PROTOCOL_STATUS_UNSUPPORTED_DEVICE == fw_status.minor_status) || + (CONTROL_PROTOCOL_STATUS_UNSUPPORTED_DEVICE == fw_status.major_status)) { + LOGGER__ERROR("Opcode {} is not supported on the current board.", + CONTROL_PROTOCOL__get_textual_opcode((CONTROL_PROTOCOL__OPCODE_t)BYTE_ORDER__ntohl(request->header.common_header.opcode))); + } + if ((HAILO_CONTROL_STATUS_UNSUPPORTED_OPCODE == fw_status.minor_status) || (HAILO_CONTROL_STATUS_UNSUPPORTED_OPCODE == fw_status.major_status)) { status = HAILO_UNSUPPORTED_OPCODE; LOGGER__ERROR("Opcode {} is not supported", CONTROL_PROTOCOL__get_textual_opcode((CONTROL_PROTOCOL__OPCODE_t)BYTE_ORDER__ntohl(request->header.common_header.opcode))); } + goto exit; } @@ -301,7 +305,7 @@ Expected Control::identify(Device &device) /* Parse response */ status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, - &request); + &request, device); CHECK_SUCCESS_AS_EXPECTED(status); identify_response = (CONTROL_PROTOCOL_identify_response_t *)(payload->parameters); @@ -336,7 +340,7 @@ hailo_status Control::core_identify(Device &device, hailo_core_information_t *co /* Parse response */ status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, - &request); + &request, device); if (HAILO_SUCCESS != status) { goto exit; } @@ -359,14 +363,10 @@ hailo_status Control::set_fw_logger(Device &device, hailo_fw_logger_level_t leve CONTROL_PROTOCOL__request_t request = {}; size_t request_size = 0; - /* Validate arch */ - auto status = Control::validate_arch_supported(device); - CHECK_SUCCESS(status); - auto common_status = CONTROL_PROTOCOL__pack_set_fw_logger_request(&request, &request_size, 
device.get_control_sequence(), level, static_cast(interface_mask)); - status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE; + auto status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE; CHECK_SUCCESS(status); uint8_t response_buffer[RESPONSE_MAX_BUFFER_SIZE] = {}; @@ -378,7 +378,7 @@ hailo_status Control::set_fw_logger(Device &device, hailo_fw_logger_level_t leve CONTROL_PROTOCOL__response_header_t *header = NULL; CONTROL_PROTOCOL__payload_t *payload = NULL; status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, - &request); + &request, device); CHECK_SUCCESS(status); return HAILO_SUCCESS; @@ -389,13 +389,9 @@ hailo_status Control::set_clock_freq(Device &device, uint32_t clock_freq) CONTROL_PROTOCOL__request_t request = {}; size_t request_size = 0; - /* Validate arch */ - auto status = Control::validate_arch_supported(device); - CHECK_SUCCESS(status); - auto common_status = CONTROL_PROTOCOL__pack_set_clock_freq_request(&request, &request_size, device.get_control_sequence(), clock_freq); - status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE; + auto status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? 
HAILO_SUCCESS : HAILO_INTERNAL_FAILURE; CHECK_SUCCESS(status); uint8_t response_buffer[RESPONSE_MAX_BUFFER_SIZE] = {}; @@ -407,7 +403,7 @@ hailo_status Control::set_clock_freq(Device &device, uint32_t clock_freq) CONTROL_PROTOCOL__response_header_t *header = NULL; CONTROL_PROTOCOL__payload_t *payload = NULL; status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, - &request); + &request, device); CHECK_SUCCESS(status); return HAILO_SUCCESS; @@ -418,13 +414,9 @@ hailo_status Control::set_throttling_state(Device &device, bool should_activate) CONTROL_PROTOCOL__request_t request = {}; size_t request_size = 0; - /* Validate arch */ - auto status = Control::validate_arch_supported(device); - CHECK_SUCCESS(status); - auto common_status = CONTROL_PROTOCOL__pack_set_throttling_state_request(&request, &request_size, device.get_control_sequence(), should_activate); - status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE; + auto status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? 
HAILO_SUCCESS : HAILO_INTERNAL_FAILURE; CHECK_SUCCESS(status); uint8_t response_buffer[RESPONSE_MAX_BUFFER_SIZE] = {}; @@ -436,7 +428,7 @@ hailo_status Control::set_throttling_state(Device &device, bool should_activate) CONTROL_PROTOCOL__response_header_t *header = NULL; CONTROL_PROTOCOL__payload_t *payload = NULL; status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, - &request); + &request, device); CHECK_SUCCESS(status); return HAILO_SUCCESS; @@ -454,10 +446,6 @@ Expected Control::get_throttling_state(Device &device) CONTROL_PROTOCOL__payload_t *payload = NULL; CONTROL_PROTOCOL__get_throttling_state_response_t *get_throttling_state_response = NULL; - /* Validate arch */ - status = Control::validate_arch_supported(device); - CHECK_SUCCESS_AS_EXPECTED(status); - common_status = CONTROL_PROTOCOL__pack_get_throttling_state_request(&request, &request_size, device.get_control_sequence()); status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? 
HAILO_SUCCESS : HAILO_INTERNAL_FAILURE; @@ -467,7 +455,7 @@ Expected Control::get_throttling_state(Device &device) CHECK_SUCCESS_AS_EXPECTED(status); /* Parse response */ - status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, &request); + status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, &request, device); CHECK_SUCCESS_AS_EXPECTED(status); get_throttling_state_response = (CONTROL_PROTOCOL__get_throttling_state_response_t *)(payload->parameters); @@ -479,13 +467,9 @@ hailo_status Control::set_overcurrent_state(Device &device, bool should_activate CONTROL_PROTOCOL__request_t request = {}; size_t request_size = 0; - /* Validate arch */ - auto status = Control::validate_arch_supported(device); - CHECK_SUCCESS(status); - auto common_status = CONTROL_PROTOCOL__pack_set_overcurrent_state_request(&request, &request_size, device.get_control_sequence(), should_activate); - status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE; + auto status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? 
HAILO_SUCCESS : HAILO_INTERNAL_FAILURE; CHECK_SUCCESS(status); uint8_t response_buffer[RESPONSE_MAX_BUFFER_SIZE] = {}; @@ -496,7 +480,7 @@ hailo_status Control::set_overcurrent_state(Device &device, bool should_activate /* Parse response */ CONTROL_PROTOCOL__response_header_t *header = NULL; CONTROL_PROTOCOL__payload_t *payload = NULL; - status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, &request); + status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, &request, device); CHECK_SUCCESS(status); return HAILO_SUCCESS; @@ -514,10 +498,6 @@ Expected Control::get_overcurrent_state(Device &device) CONTROL_PROTOCOL__payload_t *payload = NULL; CONTROL_PROTOCOL__get_overcurrent_state_response_t *get_overcurrent_state_response = NULL; - /* Validate arch */ - status = Control::validate_arch_supported(device); - CHECK_SUCCESS_AS_EXPECTED(status); - common_status = CONTROL_PROTOCOL__pack_get_overcurrent_state_request(&request, &request_size, device.get_control_sequence()); status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? 
HAILO_SUCCESS : HAILO_INTERNAL_FAILURE; @@ -527,7 +507,7 @@ Expected Control::get_overcurrent_state(Device &device) CHECK_SUCCESS_AS_EXPECTED(status); /* Parse response */ - status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, &request); + status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, &request, device); CHECK_SUCCESS_AS_EXPECTED(status); get_overcurrent_state_response = (CONTROL_PROTOCOL__get_overcurrent_state_response_t *)(payload->parameters); @@ -538,6 +518,7 @@ Expected Control::get_hw_consts(Device &device) { size_t request_size = 0; CONTROL_PROTOCOL__request_t request = {}; + auto common_status = CONTROL_PROTOCOL__pack_get_hw_consts_request(&request, &request_size, device.get_control_sequence()); auto status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE; CHECK_SUCCESS_AS_EXPECTED(status); @@ -549,7 +530,8 @@ Expected Control::get_hw_consts(Device &device) CONTROL_PROTOCOL__response_header_t *header = NULL; CONTROL_PROTOCOL__payload_t *payload = NULL; - status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, &request); + status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, &request, + device); CHECK_SUCCESS_AS_EXPECTED(status); const auto &response = *reinterpret_cast(payload->parameters); @@ -587,7 +569,7 @@ hailo_status Control::write_memory_chunk(Device &device, uint32_t address, const /* Parse response */ status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, - &request); + &request, device); if (HAILO_SUCCESS != status) { goto exit; } @@ -663,7 +645,7 @@ hailo_status Control::read_memory_chunk(Device &device, uint32_t address, uint8_ /* Parse response */ status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, - &request); + &request, 
device); if (HAILO_SUCCESS != status) { goto exit; } @@ -726,12 +708,6 @@ hailo_status Control::open_stream(Device &device, uint8_t dataflow_manager_id, b CONTROL_PROTOCOL__response_header_t *header = NULL; CONTROL_PROTOCOL__payload_t *payload = NULL; - /* Validate arch */ - status = Control::validate_arch_supported(device); - if (HAILO_SUCCESS != status) { - goto exit; - } - common_status = CONTROL_PROTOCOL__pack_open_stream_request(&request, &request_size, device.get_control_sequence(), dataflow_manager_id, is_input); status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE; @@ -746,7 +722,7 @@ hailo_status Control::open_stream(Device &device, uint8_t dataflow_manager_id, b /* Parse response */ status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, - &request); + &request, device); if (HAILO_SUCCESS != status) { goto exit; } @@ -767,12 +743,6 @@ hailo_status Control::close_stream(Device &device, uint8_t dataflow_manager_id, CONTROL_PROTOCOL__response_header_t *header = NULL; CONTROL_PROTOCOL__payload_t *payload = NULL; - /* Validate arch */ - status = Control::validate_arch_supported(device); - if (HAILO_SUCCESS != status) { - goto exit; - } - common_status = CONTROL_PROTOCOL__pack_close_stream_request(&request, &request_size, device.get_control_sequence(), dataflow_manager_id, is_input); status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? 
HAILO_SUCCESS : HAILO_INTERNAL_FAILURE; @@ -787,7 +757,7 @@ hailo_status Control::close_stream(Device &device, uint8_t dataflow_manager_id, /* Parse response */ status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, - &request); + &request, device); if (HAILO_SUCCESS != status) { goto exit; } @@ -834,12 +804,6 @@ hailo_status Control::config_stream_udp_input(Device &device, CONTROL_PROTOCOL__ /* Validate arguments */ CHECK_ARG_NOT_NULL(params); - /* Validate arch */ - status = Control::validate_arch_supported(device); - if (HAILO_SUCCESS != status) { - goto exit; - } - common_status = CONTROL_PROTOCOL__pack_config_stream_udp_input_request(&request, &request_size, device.get_control_sequence(), params); status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE; @@ -854,7 +818,7 @@ hailo_status Control::config_stream_udp_input(Device &device, CONTROL_PROTOCOL__ /* Parse response */ status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, - &request); + &request, device); if (HAILO_SUCCESS != status) { goto exit; } @@ -891,12 +855,6 @@ hailo_status Control::config_stream_udp_output(Device &device, CONTROL_PROTOCOL_ /* Validate arguments */ CHECK_ARG_NOT_NULL(params); - /* Validate arch */ - status = Control::validate_arch_supported(device); - if (HAILO_SUCCESS != status) { - goto exit; - } - common_status = CONTROL_PROTOCOL__pack_config_stream_udp_output_request(&request, &request_size, device.get_control_sequence(), params); status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? 
HAILO_SUCCESS : HAILO_INTERNAL_FAILURE; @@ -911,7 +869,7 @@ hailo_status Control::config_stream_udp_output(Device &device, CONTROL_PROTOCOL_ /* Parse response */ status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, - &request); + &request, device); if (HAILO_SUCCESS != status) { goto exit; } @@ -948,12 +906,6 @@ hailo_status Control::config_stream_mipi_input(Device &device, CONTROL_PROTOCOL_ /* Validate arguments */ CHECK_ARG_NOT_NULL(params); - /* Validate arch */ - status = Control::validate_arch_supported(device); - if (HAILO_SUCCESS != status) { - goto exit; - } - common_status = CONTROL_PROTOCOL__pack_config_stream_mipi_input_request(&request, &request_size, device.get_control_sequence(), params); status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE; @@ -968,7 +920,7 @@ hailo_status Control::config_stream_mipi_input(Device &device, CONTROL_PROTOCOL_ /* Parse response */ status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, - &request); + &request, device); if (HAILO_SUCCESS != status) { goto exit; } @@ -1005,12 +957,6 @@ hailo_status Control::config_stream_mipi_output(Device &device, CONTROL_PROTOCOL /* Validate arguments */ CHECK_ARG_NOT_NULL(params); - /* Validate arch */ - status = Control::validate_arch_supported(device); - if (HAILO_SUCCESS != status) { - goto exit; - } - common_status = CONTROL_PROTOCOL__pack_config_stream_mipi_output_request(&request, &request_size, device.get_control_sequence(), params); status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? 
HAILO_SUCCESS : HAILO_INTERNAL_FAILURE; @@ -1025,7 +971,7 @@ hailo_status Control::config_stream_mipi_output(Device &device, CONTROL_PROTOCOL /* Parse response */ status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, - &request); + &request, device); if (HAILO_SUCCESS != status) { goto exit; } @@ -1062,12 +1008,6 @@ hailo_status Control::config_stream_pcie_input(Device &device, CONTROL_PROTOCOL_ /* Validate arguments */ CHECK_ARG_NOT_NULL(params); - /* Validate arch */ - status = Control::validate_arch_supported(device); - if (HAILO_SUCCESS != status) { - goto exit; - } - common_status = CONTROL_PROTOCOL__pack_config_stream_pcie_input_request(&request, &request_size, device.get_control_sequence(), params); status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE; @@ -1082,7 +1022,7 @@ hailo_status Control::config_stream_pcie_input(Device &device, CONTROL_PROTOCOL_ /* Parse response */ status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, - &request); + &request, device); if (HAILO_SUCCESS != status) { goto exit; } @@ -1119,12 +1059,6 @@ hailo_status Control::config_stream_pcie_output(Device &device, CONTROL_PROTOCOL /* Validate arguments */ CHECK_ARG_NOT_NULL(params); - /* Validate arch */ - status = Control::validate_arch_supported(device); - if (HAILO_SUCCESS != status) { - goto exit; - } - common_status = CONTROL_PROTOCOL__pack_config_stream_pcie_output_request(&request, &request_size, device.get_control_sequence(), params); status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? 
HAILO_SUCCESS : HAILO_INTERNAL_FAILURE; @@ -1139,7 +1073,7 @@ hailo_status Control::config_stream_pcie_output(Device &device, CONTROL_PROTOCOL /* Parse response */ status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, - &request); + &request, device); if (HAILO_SUCCESS != status) { goto exit; } @@ -1175,12 +1109,6 @@ hailo_status Control::power_measurement(Device &device, CONTROL_PROTOCOL__dvm_op CONTROL_PROTOCOL__payload_t *payload = NULL; CONTROL_PROTOCOL__power_measurement_response_t *response = NULL; - /* Validate arch */ - status = Control::validate_arch_supported(device); - if (HAILO_SUCCESS != status) { - goto exit; - } - /* Validate arguments */ CHECK_ARG_NOT_NULL(measurement); @@ -1198,7 +1126,7 @@ hailo_status Control::power_measurement(Device &device, CONTROL_PROTOCOL__dvm_op /* Parse response */ status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, - &request); + &request, device); if (HAILO_SUCCESS != status) { goto exit; } @@ -1230,12 +1158,6 @@ hailo_status Control::set_power_measurement(Device &device, hailo_measurement_bu CONTROL_PROTOCOL__payload_t *payload = NULL; CONTROL_PROTOCOL__set_power_measurement_response_t *response = NULL; - /* Validate arch */ - status = Control::validate_arch_supported(device); - if (HAILO_SUCCESS != status) { - goto exit; - } - CHECK(CONTROL_PROTOCOL__MAX_NUMBER_OF_POWER_MEASUREMETS > buffer_index, HAILO_INVALID_ARGUMENT, "Invalid power measurement index {}", buffer_index); @@ -1253,7 +1175,7 @@ hailo_status Control::set_power_measurement(Device &device, hailo_measurement_bu /* Parse response */ status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, - &request); + &request, device); if (HAILO_SUCCESS != status) { goto exit; } @@ -1283,12 +1205,6 @@ hailo_status Control::get_power_measurement(Device &device, hailo_measurement_bu CONTROL_PROTOCOL__payload_t *payload = NULL; 
CONTROL_PROTOCOL__get_power_measurement_response_t *get_power_response = NULL; - /* Validate arch */ - status = Control::validate_arch_supported(device); - if (HAILO_SUCCESS != status) { - goto exit; - } - /* Validate arguments */ CHECK(CONTROL_PROTOCOL__MAX_NUMBER_OF_POWER_MEASUREMETS > buffer_index, HAILO_INVALID_ARGUMENT, "Invalid power measurement index {}", buffer_index); @@ -1305,7 +1221,7 @@ hailo_status Control::get_power_measurement(Device &device, hailo_measurement_bu } /* Parse response */ status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, - &request); + &request, device); if (HAILO_SUCCESS != status) { goto exit; } @@ -1339,12 +1255,6 @@ hailo_status Control::start_power_measurement(Device &device, CONTROL_PROTOCOL__payload_t *payload = NULL; uint32_t delay_milliseconds = 0; - /* Validate arch */ - status = Control::validate_arch_supported(device); - if (HAILO_SUCCESS != status) { - goto exit; - } - delay_milliseconds = POWER_MEASUREMENT_DELAY_MS(sampling_period, averaging_factor); // There is no logical way that measurement delay can be 0 - because sampling_period and averaging_factor cant be 0 // Hence if it is 0 - it means it was 0.xx and we want to round up to 1 in that case @@ -1366,7 +1276,7 @@ hailo_status Control::start_power_measurement(Device &device, /* Parse response */ status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, - &request); + &request, device); if (HAILO_SUCCESS != status) { goto exit; } @@ -1387,12 +1297,6 @@ hailo_status Control::stop_power_measurement(Device &device) CONTROL_PROTOCOL__response_header_t *header = NULL; CONTROL_PROTOCOL__payload_t *payload = NULL; - /* Validate arch */ - status = Control::validate_arch_supported(device); - if (HAILO_SUCCESS != status) { - goto exit; - } - common_status = CONTROL_PROTOCOL__pack_stop_power_measurement_request(&request, &request_size, device.get_control_sequence()); status = 
(HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE; if (HAILO_SUCCESS != status) { @@ -1406,7 +1310,7 @@ hailo_status Control::stop_power_measurement(Device &device) /* Parse response */ status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, - &request); + &request, device); if (HAILO_SUCCESS != status) { goto exit; } @@ -1432,12 +1336,6 @@ hailo_status Control::i2c_write(Device &device, const hailo_i2c_slave_config_t * CHECK_ARG_NOT_NULL(slave_config); CHECK_ARG_NOT_NULL(data); - /* Validate arch */ - status = Control::validate_arch_supported(device); - if (HAILO_SUCCESS != status) { - goto exit; - } - /* Pack request */ common_status = CONTROL_PROTOCOL__pack_i2c_write_request(&request, &request_size, device.get_control_sequence(), register_address, static_cast(slave_config->endianness), @@ -1455,7 +1353,7 @@ hailo_status Control::i2c_write(Device &device, const hailo_i2c_slave_config_t * /* Parse response */ status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, - &request); + &request, device); if (HAILO_SUCCESS != status) { goto exit; } @@ -1483,12 +1381,6 @@ hailo_status Control::i2c_read(Device &device, const hailo_i2c_slave_config_t *s CHECK_ARG_NOT_NULL(slave_config); CHECK_ARG_NOT_NULL(data); - /* Validate arch */ - status = Control::validate_arch_supported(device); - if (HAILO_SUCCESS != status) { - goto exit; - } - /* Pack request */ common_status = CONTROL_PROTOCOL__pack_i2c_read_request(&request, &request_size, device.get_control_sequence(), register_address, static_cast(slave_config->endianness), @@ -1507,7 +1399,7 @@ hailo_status Control::i2c_read(Device &device, const hailo_i2c_slave_config_t *s /* Parse response */ status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, - &request); + &request, device); if (HAILO_SUCCESS != status) { goto exit; } @@ -1546,12 +1438,6 @@ 
hailo_status Control::config_core_top(Device &device, CONTROL_PROTOCOL__config_c /* Validate arguments */ CHECK_ARG_NOT_NULL(params); - /* Validate arch */ - status = Control::validate_arch_supported(device); - if (HAILO_SUCCESS != status) { - goto exit; - } - common_status = CONTROL_PROTOCOL__pack_config_core_top_request(&request, &request_size, device.get_control_sequence(), config_type, params); status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE; if (HAILO_SUCCESS != status) { @@ -1565,7 +1451,7 @@ hailo_status Control::config_core_top(Device &device, CONTROL_PROTOCOL__config_c /* Parse response */ status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, - &request); + &request, device); if (HAILO_SUCCESS != status) { goto exit; } @@ -1586,12 +1472,6 @@ hailo_status Control::phy_operation(Device &device, CONTROL_PROTOCOL__phy_operat CONTROL_PROTOCOL__response_header_t *header = NULL; CONTROL_PROTOCOL__payload_t *payload = NULL; - /* Validate arch */ - status = Control::validate_arch_supported(device); - if (HAILO_SUCCESS != status) { - goto exit; - } - common_status = CONTROL_PROTOCOL__pack_phy_operation_request(&request, &request_size, device.get_control_sequence(), operation_type); status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? 
HAILO_SUCCESS : HAILO_INTERNAL_FAILURE; if (HAILO_SUCCESS != status) { @@ -1605,7 +1485,7 @@ hailo_status Control::phy_operation(Device &device, CONTROL_PROTOCOL__phy_operat /* Parse response */ status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, - &request); + &request, device); if (HAILO_SUCCESS != status) { goto exit; } @@ -1630,12 +1510,6 @@ hailo_status Control::examine_user_config(Device &device, hailo_fw_user_config_i /* Validate arguments */ CHECK_ARG_NOT_NULL(info); - /* Validate arch */ - status = Control::validate_arch_supported(device); - if (HAILO_SUCCESS != status) { - goto exit; - } - common_status = CONTROL_PROTOCOL__pack_examine_user_config(&request, &request_size, device.get_control_sequence()); status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE; if (HAILO_SUCCESS != status) { @@ -1649,7 +1523,7 @@ hailo_status Control::examine_user_config(Device &device, hailo_fw_user_config_i /* Parse response */ status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, - &request); + &request, device); if (HAILO_SUCCESS != status) { goto exit; } @@ -1678,10 +1552,6 @@ hailo_status Control::read_user_config_chunk(Device &device, uint32_t read_offse CONTROL_PROTOCOL__payload_t *payload = NULL; CONTROL_PROTOCOL__read_user_config_response_t *response = NULL; - /* Validate arch */ - status = Control::validate_arch_supported(device); - CHECK_SUCCESS(status); - common_status = CONTROL_PROTOCOL__pack_read_user_config(&request, &request_size, device.get_control_sequence(), read_offset, read_length); status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? 
HAILO_SUCCESS : HAILO_INTERNAL_FAILURE; @@ -1693,7 +1563,7 @@ hailo_status Control::read_user_config_chunk(Device &device, uint32_t read_offse /* Parse response */ status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, - &request); + &request, device); CHECK_SUCCESS(status); response = (CONTROL_PROTOCOL__read_user_config_response_t *)(payload->parameters); @@ -1713,10 +1583,6 @@ hailo_status Control::read_user_config(Device &device, uint8_t *buffer, uint32_t /* Validate arguments */ CHECK_ARG_NOT_NULL(buffer); - /* Validate arch */ - status = Control::validate_arch_supported(device); - CHECK_SUCCESS(status); - status = examine_user_config(device, &user_config_info); CHECK_SUCCESS(status); @@ -1747,10 +1613,6 @@ hailo_status Control::write_user_config_chunk(Device &device, uint32_t offset, c CONTROL_PROTOCOL__response_header_t *header = NULL; CONTROL_PROTOCOL__payload_t *payload = NULL; - /* Validate arch */ - status = Control::validate_arch_supported(device); - CHECK_SUCCESS(status); - common_status = CONTROL_PROTOCOL__pack_write_user_config_request(&request, &request_size, device.get_control_sequence(), offset, data + offset, chunk_size); status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? 
HAILO_SUCCESS : HAILO_INTERNAL_FAILURE; @@ -1762,7 +1624,7 @@ hailo_status Control::write_user_config_chunk(Device &device, uint32_t offset, c /* Parse response */ status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, - &request); + &request, device); CHECK_SUCCESS(status); return HAILO_SUCCESS; @@ -1777,10 +1639,6 @@ hailo_status Control::write_user_config(Device &device, const uint8_t *data, uin /* Validate arguments */ CHECK_ARG_NOT_NULL(data); - /* Validate arch */ - status = Control::validate_arch_supported(device); - CHECK_SUCCESS(status); - while (offset < data_length) { chunk_size = MIN(WRITE_CHUNK_SIZE, (data_length - offset)); status = write_user_config_chunk(device, offset, data, chunk_size); @@ -1802,12 +1660,6 @@ hailo_status Control::erase_user_config(Device &device) CONTROL_PROTOCOL__response_header_t *header = NULL; CONTROL_PROTOCOL__payload_t *payload = NULL; - /* Validate arch */ - status = Control::validate_arch_supported(device); - if (HAILO_SUCCESS != status) { - goto exit; - } - common_status = CONTROL_PROTOCOL__pack_erase_user_config_request(&request, &request_size, device.get_control_sequence()); status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE; if (HAILO_SUCCESS != status) { @@ -1821,7 +1673,7 @@ hailo_status Control::erase_user_config(Device &device) /* Parse response */ status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, - &request); + &request, device); if (HAILO_SUCCESS != status) { goto exit; } @@ -1849,10 +1701,6 @@ hailo_status Control::read_board_config(Device &device, uint8_t *buffer, uint32_ /* Validate arguments */ CHECK_ARG_NOT_NULL(buffer); - /* Validate arch */ - status = Control::validate_arch_supported(device); - CHECK_SUCCESS(status); - CHECK(buffer_length >= BOARD_CONFIG_SIZE, HAILO_INSUFFICIENT_BUFFER, "read buffer is too small. 
provided buffer size: {} bytes, board config size: {} bytes", buffer_length, BOARD_CONFIG_SIZE); @@ -1870,7 +1718,7 @@ hailo_status Control::read_board_config(Device &device, uint8_t *buffer, uint32_ /* Parse response */ status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, - &request); + &request, device); CHECK_SUCCESS(status); response = (CONTROL_PROTOCOL__read_board_config_response_t *)(payload->parameters); actual_read_data_length = BYTE_ORDER__ntohl(response->data_length); @@ -1896,10 +1744,6 @@ hailo_status Control::write_board_config(Device &device, const uint8_t *data, ui /* Validate arguments */ CHECK_ARG_NOT_NULL(data); - /* Validate arch */ - status = Control::validate_arch_supported(device); - CHECK_SUCCESS(status); - CHECK(BOARD_CONFIG_SIZE >= data_length, HAILO_INVALID_OPERATION, "Invalid size of board config. data_length={}, max_size={}" , data_length, BOARD_CONFIG_SIZE); @@ -1914,7 +1758,7 @@ hailo_status Control::write_board_config(Device &device, const uint8_t *data, ui /* Parse response */ status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, - &request); + &request, device); CHECK_SUCCESS(status); return HAILO_SUCCESS; @@ -1934,12 +1778,6 @@ hailo_status Control::write_second_stage_to_internal_memory(Device &device, uint /* Validate arguments */ CHECK_ARG_NOT_NULL(data); - /* Validate arch */ - status = Control::validate_arch_supported(device); - if (HAILO_SUCCESS != status) { - goto exit; - } - common_status = CONTROL_PROTOCOL__write_second_stage_to_internal_memory_request(&request, &request_size, device.get_control_sequence(), offset, data, data_length); status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? 
HAILO_SUCCESS : HAILO_INTERNAL_FAILURE; @@ -1954,7 +1792,7 @@ hailo_status Control::write_second_stage_to_internal_memory(Device &device, uint /* Parse response */ status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, - &request); + &request, device); if (HAILO_SUCCESS != status) { goto exit; } @@ -1979,12 +1817,6 @@ hailo_status Control::copy_second_stage_to_flash(Device &device, MD5_SUM_t *expe /* Validate arguments */ CHECK_ARG_NOT_NULL(expected_md5); - /* Validate arch */ - status = Control::validate_arch_supported(device); - if (HAILO_SUCCESS != status) { - goto exit; - } - common_status = CONTROL_PROTOCOL__copy_second_stage_to_flash_request(&request, &request_size, device.get_control_sequence(), expected_md5, second_stage_size); status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE; if (HAILO_SUCCESS != status) { @@ -1998,7 +1830,7 @@ hailo_status Control::copy_second_stage_to_flash(Device &device, MD5_SUM_t *expe /* Parse response */ status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, - &request); + &request, device); if (HAILO_SUCCESS != status) { goto exit; } @@ -2019,12 +1851,6 @@ hailo_status Control::start_firmware_update(Device &device) CONTROL_PROTOCOL__response_header_t *header = NULL; CONTROL_PROTOCOL__payload_t *payload = NULL; - /* Validate arch */ - status = Control::validate_arch_supported(device); - if (HAILO_SUCCESS != status) { - goto exit; - } - common_status = CONTROL_PROTOCOL__pack_start_firmware_update_request(&request, &request_size, device.get_control_sequence()); status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? 
HAILO_SUCCESS : HAILO_INTERNAL_FAILURE; if (HAILO_SUCCESS != status) { @@ -2038,7 +1864,7 @@ hailo_status Control::start_firmware_update(Device &device) /* Parse response */ status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, - &request); + &request, device); if (HAILO_SUCCESS != status) { goto exit; } @@ -2059,12 +1885,6 @@ hailo_status Control::finish_firmware_update(Device &device) CONTROL_PROTOCOL__response_header_t *header = NULL; CONTROL_PROTOCOL__payload_t *payload = NULL; - /* Validate arch */ - status = Control::validate_arch_supported(device); - if (HAILO_SUCCESS != status) { - goto exit; - } - common_status = CONTROL_PROTOCOL__pack_finish_firmware_update_request(&request, &request_size, device.get_control_sequence()); status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE; if (HAILO_SUCCESS != status) { @@ -2078,7 +1898,7 @@ hailo_status Control::finish_firmware_update(Device &device) /* Parse response */ status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, - &request); + &request, device); if (HAILO_SUCCESS != status) { goto exit; } @@ -2102,12 +1922,6 @@ hailo_status Control::write_firmware_update(Device &device, uint32_t offset, con /* Validate arguments */ CHECK_ARG_NOT_NULL(data); - /* Validate arch */ - status = Control::validate_arch_supported(device); - if (HAILO_SUCCESS != status) { - goto exit; - } - common_status = CONTROL_PROTOCOL__write_firmware_update_request(&request, &request_size, device.get_control_sequence(), offset, data, data_length); status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? 
HAILO_SUCCESS : HAILO_INTERNAL_FAILURE; @@ -2122,7 +1936,7 @@ hailo_status Control::write_firmware_update(Device &device, uint32_t offset, con /* Parse response */ status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, - &request); + &request, device); if (HAILO_SUCCESS != status) { goto exit; } @@ -2146,12 +1960,6 @@ hailo_status Control::validate_firmware_update(Device &device, MD5_SUM_t *expect /* Validate arguments */ CHECK_ARG_NOT_NULL(expected_md5); - /* Validate arch */ - status = Control::validate_arch_supported(device); - if (HAILO_SUCCESS != status) { - goto exit; - } - common_status = CONTROL_PROTOCOL__pack_validate_firmware_update_request(&request, &request_size, device.get_control_sequence(), expected_md5, firmware_size); status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE; @@ -2166,7 +1974,7 @@ hailo_status Control::validate_firmware_update(Device &device, MD5_SUM_t *expect /* Parse response */ status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, - &request); + &request, device); if (HAILO_SUCCESS != status) { goto exit; } @@ -2191,12 +1999,6 @@ hailo_status Control::latency_measurement_read(Device &device, uint32_t *inbound /* Validate arguments */ CHECK_ARG_NOT_NULL(inbound_to_outbound_latency_nsec); - /* Validate arch */ - status = Control::validate_arch_supported(device); - if (HAILO_SUCCESS != status) { - goto exit; - } - common_status = CONTROL_PROTOCOL__pack_latency_measurement_read_request(&request, &request_size, device.get_control_sequence()); status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? 
HAILO_SUCCESS : HAILO_INTERNAL_FAILURE; if (HAILO_SUCCESS != status) { @@ -2210,7 +2012,7 @@ hailo_status Control::latency_measurement_read(Device &device, uint32_t *inbound /* Parse response */ status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, - &request); + &request, device); if (HAILO_SUCCESS != status) { goto exit; } @@ -2236,12 +2038,6 @@ hailo_status Control::latency_measurement_config(Device &device, uint8_t latency CONTROL_PROTOCOL__response_header_t *header = NULL; CONTROL_PROTOCOL__payload_t *payload = NULL; - /* Validate arch */ - status = Control::validate_arch_supported(device); - if (HAILO_SUCCESS != status) { - goto exit; - } - common_status = CONTROL_PROTOCOL__pack_latency_measurement_config_request(&request, &request_size, device.get_control_sequence(), latency_measurement_en, inbound_start_buffer_number, outbound_stop_buffer_number, inbound_stream_index, outbound_stream_index); @@ -2257,7 +2053,7 @@ hailo_status Control::latency_measurement_config(Device &device, uint8_t latency /* Parse response */ status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, - &request); + &request, device); if (HAILO_SUCCESS != status) { goto exit; } @@ -2286,12 +2082,6 @@ hailo_status Control::sensor_store_config(Device &device, uint32_t is_first, uin CHECK_ARG_NOT_NULL(data); CHECK_ARG_NOT_NULL(config_name); - /* Validate arch */ - status = Control::validate_arch_supported(device); - if (HAILO_SUCCESS != status) { - goto exit; - } - common_status = CONTROL_PROTOCOL__pack_sensor_store_config_request(&request, &request_size, device.get_control_sequence(), is_first, section_index, start_offset, reset_data_size, sensor_type, total_data_size, data, data_length, config_height, config_width, config_fps, config_name_length, config_name); @@ -2308,7 +2098,7 @@ hailo_status Control::sensor_store_config(Device &device, uint32_t is_first, uin /* Parse response */ status = 
parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, - &request); + &request, device); if (HAILO_SUCCESS != status) { goto exit; } @@ -2328,10 +2118,6 @@ hailo_status Control::sensor_set_i2c_bus_index(Device &device, uint32_t sensor_t CONTROL_PROTOCOL__response_header_t *header = NULL; CONTROL_PROTOCOL__payload_t *payload = NULL; - /* Validate arch */ - status = Control::validate_arch_supported(device); - CHECK_SUCCESS(status); - status = CONTROL_PROTOCOL__pack_sensor_set_i2c_bus_index_request(&request, &request_size, device.get_control_sequence(), sensor_type, bus_index); CHECK_SUCCESS(status); @@ -2339,7 +2125,7 @@ hailo_status Control::sensor_set_i2c_bus_index(Device &device, uint32_t sensor_t CHECK_SUCCESS(status); /* Parse response */ - status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, &request); + status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, &request, device); CHECK_SUCCESS(status); return HAILO_SUCCESS; @@ -2356,12 +2142,6 @@ hailo_status Control::sensor_load_and_start_config(Device &device, uint32_t sect CONTROL_PROTOCOL__response_header_t *header = NULL; CONTROL_PROTOCOL__payload_t *payload = NULL; - /* Validate arch */ - status = Control::validate_arch_supported(device); - if (HAILO_SUCCESS != status) { - goto exit; - } - common_status = CONTROL_PROTOCOL__pack_sensor_load_and_start_config_request(&request, &request_size, device.get_control_sequence(), section_index); status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? 
HAILO_SUCCESS : HAILO_INTERNAL_FAILURE; if (HAILO_SUCCESS != status) { @@ -2375,7 +2155,7 @@ hailo_status Control::sensor_load_and_start_config(Device &device, uint32_t sect /* Parse response */ status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, - &request); + &request, device); if (HAILO_SUCCESS != status) { goto exit; } @@ -2396,12 +2176,6 @@ hailo_status Control::sensor_reset(Device &device, uint32_t section_index) CONTROL_PROTOCOL__response_header_t *header = NULL; CONTROL_PROTOCOL__payload_t *payload = NULL; - /* Validate arch */ - status = Control::validate_arch_supported(device); - if (HAILO_SUCCESS != status) { - goto exit; - } - common_status = CONTROL_PROTOCOL__pack_sensor_reset_request(&request, &request_size, device.get_control_sequence(), section_index); status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE; if (HAILO_SUCCESS != status) { @@ -2415,7 +2189,7 @@ hailo_status Control::sensor_reset(Device &device, uint32_t section_index) /* Parse response */ status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, - &request); + &request, device); if (HAILO_SUCCESS != status) { goto exit; } @@ -2437,12 +2211,6 @@ hailo_status Control::sensor_set_generic_i2c_slave(Device &device, uint16_t slav CONTROL_PROTOCOL__response_header_t *header = NULL; CONTROL_PROTOCOL__payload_t *payload = NULL; - /* Validate arch */ - status = Control::validate_arch_supported(device); - if (HAILO_SUCCESS != status) { - goto exit; - } - common_status = CONTROL_PROTOCOL__pack_sensor_set_generic_i2c_slave_request(&request, &request_size, device.get_control_sequence(), slave_address, register_address_size, bus_index, should_hold_bus, endianness); status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? 
HAILO_SUCCESS : HAILO_INTERNAL_FAILURE; if (HAILO_SUCCESS != status) { @@ -2456,7 +2224,7 @@ hailo_status Control::sensor_set_generic_i2c_slave(Device &device, uint16_t slav /* Parse response */ status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, - &request); + &request, device); if (HAILO_SUCCESS != status) { goto exit; } @@ -2484,12 +2252,6 @@ hailo_status Control::sensor_get_config(Device &device, uint32_t section_index, /* Validate arguments */ CHECK_ARG_NOT_NULL(data); - /* Validate arch */ - status = Control::validate_arch_supported(device); - if (HAILO_SUCCESS != status) { - goto exit; - } - common_status = CONTROL_PROTOCOL__pack_sensor_get_config_request(&request, &request_size, device.get_control_sequence(), section_index, offset, data_length); status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE; @@ -2504,7 +2266,7 @@ hailo_status Control::sensor_get_config(Device &device, uint32_t section_index, /* Parse response */ status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, - &request); + &request, device); if (HAILO_SUCCESS != status) { goto exit; } @@ -2539,12 +2301,6 @@ hailo_status Control::sensor_get_sections_info(Device &device, uint8_t *data) /* Validate arguments */ CHECK_ARG_NOT_NULL(data); - /* Validate arch */ - status = Control::validate_arch_supported(device); - if (HAILO_SUCCESS != status) { - goto exit; - } - common_status = CONTROL_PROTOCOL__pack_sensor_get_sections_info_request(&request, &request_size, device.get_control_sequence()); status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? 
HAILO_SUCCESS : HAILO_INTERNAL_FAILURE; @@ -2559,7 +2315,7 @@ hailo_status Control::sensor_get_sections_info(Device &device, uint8_t *data) /* Parse response */ status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, - &request); + &request, device); if (HAILO_SUCCESS != status) { goto exit; } @@ -2604,7 +2360,7 @@ hailo_status Control::context_switch_set_network_group_header(Device &device, /* Parse response */ status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, - &request); + &request, device); if (HAILO_SUCCESS != status) { goto exit; } @@ -2640,7 +2396,7 @@ hailo_status Control::context_switch_set_context_info_chunk(Device &device, /* Parse response */ status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, - &request); + &request, device); if (HAILO_SUCCESS != status) { /* In case of max memory error, add LOGGER ERROR, and set indicative error to the user */ CHECK((CONTEXT_SWITCH_TASK_STATUS_ADD_TRIGGER_FUNCTION_REACHED_FORBIDDEN_MEMORY_SPACE != header->status.major_status), @@ -2679,12 +2435,6 @@ hailo_status Control::idle_time_get_measurement(Device &device, uint64_t *measur /* Validate arguments */ CHECK_ARG_NOT_NULL(measurement); - /* Validate arch */ - status = Control::validate_arch_supported(device); - if (HAILO_SUCCESS != status) { - goto exit; - } - common_status = CONTROL_PROTOCOL__pack_idle_time_get_measuremment_request(&request, &request_size, device.get_control_sequence()); status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? 
HAILO_SUCCESS : HAILO_INTERNAL_FAILURE; if (HAILO_SUCCESS != status) { @@ -2700,7 +2450,7 @@ hailo_status Control::idle_time_get_measurement(Device &device, uint64_t *measur /* Parse response */ status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, - &request); + &request, device); if (HAILO_SUCCESS != status) { LOGGER__ERROR("failed validating idle_time_get_measurement control response with status {}", status); goto exit; @@ -2732,12 +2482,6 @@ hailo_status Control::idle_time_set_measurement(Device &device, uint8_t measurem CONTROL_PROTOCOL__response_header_t *header = NULL; CONTROL_PROTOCOL__payload_t *payload = NULL; - /* Validate arch */ - status = Control::validate_arch_supported(device); - if (HAILO_SUCCESS != status) { - goto exit; - } - common_status = CONTROL_PROTOCOL__pack_idle_time_set_measuremment_request(&request, &request_size, device.get_control_sequence(), measurement_enable); status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE; if (HAILO_SUCCESS != status) { @@ -2752,7 +2496,7 @@ hailo_status Control::idle_time_set_measurement(Device &device, uint8_t measurem /* Parse response */ status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, - &request); + &request, device); if (HAILO_SUCCESS != status) { LOGGER__ERROR("failed idle_time_set_measurement control with status {}", status); goto exit; @@ -2767,13 +2511,9 @@ hailo_status Control::set_pause_frames(Device &device, uint8_t rx_pause_frames_e CONTROL_PROTOCOL__request_t request = {}; size_t request_size = 0; - /* Validate arch */ - auto status = Control::validate_arch_supported(device); - CHECK_SUCCESS(status); - HAILO_COMMON_STATUS_t common_status = CONTROL_PROTOCOL__pack_set_pause_frames_request(&request, &request_size, device.get_control_sequence(), rx_pause_frames_enable); - status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? 
HAILO_SUCCESS : HAILO_INTERNAL_FAILURE; + auto status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE; CHECK_SUCCESS(status); uint8_t response_buffer[RESPONSE_MAX_BUFFER_SIZE] = {}; @@ -2785,7 +2525,7 @@ hailo_status Control::set_pause_frames(Device &device, uint8_t rx_pause_frames_e CONTROL_PROTOCOL__response_header_t *header = NULL; CONTROL_PROTOCOL__payload_t *payload = NULL; status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, - &request); + &request, device); CHECK_SUCCESS(status); return HAILO_SUCCESS; @@ -2826,7 +2566,7 @@ hailo_status Control::download_context_action_list_chunk(Device &device, uint32_ /* Parse response */ status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, - &request); + &request, device); if (HAILO_SUCCESS != status) { goto exit; } @@ -2905,7 +2645,8 @@ hailo_status Control::download_context_action_list(Device &device, uint32_t netw hailo_status Control::change_context_switch_status(Device &device, CONTROL_PROTOCOL__CONTEXT_SWITCH_STATUS_t state_machine_status, - uint8_t network_group_index, uint16_t dynamic_batch_size, bool keep_nn_config_during_reset) + uint8_t network_group_index, uint16_t dynamic_batch_size, uint16_t batch_count, + bool keep_nn_config_during_reset) { hailo_status status = HAILO_UNINITIALIZED; HAILO_COMMON_STATUS_t common_status = HAILO_COMMON_STATUS__UNINITIALIZED; @@ -2918,7 +2659,7 @@ hailo_status Control::change_context_switch_status(Device &device, common_status = CONTROL_PROTOCOL__pack_change_context_switch_status_request(&request, &request_size, device.get_control_sequence(), state_machine_status, network_group_index, dynamic_batch_size, - keep_nn_config_during_reset); + batch_count, keep_nn_config_during_reset); status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? 
HAILO_SUCCESS : HAILO_INTERNAL_FAILURE; if (HAILO_SUCCESS != status) { goto exit; @@ -2931,7 +2672,7 @@ hailo_status Control::change_context_switch_status(Device &device, /* Parse response */ status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, - &request); + &request, device); if (HAILO_SUCCESS != status) { goto exit; } @@ -2941,19 +2682,20 @@ exit: return status; } -hailo_status Control::enable_core_op(Device &device, uint8_t network_group_index, uint16_t dynamic_batch_size) +hailo_status Control::enable_core_op(Device &device, uint8_t network_group_index, uint16_t dynamic_batch_size, + uint16_t batch_count) { - static const auto REMOVE_NN_CONFIG_DURING_RESET = false; return Control::change_context_switch_status(device, CONTROL_PROTOCOL__CONTEXT_SWITCH_STATUS_ENABLED, - network_group_index, dynamic_batch_size, REMOVE_NN_CONFIG_DURING_RESET); + network_group_index, dynamic_batch_size, batch_count); } hailo_status Control::reset_context_switch_state_machine(Device &device, bool keep_nn_config_during_reset) { static const auto IGNORE_NETWORK_GROUP_INDEX = 0; static const auto IGNORE_DYNAMIC_BATCH_SIZE = 0; + static const auto DEFAULT_BATCH_COUNT = 0; return Control::change_context_switch_status(device, CONTROL_PROTOCOL__CONTEXT_SWITCH_STATUS_RESET, - IGNORE_NETWORK_GROUP_INDEX, IGNORE_DYNAMIC_BATCH_SIZE, keep_nn_config_during_reset); + IGNORE_NETWORK_GROUP_INDEX, IGNORE_DYNAMIC_BATCH_SIZE, DEFAULT_BATCH_COUNT, keep_nn_config_during_reset); } hailo_status Control::wd_enable(Device &device, uint8_t cpu_id, bool should_enable) @@ -2967,12 +2709,6 @@ hailo_status Control::wd_enable(Device &device, uint8_t cpu_id, bool should_enab CONTROL_PROTOCOL__response_header_t *header = NULL; CONTROL_PROTOCOL__payload_t *payload = NULL; - /* Validate arch */ - status = Control::validate_arch_supported(device); - if (HAILO_SUCCESS != status) { - goto exit; - } - common_status = CONTROL_PROTOCOL__pack_wd_enable(&request, &request_size, 
device.get_control_sequence(), cpu_id, should_enable); status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE; if (HAILO_SUCCESS != status) { @@ -2987,7 +2723,7 @@ hailo_status Control::wd_enable(Device &device, uint8_t cpu_id, bool should_enab /* Parse response */ status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, - &request); + &request, device); if (HAILO_SUCCESS != status) { LOGGER__ERROR("failed wd_enable control with status {}", status); goto exit; @@ -3008,12 +2744,6 @@ hailo_status Control::wd_config(Device &device, uint8_t cpu_id, uint32_t wd_cycl CONTROL_PROTOCOL__response_header_t *header = NULL; CONTROL_PROTOCOL__payload_t *payload = NULL; - /* Validate arch */ - status = Control::validate_arch_supported(device); - if (HAILO_SUCCESS != status) { - goto exit; - } - common_status = CONTROL_PROTOCOL__pack_wd_config(&request, &request_size, device.get_control_sequence(), cpu_id, wd_cycles, wd_mode); status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE; if (HAILO_SUCCESS != status) { @@ -3028,7 +2758,7 @@ hailo_status Control::wd_config(Device &device, uint8_t cpu_id, uint32_t wd_cycl /* Parse response */ status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, - &request); + &request, device); if (HAILO_SUCCESS != status) { LOGGER__ERROR("failed wd_config control with status {}", status); goto exit; @@ -3053,12 +2783,6 @@ hailo_status Control::previous_system_state(Device &device, uint8_t cpu_id, CONT CHECK_ARG_NOT_NULL(system); - /* Validate arch */ - status = Control::validate_arch_supported(device); - if (HAILO_SUCCESS != status) { - goto exit; - } - common_status = CONTROL_PROTOCOL__pack_previous_system_state(&request, &request_size, device.get_control_sequence(), cpu_id); status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? 
HAILO_SUCCESS : HAILO_INTERNAL_FAILURE; if (HAILO_SUCCESS != status) { @@ -3073,7 +2797,7 @@ hailo_status Control::previous_system_state(Device &device, uint8_t cpu_id, CONT /* Parse response */ status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, - &request); + &request, device); if (HAILO_SUCCESS != status) { LOGGER__ERROR("failed previous_system_state control with status {}", status); goto exit; @@ -3115,7 +2839,7 @@ hailo_status Control::set_dataflow_interrupt(Device &device, uint8_t interrupt_t /* Parse response */ status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, - &request); + &request, device); if (HAILO_SUCCESS != status) { goto exit; } @@ -3152,7 +2876,7 @@ hailo_status Control::d2h_notification_manager_set_host_info(Device &device, uin /* Parse response */ status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, - &request); + &request, device); if (HAILO_SUCCESS != status) { goto exit; } @@ -3186,7 +2910,7 @@ hailo_status Control::d2h_notification_manager_send_host_info_notification(Devic /* Parse response */ status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, - &request); + &request, device); if (HAILO_SUCCESS != status) { goto exit; } @@ -3223,7 +2947,7 @@ hailo_status Control::clear_configured_apps(Device &device) } /* Parse response */ - status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, &request); + status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, &request, device); if (HAILO_SUCCESS != status) { LOGGER__ERROR("failed clear_configured_apps control with status {}", status); goto exit; @@ -3246,12 +2970,6 @@ hailo_status Control::get_chip_temperature(Device &device, hailo_chip_temperatur CONTROL_PROTOCOL__payload_t *payload = NULL; 
CONTROL_PROTOCOL__get_chip_temperature_response_t* temps = NULL; - /* Validate arch */ - status = Control::validate_arch_supported(device); - if (HAILO_SUCCESS != status) { - goto exit; - } - common_status = CONTROL_PROTOCOL__pack_get_chip_temperature_request(&request, &request_size, device.get_control_sequence()); status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE; if (HAILO_SUCCESS != status) { @@ -3265,7 +2983,7 @@ hailo_status Control::get_chip_temperature(Device &device, hailo_chip_temperatur /* Parse response */ status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, - &request); + &request, device); if (HAILO_SUCCESS != status) { goto exit; } @@ -3291,12 +3009,6 @@ hailo_status Control::enable_debugging(Device &device, bool is_rma) CONTROL_PROTOCOL__response_header_t *header = NULL; CONTROL_PROTOCOL__payload_t *payload = NULL; - /* Validate arch */ - status = Control::validate_arch_supported(device); - if (HAILO_SUCCESS != status) { - goto exit; - } - common_status = CONTROL_PROTOCOL__pack_enable_debugging_request(&request, &request_size, device.get_control_sequence(), is_rma); status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? 
HAILO_SUCCESS : HAILO_INTERNAL_FAILURE; if (HAILO_SUCCESS != status) { @@ -3310,7 +3022,7 @@ hailo_status Control::enable_debugging(Device &device, bool is_rma) /* Parse response */ status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, - &request); + &request, device); if (HAILO_SUCCESS != status) { goto exit; } @@ -3331,12 +3043,6 @@ Expected Control:: CONTROL_PROTOCOL__response_header_t *header = NULL; CONTROL_PROTOCOL__payload_t *payload = NULL; - /* Validate arguments */ - - /* Validate arch */ - status = Control::validate_arch_supported(device); - CHECK_SUCCESS_AS_EXPECTED(status); - common_status = CONTROL_PROTOCOL__pack_get_extended_device_information_request(&request, &request_size, device.get_control_sequence()); status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE; CHECK_SUCCESS_AS_EXPECTED(status); @@ -3345,7 +3051,7 @@ Expected Control:: CHECK_SUCCESS_AS_EXPECTED(status); /* Parse response */ - status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, &request); + status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, &request, device); CHECK_SUCCESS_AS_EXPECTED(status); return std::move(*(CONTROL_PROTOCOL__get_extended_device_information_response_t *)(payload->parameters)); @@ -3383,12 +3089,6 @@ Expected Control::get_health_information(Device &device) CONTROL_PROTOCOL__payload_t *payload = NULL; CONTROL_PROTOCOL__get_health_information_response_t *get_health_information_response = NULL; - /* Validate arguments */ - - /* Validate arch */ - status = Control::validate_arch_supported(device); - CHECK_SUCCESS_AS_EXPECTED(status); - common_status = CONTROL_PROTOCOL__pack_get_health_information_request(&request, &request_size, device.get_control_sequence()); status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? 
HAILO_SUCCESS : HAILO_INTERNAL_FAILURE; CHECK_SUCCESS_AS_EXPECTED(status); @@ -3397,7 +3097,8 @@ Expected Control::get_health_information(Device &device) CHECK_SUCCESS_AS_EXPECTED(status); /* Parse response */ - status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, &request); + status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, &request, + device); CHECK_SUCCESS_AS_EXPECTED(status); get_health_information_response = (CONTROL_PROTOCOL__get_health_information_response_t *)(payload->parameters); @@ -3428,7 +3129,7 @@ hailo_status Control::config_context_switch_breakpoint(Device &device, uint8_t b /* Parse response */ status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, - &request); + &request, device); if (HAILO_SUCCESS != status) { goto exit; } @@ -3463,7 +3164,7 @@ hailo_status Control::get_context_switch_breakpoint_status(Device &device, uint8 /* Parse response */ status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, - &request); + &request, device); if (HAILO_SUCCESS != status) { goto exit; } @@ -3503,7 +3204,7 @@ hailo_status Control::get_context_switch_main_header(Device &device, CONTROL_PRO /* Parse response */ status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, - &request); + &request, device); if (HAILO_SUCCESS != status) { goto exit; } @@ -3539,7 +3240,7 @@ hailo_status Control::config_context_switch_timestamp(Device &device, uint16_t b /* Parse response */ status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, - &request); + &request, device); CHECK_SUCCESS(status); return HAILO_SUCCESS; @@ -3598,14 +3299,10 @@ hailo_status Control::run_bist_test(Device &device, bool is_top_test, uint32_t t CONTROL_PROTOCOL__response_header_t *header = NULL; CONTROL_PROTOCOL__payload_t *payload = 
NULL; - /* Validate arch */ - auto status = Control::validate_arch_supported(device); - CHECK_SUCCESS(status); - auto common_status = CONTROL_PROTOCOL__pack_run_bist_test_request( &request, &request_size, device.get_control_sequence(), is_top_test, top_bypass_bitmap, cluster_index, cluster_bypass_bitmap_0, cluster_bypass_bitmap_1); - status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE; + auto status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE; CHECK_SUCCESS(status); status = device.fw_interact((uint8_t*)(&request), request_size, (uint8_t*)&response_buffer, &response_size); @@ -3613,7 +3310,7 @@ hailo_status Control::run_bist_test(Device &device, bool is_top_test, uint32_t t /* Parse response */ status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, - &request); + &request, device); CHECK_SUCCESS(status); return HAILO_SUCCESS; @@ -3628,13 +3325,9 @@ hailo_status Control::set_sleep_state(Device &device, hailo_sleep_state_t sleep_ CONTROL_PROTOCOL__response_header_t *header = NULL; CONTROL_PROTOCOL__payload_t *payload = NULL; - /* Validate arch */ - auto status = Control::validate_arch_supported(device); - CHECK_SUCCESS(status); - auto common_status = CONTROL_PROTOCOL__pack_set_sleep_state_request( &request, &request_size, device.get_control_sequence(), static_cast(sleep_state)); - status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE; + auto status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? 
HAILO_SUCCESS : HAILO_INTERNAL_FAILURE; CHECK_SUCCESS(status); status = device.fw_interact((uint8_t*)(&request), request_size, (uint8_t*)&response_buffer, &response_size); @@ -3642,14 +3335,14 @@ hailo_status Control::set_sleep_state(Device &device, hailo_sleep_state_t sleep_ /* Parse response */ status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, - &request); + &request, device); CHECK_SUCCESS(status); return HAILO_SUCCESS; } hailo_status Control::change_hw_infer_status(Device &device, CONTROL_PROTOCOL__hw_infer_state_t state, - uint8_t network_group_index, uint16_t dynamic_batch_size, + uint8_t network_group_index, uint16_t dynamic_batch_size, uint16_t batch_count, CONTROL_PROTOCOL__hw_infer_channels_info_t *channels_info, CONTROL_PROTOCOL__hw_only_infer_results_t *results) { CONTROL_PROTOCOL__request_t request = {}; @@ -3664,7 +3357,7 @@ hailo_status Control::change_hw_infer_status(Device &device, CONTROL_PROTOCOL__h auto common_status = CONTROL_PROTOCOL__pack_change_hw_infer_status_request( &request, &request_size, device.get_control_sequence(), static_cast(state), - network_group_index, dynamic_batch_size, channels_info); + network_group_index, dynamic_batch_size, batch_count, channels_info); auto status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? 
HAILO_SUCCESS : HAILO_INTERNAL_FAILURE; CHECK_SUCCESS(status); @@ -3673,7 +3366,7 @@ hailo_status Control::change_hw_infer_status(Device &device, CONTROL_PROTOCOL__h /* Parse response */ status = parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, - &request); + &request, device); CHECK_SUCCESS(status); change_hw_infer_status_response = (CONTROL_PROTOCOL__change_hw_infer_status_response_t *)(payload->parameters); @@ -3684,20 +3377,21 @@ hailo_status Control::change_hw_infer_status(Device &device, CONTROL_PROTOCOL__h } hailo_status Control::start_hw_only_infer(Device &device, uint8_t network_group_index, uint16_t dynamic_batch_size, - CONTROL_PROTOCOL__hw_infer_channels_info_t *channels_info) + uint16_t batch_count, CONTROL_PROTOCOL__hw_infer_channels_info_t *channels_info) { CONTROL_PROTOCOL__hw_only_infer_results_t results = {}; return Control::change_hw_infer_status(device, CONTROL_PROTOCOL__HW_INFER_STATE_START, - network_group_index, dynamic_batch_size, channels_info ,&results); + network_group_index, dynamic_batch_size, batch_count, channels_info ,&results); } hailo_status Control::stop_hw_only_infer(Device &device, CONTROL_PROTOCOL__hw_only_infer_results_t *results) { const uint8_t DEFAULT_NETWORK_GROUP = 0; const uint16_t DEFAULT_DYNAMIC_BATCH_SIZE = 1; + const uint16_t DEFAULT_BATCH_COUNT = 1; CONTROL_PROTOCOL__hw_infer_channels_info_t channels_info_default = {}; return Control::change_hw_infer_status(device, CONTROL_PROTOCOL__HW_INFER_STATE_STOP, - DEFAULT_NETWORK_GROUP, DEFAULT_DYNAMIC_BATCH_SIZE, &channels_info_default, results); + DEFAULT_NETWORK_GROUP, DEFAULT_DYNAMIC_BATCH_SIZE, DEFAULT_BATCH_COUNT, &channels_info_default, results); } } /* namespace hailort */ diff --git a/hailort/libhailort/src/device_common/control.hpp b/hailort/libhailort/src/device_common/control.hpp index d79ad85..aa65dd5 100644 --- a/hailort/libhailort/src/device_common/control.hpp +++ b/hailort/libhailort/src/device_common/control.hpp @@ 
-42,7 +42,7 @@ public: static hailo_status parse_and_validate_response(uint8_t *message, uint32_t message_size, CONTROL_PROTOCOL__response_header_t **header, CONTROL_PROTOCOL__payload_t **payload, - CONTROL_PROTOCOL__request_t *request); + CONTROL_PROTOCOL__request_t *request, Device &device); /** * Receive information about the device. @@ -288,11 +288,14 @@ public: * Enable core-op * * @param[in] device - The Hailo device. - * @param[in] core_op_index - core_op index + * @param[in] core_op_index - core_op index + * @param[in] dynamic_batch_size - actual batch size + * @param[in] batch_count - number of batches user wish to run on hailo chip * * @return Upon success, returns @a HAILO_SUCCESS. Otherwise, returns an @a static hailo_status error. */ - static hailo_status enable_core_op(Device &device, uint8_t core_op_index, uint16_t dynamic_batch_size); + static hailo_status enable_core_op(Device &device, uint8_t core_op_index, uint16_t dynamic_batch_size, + uint16_t batch_count); /** * reset context switch state machine * @@ -373,10 +376,10 @@ public: static Expected get_hw_consts(Device &device); static hailo_status set_sleep_state(Device &device, hailo_sleep_state_t sleep_state); static hailo_status change_hw_infer_status(Device &device, CONTROL_PROTOCOL__hw_infer_state_t state, - uint8_t network_group_index, uint16_t dynamic_batch_size, + uint8_t network_group_index, uint16_t dynamic_batch_size, uint16_t batch_count, CONTROL_PROTOCOL__hw_infer_channels_info_t *channels_info, CONTROL_PROTOCOL__hw_only_infer_results_t *results); static hailo_status start_hw_only_infer(Device &device, uint8_t network_group_index, uint16_t dynamic_batch_size, - CONTROL_PROTOCOL__hw_infer_channels_info_t *channels_info); + uint16_t batch_count, CONTROL_PROTOCOL__hw_infer_channels_info_t *channels_info); static hailo_status stop_hw_only_infer(Device &device, CONTROL_PROTOCOL__hw_only_infer_results_t *results); // TODO: needed? 
static hailo_status power_measurement(Device &device, CONTROL_PROTOCOL__dvm_options_t dvm, @@ -403,11 +406,11 @@ private: bool *is_action_list_end, uint32_t *batch_counter); static hailo_status context_switch_set_context_info_chunk(Device &device, const CONTROL_PROTOCOL__context_switch_context_info_single_control_t &context_info); - static hailo_status change_context_switch_status(Device &device, + static hailo_status change_context_switch_status(Device &device, CONTROL_PROTOCOL__CONTEXT_SWITCH_STATUS_t state_machine_status, - uint8_t network_group_index, uint16_t dynamic_batch_size, bool keep_nn_config_during_reset); + uint8_t network_group_index, uint16_t dynamic_batch_size, uint16_t batch_count, + bool keep_nn_config_during_reset = false); static Expected get_extended_device_info_response(Device &device); - static hailo_status validate_arch_supported(Device &device, const std::vector &supported_archs = { HAILO_ARCH_HAILO8, HAILO_ARCH_HAILO8L }); }; } /* namespace hailort */ diff --git a/hailort/libhailort/src/device_common/control_protocol.cpp b/hailort/libhailort/src/device_common/control_protocol.cpp index 92412bb..1c5b38c 100644 --- a/hailort/libhailort/src/device_common/control_protocol.cpp +++ b/hailort/libhailort/src/device_common/control_protocol.cpp @@ -57,7 +57,7 @@ const char *CONTROL_PROTOCOL__get_textual_opcode(CONTROL_PROTOCOL__OPCODE_t opco return CONTROL_PROTOCOL__textual_format[opcode]; } -#define CHANGE_HW_INFER_REQUEST_PARAMETER_COUNT (4) +#define CHANGE_HW_INFER_REQUEST_PARAMETER_COUNT (5) /* Functions declarations */ HAILO_COMMON_STATUS_t control_protocol__parse_message(uint8_t *message, @@ -1810,10 +1810,11 @@ exit: return status; } +#define CONTEXT_SWITCH_SWITCH_STATUS_REQUEST_PARAMS (5) HAILO_COMMON_STATUS_t CONTROL_PROTOCOL__pack_change_context_switch_status_request( CONTROL_PROTOCOL__request_t *request, size_t *request_size, uint32_t sequence, CONTROL_PROTOCOL__CONTEXT_SWITCH_STATUS_t state_machine_status, uint8_t application_index, - 
uint16_t dynamic_batch_size, bool keep_nn_config_during_reset) + uint16_t dynamic_batch_size, uint16_t batch_count, bool keep_nn_config_during_reset) { HAILO_COMMON_STATUS_t status = HAILO_COMMON_STATUS__UNINITIALIZED; size_t local_request_size = 0; @@ -1826,7 +1827,8 @@ HAILO_COMMON_STATUS_t CONTROL_PROTOCOL__pack_change_context_switch_status_reques /* Header */ local_request_size = CONTROL_PROTOCOL__REQUEST_BASE_SIZE + sizeof(CONTROL_PROTOCOL__change_context_switch_status_request_t); - control_protocol__pack_request_header(request, sequence, HAILO_CONTROL_OPCODE_CHANGE_CONTEXT_SWITCH_STATUS, 4); + control_protocol__pack_request_header(request, sequence, + HAILO_CONTROL_OPCODE_CHANGE_CONTEXT_SWITCH_STATUS, CONTEXT_SWITCH_SWITCH_STATUS_REQUEST_PARAMS); /* state_machine_status */ request->parameters.change_context_switch_status_request.state_machine_status_length = @@ -1844,8 +1846,13 @@ HAILO_COMMON_STATUS_t CONTROL_PROTOCOL__pack_change_context_switch_status_reques request->parameters.change_context_switch_status_request.dynamic_batch_size_length = BYTE_ORDER__htonl(sizeof(request->parameters.change_context_switch_status_request.dynamic_batch_size)); request->parameters.change_context_switch_status_request.dynamic_batch_size = dynamic_batch_size; - - /* dynamic_batch_size */ + + /* batch_count */ + request->parameters.change_context_switch_status_request.batch_count_length = + BYTE_ORDER__htonl(sizeof(request->parameters.change_context_switch_status_request.batch_count)); + request->parameters.change_context_switch_status_request.batch_count = batch_count; + + /* keep_nn_config_during_reset */ request->parameters.change_context_switch_status_request.keep_nn_config_during_reset_length = BYTE_ORDER__htonl(sizeof(request->parameters.change_context_switch_status_request.keep_nn_config_during_reset)); request->parameters.change_context_switch_status_request.keep_nn_config_during_reset = keep_nn_config_during_reset; @@ -2392,7 +2399,7 @@ exit: HAILO_COMMON_STATUS_t 
CONTROL_PROTOCOL__pack_change_hw_infer_status_request( CONTROL_PROTOCOL__request_t *request, size_t *request_size, uint32_t sequence, uint8_t hw_infer_state, uint8_t network_group_index, uint16_t dynamic_batch_size, - CONTROL_PROTOCOL__hw_infer_channels_info_t *channels_info) + uint16_t batch_count, CONTROL_PROTOCOL__hw_infer_channels_info_t *channels_info) { HAILO_COMMON_STATUS_t status = HAILO_COMMON_STATUS__UNINITIALIZED; size_t local_request_size = 0; @@ -2423,6 +2430,11 @@ HAILO_COMMON_STATUS_t CONTROL_PROTOCOL__pack_change_hw_infer_status_request( BYTE_ORDER__htonl(sizeof(request->parameters.change_hw_infer_status_request.dynamic_batch_size)); request->parameters.change_hw_infer_status_request.dynamic_batch_size = dynamic_batch_size; + /* batch_count */ + request->parameters.change_hw_infer_status_request.batch_count_length = + BYTE_ORDER__htonl(sizeof(request->parameters.change_hw_infer_status_request.batch_count)); + request->parameters.change_hw_infer_status_request.batch_count = batch_count; + /* channels_info */ request->parameters.change_hw_infer_status_request.channels_info_length = BYTE_ORDER__htonl(sizeof(request->parameters.change_hw_infer_status_request.channels_info)); diff --git a/hailort/libhailort/src/device_common/control_protocol.hpp b/hailort/libhailort/src/device_common/control_protocol.hpp index 544f4e2..ade0260 100644 --- a/hailort/libhailort/src/device_common/control_protocol.hpp +++ b/hailort/libhailort/src/device_common/control_protocol.hpp @@ -106,7 +106,7 @@ HAILO_COMMON_STATUS_t CONTROL_PROTOCOL__pack_download_context_action_list_reques HAILO_COMMON_STATUS_t CONTROL_PROTOCOL__pack_change_context_switch_status_request( CONTROL_PROTOCOL__request_t *request, size_t *request_size, uint32_t sequence, CONTROL_PROTOCOL__CONTEXT_SWITCH_STATUS_t state_machine_status, uint8_t application_index, - uint16_t dynamic_batch_size, bool keep_nn_config_during_reset); + uint16_t dynamic_batch_size, uint16_t batch_count, bool 
keep_nn_config_during_reset); HAILO_COMMON_STATUS_t CONTROL_PROTOCOL__pack_wd_enable( CONTROL_PROTOCOL__request_t *request, size_t *request_size, @@ -172,6 +172,6 @@ HAILO_COMMON_STATUS_t CONTROL_PROTOCOL__pack_get_hw_consts_request(CONTROL_PROTO HAILO_COMMON_STATUS_t CONTROL_PROTOCOL__pack_set_sleep_state_request(CONTROL_PROTOCOL__request_t *request, size_t *request_size, uint32_t sequence, uint8_t sleep_state); HAILO_COMMON_STATUS_t CONTROL_PROTOCOL__pack_change_hw_infer_status_request(CONTROL_PROTOCOL__request_t *request, size_t *request_size, uint32_t sequence, uint8_t hw_infer_state, uint8_t network_group_index, - uint16_t dynamic_batch_size, CONTROL_PROTOCOL__hw_infer_channels_info_t *channels_info); + uint16_t dynamic_batch_size, uint16_t batch_count, CONTROL_PROTOCOL__hw_infer_channels_info_t *channels_info); #endif /* _CONTROL_PROTOCOL_HPP_ */ \ No newline at end of file diff --git a/hailort/libhailort/src/device_common/d2h_events_parser.cpp b/hailort/libhailort/src/device_common/d2h_events_parser.cpp index 412e928..53384e2 100644 --- a/hailort/libhailort/src/device_common/d2h_events_parser.cpp +++ b/hailort/libhailort/src/device_common/d2h_events_parser.cpp @@ -43,6 +43,7 @@ static HAILO_COMMON_STATUS_t D2H_EVENTS__parse_health_monitor_cpu_ecc_error_noti static HAILO_COMMON_STATUS_t D2H_EVENTS__parse_health_monitor_cpu_ecc_fatal_notification(D2H_EVENT_MESSAGE_t *d2h_notification_message); static HAILO_COMMON_STATUS_t D2H_EVENTS__parse_context_switch_breakpoint_reached(D2H_EVENT_MESSAGE_t *d2h_notification_message); static HAILO_COMMON_STATUS_t D2H_EVENTS__parse_health_monitor_clock_changed_event_notification(D2H_EVENT_MESSAGE_t *d2h_notification_message); +static HAILO_COMMON_STATUS_t D2H_EVENTS__parse_hw_infer_manager_infer_done_notification(D2H_EVENT_MESSAGE_t *d2h_notification_message); /********************************************************************** * Globals @@ -58,7 +59,8 @@ firmware_notifications_parser_t 
g_firmware_notifications_parser[D2H_EVENT_ID_COU D2H_EVENTS__parse_health_monitor_cpu_ecc_error_notification, D2H_EVENTS__parse_health_monitor_cpu_ecc_fatal_notification, D2H_EVENTS__parse_context_switch_breakpoint_reached, - D2H_EVENTS__parse_health_monitor_clock_changed_event_notification + D2H_EVENTS__parse_health_monitor_clock_changed_event_notification, + D2H_EVENTS__parse_hw_infer_manager_infer_done_notification }; /********************************************************************** * Internal Functions @@ -176,6 +178,25 @@ l_exit: return status; } +static HAILO_COMMON_STATUS_t D2H_EVENTS__parse_hw_infer_manager_infer_done_notification(D2H_EVENT_MESSAGE_t *d2h_notification_message) +{ + HAILO_COMMON_STATUS_t status = HAILO_COMMON_STATUS__UNINITIALIZED; + + if (D2H_EVENT_HW_INFER_MANAGER_INFER_DONE_PARAMETER_COUNT != d2h_notification_message->header.parameter_count) { + LOGGER__ERROR("d2h notification invalid parameter count: {}", d2h_notification_message->header.parameter_count); + status = HAILO_STATUS__D2H_EVENTS__INCORRECT_PARAMETER_COUNT; + goto l_exit; + } + + LOGGER__INFO("Got hw infer done notification - Infer took {} cycles", + d2h_notification_message->message_parameters.hw_infer_manager_infer_done_event.infer_cycles); + + status = HAILO_COMMON_STATUS__SUCCESS; + +l_exit: + return status; +} + static HAILO_COMMON_STATUS_t D2H_EVENTS__parse_health_monitor_closed_streams_notification(D2H_EVENT_MESSAGE_t *d2h_notification_message) { HAILO_COMMON_STATUS_t status = HAILO_COMMON_STATUS__UNINITIALIZED; diff --git a/hailort/libhailort/src/device_common/device.cpp b/hailort/libhailort/src/device_common/device.cpp index 2043e3e..22bb85a 100644 --- a/hailort/libhailort/src/device_common/device.cpp +++ b/hailort/libhailort/src/device_common/device.cpp @@ -95,9 +95,9 @@ Expected> Device::create() { auto device_ids = scan(); CHECK_EXPECTED(device_ids, "Failed scan devices"); - CHECK_AS_EXPECTED(device_ids->size() == 1, HAILO_INVALID_OPERATION, - "Expected only 
1 device on the system (found {}). Pass device_id to create a specific device", device_ids->size()); + CHECK_AS_EXPECTED(device_ids->size() >= 1, HAILO_INVALID_OPERATION, "There is no hailo device on the system"); + // Choose the first device. return Device::create(device_ids->at(0)); } @@ -155,6 +155,31 @@ Expected> Device::create_eth(const std::string &ip_addr) return device; } +Expected> Device::create_eth(const std::string &device_address, uint16_t port, + uint32_t timeout_milliseconds, uint8_t max_number_of_attempts) +{ + /* Validate address length */ + CHECK_AS_EXPECTED(INET_ADDRSTRLEN >= device_address.size(), + HAILO_INVALID_ARGUMENT, "device_address is too long"); + + hailo_eth_device_info_t device_info = {}; + device_info.host_address.sin_family = AF_INET; + device_info.host_address.sin_port = HAILO_ETH_PORT_ANY; + auto status = Socket::pton(AF_INET, HAILO_ETH_ADDRESS_ANY, &(device_info.host_address.sin_addr)); + CHECK_SUCCESS_AS_EXPECTED(status); + + device_info.device_address.sin_family = AF_INET; + device_info.device_address.sin_port = port; + status = Socket::pton(AF_INET, device_address.c_str(), &(device_info.device_address.sin_addr)); + CHECK_SUCCESS_AS_EXPECTED(status); + + device_info.timeout_millis = timeout_milliseconds; + device_info.max_number_of_attempts = max_number_of_attempts; + device_info.max_payload_size = HAILO_DEFAULT_ETH_MAX_PAYLOAD_SIZE; + + return create_eth(device_info); +} + Expected Device::parse_pcie_device_info(const std::string &device_info_str) { const bool LOG_ON_FAILURE = true; @@ -184,6 +209,28 @@ Expected Device::get_device_type(const std::string &device_id) } } +bool Device::device_ids_equal(const std::string &first, const std::string &second) +{ + const bool DONT_LOG_ON_FAILURE = false; + if (IntegratedDevice::DEVICE_ID == first) { + // On integrated devices device all ids should be the same + return first == second; + } else if (auto first_pcie_info = PcieDevice::parse_pcie_device_info(first, DONT_LOG_ON_FAILURE)) { + 
auto second_pcie_info = PcieDevice::parse_pcie_device_info(second, DONT_LOG_ON_FAILURE); + if (!second_pcie_info) { + // second is not pcie + return false; + } + return PcieDevice::pcie_device_infos_equal(*first_pcie_info, *second_pcie_info); + } else if (auto eth_info = EthernetDevice::parse_eth_device_info(first, DONT_LOG_ON_FAILURE)) { + // On ethernet devices, device ids should e equal + return first == second; + } else { + // first device does not match. + return false; + } +} + uint32_t Device::get_control_sequence() { return m_control_sequence; diff --git a/hailort/libhailort/src/device_common/device_internal.cpp b/hailort/libhailort/src/device_common/device_internal.cpp index 5fd1ea8..3045ded 100644 --- a/hailort/libhailort/src/device_common/device_internal.cpp +++ b/hailort/libhailort/src/device_common/device_internal.cpp @@ -570,8 +570,6 @@ void DeviceBase::d2h_notification_thread_main(const std::string &device_id) continue; } - LOGGER__INFO("[{}] Got notification from fw with id: {}", device_id, hailo_notification_id); - std::shared_ptr callback_func = nullptr; void *callback_opaque = nullptr; { @@ -665,6 +663,9 @@ hailo_status DeviceBase::fw_notification_id_to_hailo(D2H_EVENT_ID_t fw_notificat case HEALTH_MONITOR_CLOCK_CHANGED_EVENT_ID: *hailo_notification_id = HAILO_NOTIFICATION_ID_HEALTH_MONITOR_CLOCK_CHANGED_EVENT; break; + case HW_INFER_MANAGER_INFER_DONE: + *hailo_notification_id = HAILO_NOTIFICATION_ID_HW_INFER_MANAGER_INFER_DONE; + break; default: status = HAILO_INVALID_ARGUMENT; goto l_exit; diff --git a/hailort/libhailort/src/device_common/device_internal.hpp b/hailort/libhailort/src/device_common/device_internal.hpp index 58a51ad..8ffe767 100644 --- a/hailort/libhailort/src/device_common/device_internal.hpp +++ b/hailort/libhailort/src/device_common/device_internal.hpp @@ -83,6 +83,14 @@ public: virtual hailo_status erase_user_config() override; static hailo_device_architecture_t hef_arch_to_device_arch(ProtoHEFHwArch hef_arch); + virtual 
Expected get_architecture() const override + { + // FW is always up if we got here (device implementations's ctor would fail otherwise) + // Hence, just return it + return Expected(m_device_architecture); + } + + protected: struct NotificationThreadSharedParams { NotificationThreadSharedParams() : is_running(false) {} diff --git a/hailort/libhailort/src/eth/eth_device.cpp b/hailort/libhailort/src/eth/eth_device.cpp index 32f955d..9b4eeca 100644 --- a/hailort/libhailort/src/eth/eth_device.cpp +++ b/hailort/libhailort/src/eth/eth_device.cpp @@ -79,7 +79,8 @@ hailo_status EthernetDevice::wait_for_wakeup() CHECK_SUCCESS(status); /* Parse and validate the response */ - return Control::parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, &request); + return Control::parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, &payload, &request, + *this); } Expected> EthernetDevice::create(const hailo_eth_device_info_t &device_info) @@ -213,11 +214,10 @@ Expected> EthernetDevice::scan(const std::s std::chrono::milliseconds timeout) { // Convert interface name to IP address - std::array interface_ip_address{}; - auto status = EthernetUtils::get_ip_from_interface(interface_name.c_str(), interface_ip_address.data(), interface_ip_address.size()); - CHECK_SUCCESS_AS_EXPECTED(status); + auto interface_ip_address = EthernetUtils::get_ip_from_interface(interface_name); + CHECK_EXPECTED(interface_ip_address); - return scan_by_host_address(interface_ip_address.data(), timeout); + return scan_by_host_address(*interface_ip_address, timeout); } hailo_status get_udp_broadcast_params(const char *host_address, struct in_addr &interface_ip_address, @@ -348,7 +348,7 @@ hailo_status EthernetDevice::reset_impl(CONTROL_PROTOCOL__reset_type_t reset_typ // TODO: fix logic with respect to is_expecting_response if (0 != response_size) { status = Control::parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, - 
&payload, &request); + &payload, &request, *this); CHECK_SUCCESS(status); CHECK(is_expecting_response, HAILO_INTERNAL_FAILURE, "Recived valid response from FW for control who is not expecting one."); @@ -361,13 +361,6 @@ hailo_status EthernetDevice::reset_impl(CONTROL_PROTOCOL__reset_type_t reset_typ return HAILO_SUCCESS; } -Expected EthernetDevice::get_architecture() const -{ - // FW is always up if we got here (EthernetDevice's ctor would fail otherwise) - // Hence, just return it - return Expected(m_device_architecture); -} - hailo_eth_device_info_t EthernetDevice::get_device_info() const { return m_device_info; @@ -438,11 +431,9 @@ Expected EthernetDevice::create_networks_group_vec auto core_op_metadata = hef.pimpl->get_core_op_metadata(network_group_name); CHECK_EXPECTED(core_op_metadata); + auto core_op_metadata_ptr = core_op_metadata.release(); - auto core_op_metadata_ptr = make_shared_nothrow(core_op_metadata.release()); - CHECK_AS_EXPECTED(nullptr != core_op_metadata_ptr, HAILO_OUT_OF_HOST_MEMORY); - - auto net_flow_ops = hef.pimpl->post_process_ops(core_op_metadata_ptr->core_op_name()); + auto metadata = hef.pimpl->network_group_metadata(core_op_metadata_ptr->core_op_name()); auto status = HAILO_UNINITIALIZED; auto single_context_app = HcpConfigCoreOp(*this, m_active_core_op_holder, net_group_config.release(), @@ -462,7 +453,7 @@ Expected EthernetDevice::create_networks_group_vec m_core_ops.push_back(core_op_ptr); core_ops_ptrs.push_back(core_op_ptr); - auto net_group_expected = ConfiguredNetworkGroupBase::create(config_params, std::move(core_ops_ptrs), std::move(net_flow_ops)); + auto net_group_expected = ConfiguredNetworkGroupBase::create(config_params, std::move(core_ops_ptrs), std::move(metadata)); CHECK_EXPECTED(net_group_expected); auto net_group_ptr = net_group_expected.release(); diff --git a/hailort/libhailort/src/eth/eth_device.hpp b/hailort/libhailort/src/eth/eth_device.hpp index e79f93b..fca41f1 100644 --- 
a/hailort/libhailort/src/eth/eth_device.hpp +++ b/hailort/libhailort/src/eth/eth_device.hpp @@ -56,7 +56,6 @@ public: static Expected> create(const hailo_eth_device_info_t &device_info); static Expected> create(const std::string &ip_addr); - virtual Expected get_architecture() const override; hailo_eth_device_info_t get_device_info() const; virtual const char* get_dev_id() const override; diff --git a/hailort/libhailort/src/eth/eth_stream.cpp b/hailort/libhailort/src/eth/eth_stream.cpp index 5f6919d..8b6cada 100644 --- a/hailort/libhailort/src/eth/eth_stream.cpp +++ b/hailort/libhailort/src/eth/eth_stream.cpp @@ -138,20 +138,19 @@ Expected EthernetInputStream::sync_write_raw_buffer(const MemoryView &bu return size; } -hailo_status EthernetInputStream::sync_write_all_raw_buffer_no_transform_impl(void *buffer, size_t offset, size_t size) +hailo_status EthernetInputStream::write_impl(const MemoryView &buffer) { hailo_status status = HAILO_UNINITIALIZED; - ASSERT(NULL != buffer); - - CHECK(size >= MIN_UDP_PAYLOAD_SIZE, HAILO_INVALID_ARGUMENT, "Input must be larger than {}", MIN_UDP_PAYLOAD_SIZE); - CHECK(((size % HailoRTCommon::HW_DATA_ALIGNMENT) == 0), HAILO_INVALID_ARGUMENT, - "Input must be aligned to {} (got {})", HailoRTCommon::HW_DATA_ALIGNMENT, size); + CHECK(buffer.size() >= MIN_UDP_PAYLOAD_SIZE, HAILO_INVALID_ARGUMENT, "Input must be larger than {}", MIN_UDP_PAYLOAD_SIZE); + CHECK(((buffer.size() % HailoRTCommon::HW_DATA_ALIGNMENT) == 0), HAILO_INVALID_ARGUMENT, + "Input must be aligned to {} (got {})", HailoRTCommon::HW_DATA_ALIGNMENT, buffer.size()); + const size_t offset = 0; if (this->configuration.is_sync_enabled) { - status = eth_stream__write_all_with_sync(buffer, offset, size); + status = eth_stream__write_all_with_sync(buffer.data(), offset, buffer.size()); } else { - status = eth_stream__write_all_no_sync(buffer, offset, size); + status = eth_stream__write_all_no_sync(buffer.data(), offset, buffer.size()); } if (HAILO_STREAM_ABORTED_BY_USER == 
status) { LOGGER__INFO("eth_stream__write_all was aborted!"); @@ -163,7 +162,7 @@ hailo_status EthernetInputStream::sync_write_all_raw_buffer_no_transform_impl(vo return HAILO_SUCCESS; } -hailo_status EthernetInputStream::eth_stream__write_all_no_sync(void *buffer, size_t offset, size_t size) { +hailo_status EthernetInputStream::eth_stream__write_all_no_sync(const void *buffer, size_t offset, size_t size) { size_t remainder_size = 0; size_t packet_size = this->configuration.max_payload_size; @@ -180,13 +179,13 @@ hailo_status EthernetInputStream::eth_stream__write_all_no_sync(void *buffer, si return eth_stream__write_with_remainder(buffer, offset, size, remainder_size); } -hailo_status EthernetInputStream::eth_stream__write_with_remainder(void *buffer, size_t offset, size_t size, size_t remainder_size) { +hailo_status EthernetInputStream::eth_stream__write_with_remainder(const void *buffer, size_t offset, size_t size, size_t remainder_size) { size_t transfer_size = 0; size_t offset_end_without_remainder = offset + size - remainder_size; while (offset < offset_end_without_remainder) { transfer_size = offset_end_without_remainder - offset; - auto expected_bytes_written = sync_write_raw_buffer(MemoryView(static_cast(buffer) + offset, transfer_size)); + auto expected_bytes_written = sync_write_raw_buffer(MemoryView::create_const(static_cast(buffer) + offset, transfer_size)); if (HAILO_STREAM_ABORTED_BY_USER == expected_bytes_written.status()) { LOGGER__INFO("sync_write_raw_buffer was aborted!"); return expected_bytes_written.status(); @@ -195,7 +194,7 @@ hailo_status EthernetInputStream::eth_stream__write_with_remainder(void *buffer, offset += expected_bytes_written.release(); } if (0 < remainder_size) { - auto expected_bytes_written = sync_write_raw_buffer(MemoryView(static_cast(buffer) + offset, remainder_size)); + auto expected_bytes_written = sync_write_raw_buffer(MemoryView::create_const(static_cast(buffer) + offset, remainder_size)); if 
(HAILO_STREAM_ABORTED_BY_USER == expected_bytes_written.status()) { LOGGER__INFO("sync_write_raw_buffer was aborted!"); return expected_bytes_written.status(); @@ -220,7 +219,7 @@ TokenBucketEthernetInputStream::TokenBucketEthernetInputStream(Device &device, U token_bucket() {} -hailo_status TokenBucketEthernetInputStream::eth_stream__write_with_remainder(void *buffer, size_t offset, size_t size, size_t remainder_size) { +hailo_status TokenBucketEthernetInputStream::eth_stream__write_with_remainder(const void *buffer, size_t offset, size_t size, size_t remainder_size) { size_t transfer_size = 0; size_t offset_end_without_remainder = offset + size - remainder_size; @@ -231,7 +230,7 @@ hailo_status TokenBucketEthernetInputStream::eth_stream__write_with_remainder(vo (void)token_bucket.consumeWithBorrowAndWait(MAX_CONSUME_SIZE, rate_bytes_per_sec, BURST_SIZE); transfer_size = offset_end_without_remainder - offset; - auto expected_bytes_written = sync_write_raw_buffer(MemoryView(static_cast(buffer) + offset, transfer_size)); + auto expected_bytes_written = sync_write_raw_buffer(MemoryView::create_const(static_cast(buffer) + offset, transfer_size)); if (HAILO_STREAM_ABORTED_BY_USER == expected_bytes_written.status()) { LOGGER__INFO("sync_write_raw_buffer was aborted!"); return expected_bytes_written.status(); @@ -244,7 +243,7 @@ hailo_status TokenBucketEthernetInputStream::eth_stream__write_with_remainder(vo // However, since remainder_size is modulo MAX_UDP_PAYLOAD_SIZE and BURST_SIZE == MAX_UDP_PAYLOAD_SIZE, it should be smaller. 
(void)token_bucket.consumeWithBorrowAndWait(static_cast(remainder_size), rate_bytes_per_sec, BURST_SIZE); - auto expected_bytes_written = sync_write_raw_buffer(MemoryView(static_cast(buffer) + offset, remainder_size)); + auto expected_bytes_written = sync_write_raw_buffer(MemoryView::create_const(static_cast(buffer) + offset, remainder_size)); if (HAILO_STREAM_ABORTED_BY_USER == expected_bytes_written.status()) { LOGGER__INFO("sync_write_raw_buffer was aborted!"); return expected_bytes_written.status(); @@ -296,7 +295,7 @@ TrafficControlEthernetInputStream::TrafficControlEthernetInputStream(Device &dev {} #endif -hailo_status EthernetInputStream::eth_stream__write_all_with_sync(void *buffer, size_t offset, size_t size) { +hailo_status EthernetInputStream::eth_stream__write_all_with_sync(const void *buffer, size_t offset, size_t size) { hailo_status status = HAILO_UNINITIALIZED; size_t number_of_frames = 0; size_t frame_size = m_stream_info.hw_frame_size; @@ -635,7 +634,7 @@ bool EthernetOutputStream::is_sync_packet(const void* buffer, size_t offset, siz ((hailo_output_sync_packet_t*)((uint8_t*)buffer + offset))->barker == BYTE_ORDER__ntohl(SYNC_PACKET_BARKER)); } -hailo_status EthernetOutputStream::read_all(MemoryView &buffer) +hailo_status EthernetOutputStream::read_impl(MemoryView &buffer) { if ((buffer.size() % HailoRTCommon::HW_DATA_ALIGNMENT) != 0) { LOGGER__ERROR("Size must be aligned to {} (got {})", HailoRTCommon::HW_DATA_ALIGNMENT, buffer.size()); @@ -649,7 +648,7 @@ hailo_status EthernetOutputStream::read_all(MemoryView &buffer) status = this->read_all_no_sync(buffer.data(), 0, buffer.size()); } if (HAILO_STREAM_ABORTED_BY_USER == status) { - LOGGER__INFO("read_all was aborted!"); + LOGGER__INFO("read was aborted!"); return status; } CHECK_SUCCESS(status); diff --git a/hailort/libhailort/src/eth/eth_stream.hpp b/hailort/libhailort/src/eth/eth_stream.hpp index 0b6c0a9..7702f83 100644 --- a/hailort/libhailort/src/eth/eth_stream.hpp +++ 
b/hailort/libhailort/src/eth/eth_stream.hpp @@ -54,15 +54,15 @@ private: Device &m_device; hailo_status eth_stream__config_input_sync_params(uint32_t frames_per_sync); - hailo_status eth_stream__write_all_no_sync(void *buffer, size_t offset, size_t size); - hailo_status eth_stream__write_all_with_sync(void *buffer, size_t offset, size_t size); + hailo_status eth_stream__write_all_no_sync(const void *buffer, size_t offset, size_t size); + hailo_status eth_stream__write_all_with_sync(const void *buffer, size_t offset, size_t size); hailo_status set_timeout(std::chrono::milliseconds timeout); void set_max_payload_size(uint16_t size); protected: - virtual hailo_status eth_stream__write_with_remainder(void *buffer, size_t offset, size_t size, size_t remainder_size); - virtual Expected sync_write_raw_buffer(const MemoryView &buffer) override; - virtual hailo_status sync_write_all_raw_buffer_no_transform_impl(void *buffer, size_t offset, size_t size) override; + virtual hailo_status eth_stream__write_with_remainder(const void *buffer, size_t offset, size_t size, size_t remainder_size); + Expected sync_write_raw_buffer(const MemoryView &buffer); + virtual hailo_status write_impl(const MemoryView &buffer) override; public: EthernetInputStream(Device &device, Udp &&udp, EventPtr &&core_op_activated_event, const LayerInfo &layer_info, hailo_status &status) : @@ -103,7 +103,7 @@ private: static const uint32_t MAX_CONSUME_SIZE = MAX_UDP_PAYLOAD_SIZE; protected: - virtual hailo_status eth_stream__write_with_remainder(void *buffer, size_t offset, size_t size, size_t remainder_size); + virtual hailo_status eth_stream__write_with_remainder(const void *buffer, size_t offset, size_t size, size_t remainder_size) override; public: TokenBucketEthernetInputStream(Device &device, Udp &&udp, EventPtr &&core_op_activated_event, @@ -140,7 +140,7 @@ private: Device &m_device; EthernetOutputStream(Device &device, const LayerInfo &edge_layer, Udp &&udp, EventPtr &&core_op_activated_event, 
hailo_status &status) : - OutputStreamBase(edge_layer, std::move(core_op_activated_event), status), + OutputStreamBase(edge_layer, HAILO_STREAM_INTERFACE_ETH, std::move(core_op_activated_event), status), leftover_buffer(), leftover_size(0), // Firmware starts sending sync sequence from 0, so treating the first previous as max value (that will be overflowed to 0) @@ -151,7 +151,7 @@ private: m_device(device) {} - hailo_status read_all(MemoryView &buffer) override; + hailo_status read_impl(MemoryView &buffer) override; hailo_status read_all_with_sync(void *buffer, size_t offset, size_t size); hailo_status read_all_no_sync(void *buffer, size_t offset, size_t size); @@ -166,7 +166,7 @@ private: public: virtual ~EthernetOutputStream(); - virtual Expected sync_read_raw_buffer(MemoryView &buffer); + Expected sync_read_raw_buffer(MemoryView &buffer); static Expected> create(Device &device, const LayerInfo &edge_layer, const hailo_eth_output_stream_params_t ¶ms, EventPtr core_op_activated_event); diff --git a/hailort/libhailort/src/eth/hcp_config_core_op.cpp b/hailort/libhailort/src/eth/hcp_config_core_op.cpp index eb6d8bc..39ad039 100644 --- a/hailort/libhailort/src/eth/hcp_config_core_op.cpp +++ b/hailort/libhailort/src/eth/hcp_config_core_op.cpp @@ -80,6 +80,12 @@ Expected HcpConfigCoreOp::get_boundary_vdma_channel_by return make_unexpected(HAILO_INVALID_OPERATION); } +Expected HcpConfigCoreOp::run_hw_infer_estimator() +{ + LOGGER__ERROR("run_hw_infer_estimator function is not supported on ETH core-ops"); + return make_unexpected(HAILO_INVALID_OPERATION); +} + hailo_status HcpConfigCoreOp::activate_impl(uint16_t dynamic_batch_size, bool resume_pending_stream_transfers) { m_active_core_op_holder.set(*this); diff --git a/hailort/libhailort/src/eth/hcp_config_core_op.hpp b/hailort/libhailort/src/eth/hcp_config_core_op.hpp index 710d98c..9ef18bd 100644 --- a/hailort/libhailort/src/eth/hcp_config_core_op.hpp +++ b/hailort/libhailort/src/eth/hcp_config_core_op.hpp @@ -50,6 
+50,7 @@ public: virtual hailo_status activate_impl(uint16_t dynamic_batch_size, bool resume_pending_stream_transfers) override; virtual hailo_status deactivate_impl(bool keep_nn_config_during_reset) override; + virtual Expected run_hw_infer_estimator() override; virtual ~HcpConfigCoreOp() = default; HcpConfigCoreOp(const HcpConfigCoreOp &other) = delete; diff --git a/hailort/libhailort/src/eth/network_rate_calculator.cpp b/hailort/libhailort/src/eth/network_rate_calculator.cpp index 0578d67..5a2c450 100644 --- a/hailort/libhailort/src/eth/network_rate_calculator.cpp +++ b/hailort/libhailort/src/eth/network_rate_calculator.cpp @@ -12,6 +12,7 @@ #include "hailo/network_rate_calculator.hpp" #include "common/utils.hpp" +#include "common/ethernet_utils.hpp" #include "eth/eth_stream.hpp" @@ -148,4 +149,42 @@ Expected> NetworkUdpRateCalculator::get_udp_ports_r return results; } +hailo_status NetworkUdpRateCalculator::set_rate_limit(const std::string &ip, uint16_t port, uint32_t rate_bytes_per_sec) +{ +#if defined(__GNUC__) + auto tc = TrafficControlUtil::create(ip, port, rate_bytes_per_sec); + CHECK_EXPECTED_AS_STATUS(tc); + CHECK_SUCCESS(tc->set_rate_limit()); + + return HAILO_SUCCESS; +#else + (void)ip; + (void)port; + (void)rate_bytes_per_sec; + LOGGER__ERROR("set_rate_limit is only supported on Unix platforms"); + return HAILO_NOT_IMPLEMENTED; +#endif +} + +hailo_status NetworkUdpRateCalculator::reset_rate_limit(const std::string &ip, uint16_t port) +{ +#if defined(__GNUC__) + auto tc = TrafficControlUtil::create(ip, port, 0); + CHECK_EXPECTED_AS_STATUS(tc); + CHECK_SUCCESS(tc->reset_rate_limit()); + + return HAILO_SUCCESS; +#else + (void)ip; + (void)port; + LOGGER__ERROR("reset_rate_limit is only supported on Unix platforms"); + return HAILO_NOT_IMPLEMENTED; +#endif +} + +Expected NetworkUdpRateCalculator::get_interface_name(const std::string &ip) +{ + return EthernetUtils::get_interface_from_board_ip(ip); +} + } /* namespace hailort */ diff --git 
a/hailort/libhailort/src/hailort.cpp b/hailort/libhailort/src/hailort.cpp index e065116..8704bed 100644 --- a/hailort/libhailort/src/hailort.cpp +++ b/hailort/libhailort/src/hailort.cpp @@ -34,12 +34,36 @@ #include "utils/shared_resource_manager.hpp" #include "vdevice/vdevice_internal.hpp" #include "utils/profiler/tracer_macros.hpp" +#include "utils/exported_resource_manager.hpp" #include +#include using namespace hailort; +// Note: Async stream API uses BufferPtr as a param. When exporting BufferPtrs to the user via c-api, they must be +// stored in some container, otherwise their ref count may reach zero and they will be freed, despite the +// c-api user still using them. (shared_ptr doesn't have a release method like unique_ptr) +// Singleton holding a mapping between the address of a buffer allocated/mapped via hailo_allocate_buffer/hailo_dma_map_buffer_to_device +// to the underlying BufferPtr. When a buffer is freed via hailo_free_buffer, the BufferPtr object will be removed from +// the storage. +using ExportedBufferManager = ExportedResourceManager; + +struct ThreeTupleHash { + template + std::size_t operator()(const T& tuple) const { + auto hash = std::hash::type>()(std::get<0>(tuple)); + hash ^= std::hash::type>()(std::get<1>(tuple)); + hash ^= std::hash::type>()(std::get<2>(tuple)); + return hash; + } +}; + +// (buffer_addr, device_id, mapping_direction) +using DmaMappingKey = std::tuple; +using DmaMappingManager = ExportedResourceManager; + COMPAT__INITIALIZER(hailort__initialize_logger) { // Init logger singleton if compiling only HailoRT @@ -203,7 +227,7 @@ hailo_status hailo_create_pcie_device(hailo_pcie_device_info_t *device_info, hai { CHECK_ARG_NOT_NULL(device_out); - auto device = (device_info == nullptr) ? PcieDevice::create() : PcieDevice::create(*device_info); + auto device = (device_info == nullptr) ? 
Device::create_pcie() : Device::create_pcie(*device_info); CHECK_EXPECTED_AS_STATUS(device, "Failed creating pcie device"); *device_out = reinterpret_cast(device.release().release()); @@ -574,6 +598,71 @@ hailo_status hailo_init_configure_params(hailo_hef hef, hailo_stream_interface_t return HAILO_SUCCESS; } +void fill_cfg_params_struct_by_class(const std::string &network_group_name, const ConfigureNetworkParams &class_in, hailo_configure_network_group_params_t *struct_out) +{ + strncpy(struct_out->name, network_group_name.c_str(), network_group_name.size() + 1); + struct_out->batch_size = class_in.batch_size; + struct_out->power_mode = class_in.power_mode; + struct_out->latency = class_in.latency; + + int i = 0; + for (auto & pair: class_in.network_params_by_name) { + strncpy(struct_out->network_params_by_name[i].name, pair.first.c_str(), pair.first.length() + 1); + struct_out->network_params_by_name[i].network_params = pair.second; + i++; + } + struct_out->network_params_by_name_count = class_in.network_params_by_name.size(); + + i = 0; + for (auto & pair: class_in.stream_params_by_name) { + strncpy(struct_out->stream_params_by_name[i].name, pair.first.c_str(), pair.first.length() + 1); + struct_out->stream_params_by_name[i].stream_params = pair.second; + i++; + } + struct_out->stream_params_by_name_count = class_in.stream_params_by_name.size(); +} + +hailo_status hailo_init_configure_params_by_vdevice(hailo_hef hef, hailo_vdevice vdevice, + hailo_configure_params_t *params) +{ + CHECK_ARG_NOT_NULL(hef); + CHECK_ARG_NOT_NULL(vdevice); + CHECK_ARG_NOT_NULL(params); + + auto configure_params = (reinterpret_cast(vdevice))->create_configure_params(*reinterpret_cast(hef)); + CHECK_EXPECTED_AS_STATUS(configure_params); + + params->network_group_params_count = configure_params->size(); + uint8_t net_group = 0; + for (auto &cfg_params : configure_params.value()) { + fill_cfg_params_struct_by_class(cfg_params.first, cfg_params.second, 
&(params->network_group_params[net_group])); + net_group++; + } + + return HAILO_SUCCESS; +} + +hailo_status hailo_init_configure_params_by_device(hailo_hef hef, hailo_device device, + hailo_configure_params_t *params) +{ + CHECK_ARG_NOT_NULL(hef); + CHECK_ARG_NOT_NULL(device); + CHECK_ARG_NOT_NULL(params); + + auto configure_params = (reinterpret_cast(device))->create_configure_params(*reinterpret_cast(hef)); + CHECK_EXPECTED_AS_STATUS(configure_params); + + params->network_group_params_count = configure_params->size(); + uint8_t net_group = 0; + for (auto &cfg_params : configure_params.value()) { + fill_cfg_params_struct_by_class(cfg_params.first, cfg_params.second, &(params->network_group_params[net_group])); + net_group++; + } + + return HAILO_SUCCESS; +} + + hailo_status hailo_init_configure_params_mipi_input(hailo_hef hef, hailo_stream_interface_t output_interface, hailo_mipi_input_stream_params_t *mipi_params, hailo_configure_params_t *params) { @@ -1008,6 +1097,106 @@ hailo_status hailo_set_scheduler_priority(hailo_configured_network_group configu return (reinterpret_cast(configured_network_group))->set_scheduler_priority(priority, network_name_str); } +hailo_status hailo_allocate_buffer(size_t size, const hailo_buffer_parameters_t *allocation_params, void **buffer_out) +{ + CHECK_ARG_NOT_NULL(allocation_params); + CHECK_ARG_NOT_NULL(buffer_out); + CHECK(0 != size, HAILO_INVALID_ARGUMENT, "Buffer size must be greater than zero"); + + auto buffer_storage_params = BufferStorageParams::create(*allocation_params); + CHECK_EXPECTED_AS_STATUS(buffer_storage_params); + + // Create buffer + auto buffer = Buffer::create_shared(size, *buffer_storage_params); + CHECK_EXPECTED_AS_STATUS(buffer); + + // Store the buffer in manager (otherwise it'll be freed at the end of this func) + const auto status = ExportedBufferManager::register_resource(*buffer, buffer->get()->data()); + CHECK_SUCCESS(status); + + *buffer_out = buffer->get()->data(); + + return HAILO_SUCCESS; +} + 
+hailo_status hailo_free_buffer(void *buffer) +{ + CHECK_ARG_NOT_NULL(buffer); + return ExportedBufferManager::unregister_resource(buffer); +} + +static Expected get_mapping_key(void *buffer, hailo_device device, hailo_dma_buffer_direction_t direction) +{ + hailo_device_id_t device_id{}; + auto status = hailo_get_device_id(device, &device_id); + CHECK_SUCCESS_AS_EXPECTED(status); + + return std::make_tuple(buffer, std::string(device_id.id), direction); +} + +// TODO: hailo_dma_map_buffer_to_device/hailo_dma_unmap_buffer_from_device aren't thread safe when crossed with +// hailo_allocate_buffer/hailo_free_buffer (HRT-10669) +hailo_status hailo_dma_map_buffer_to_device(void *buffer, size_t size, hailo_device device, hailo_dma_buffer_direction_t direction) +{ + CHECK_ARG_NOT_NULL(buffer); + CHECK_ARG_NOT_NULL(device); + + auto hailort_allocated_buffer = ExportedBufferManager::get_resource(buffer); + if (hailort_allocated_buffer) { + // TODO: this will change here HRT-10983 + // The buffer has been allocated by hailort + // The mapping is held by the Buffer object + auto mapping_result = hailort_allocated_buffer->get()->storage().dma_map(*reinterpret_cast(device), direction); + CHECK_EXPECTED_AS_STATUS(mapping_result); + const auto new_mapping = mapping_result.value(); + + if (!new_mapping) { + return HAILO_DMA_MAPPING_ALREADY_EXISTS; + } + } else { + // The buffer has been allocated by the user + // Create dma storage + auto dma_mapped_buffer = DmaStorage::create_from_user_address(buffer, size, direction, *reinterpret_cast(device)); + CHECK_EXPECTED_AS_STATUS(dma_mapped_buffer); + assert(buffer == dma_mapped_buffer.value()->user_address()); + auto dma_mapped_buffer_ptr = dma_mapped_buffer.release(); + + // Store the mapping in manager (otherwise it'll be freed at the end of this func) + auto key = get_mapping_key(dma_mapped_buffer_ptr->user_address(), device, direction); + CHECK_EXPECTED_AS_STATUS(key); + const auto status = 
DmaMappingManager::register_resource(dma_mapped_buffer_ptr, key.release()); + if (HAILO_INVALID_ARGUMENT == status) { + // TODO: This will change once we allow mapping the same buffer in different directions (HRT-10656). + // Checking that the mapping exists will need to be at DmaStorage's level + return HAILO_DMA_MAPPING_ALREADY_EXISTS; + } + CHECK_SUCCESS(status); + } + + return HAILO_SUCCESS; +} + +hailo_status hailo_dma_unmap_buffer_from_device(void *buffer, hailo_device device, hailo_dma_buffer_direction_t direction) +{ + // TODO: support mapping the same buffer in different directions (HRT-10656) + (void)direction; + + CHECK_ARG_NOT_NULL(buffer); + CHECK_ARG_NOT_NULL(device); + + auto hailort_allocated_buffer = ExportedBufferManager::get_resource(buffer); + if (hailort_allocated_buffer) { + // TODO: mappings get dtor'd when the Buffer object is dtor'd. + // We want all the mapping to be held in one place for hailort::Buffers and for user alloacted buffers + // so this will change (HRT-10983) + return HAILO_SUCCESS; + } + + auto key = get_mapping_key(buffer, device, direction); + CHECK_EXPECTED_AS_STATUS(key); + return DmaMappingManager::unregister_resource(key.release()); +} + hailo_status hailo_calculate_eth_input_rate_limits(hailo_hef hef, const char *network_group_name, uint32_t fps, hailo_rate_limit_t *rates, size_t *rates_length) { @@ -1106,6 +1295,114 @@ hailo_status hailo_stream_write_raw_buffer(hailo_input_stream stream, const void return HAILO_SUCCESS; } +hailo_status hailo_stream_wait_for_async_output_ready(hailo_output_stream stream, size_t transfer_size, uint32_t timeout_ms) +{ + CHECK_ARG_NOT_NULL(stream); + return (reinterpret_cast(stream))->wait_for_async_ready(transfer_size, std::chrono::milliseconds(timeout_ms)); +} + +hailo_status hailo_stream_wait_for_async_input_ready(hailo_input_stream stream, size_t transfer_size, uint32_t timeout_ms) +{ + CHECK_ARG_NOT_NULL(stream); + return 
(reinterpret_cast(stream))->wait_for_async_ready(transfer_size, std::chrono::milliseconds(timeout_ms)); +} + +hailo_status hailo_output_stream_get_async_max_queue_size(hailo_output_stream stream, size_t *queue_size) +{ + CHECK_ARG_NOT_NULL(stream); + CHECK_ARG_NOT_NULL(queue_size); + + auto local_queue_size = reinterpret_cast(stream)->get_async_max_queue_size(); + CHECK_EXPECTED_AS_STATUS(local_queue_size); + *queue_size = local_queue_size.release(); + + return HAILO_SUCCESS; +} + +hailo_status hailo_input_stream_get_async_max_queue_size(hailo_input_stream stream, size_t *queue_size) +{ + CHECK_ARG_NOT_NULL(stream); + CHECK_ARG_NOT_NULL(queue_size); + + auto local_queue_size = reinterpret_cast(stream)->get_async_max_queue_size(); + CHECK_EXPECTED_AS_STATUS(local_queue_size); + *queue_size = local_queue_size.release(); + + return HAILO_SUCCESS; +} + +static InputStream::TransferDoneCallback wrap_c_user_callback(hailo_stream_write_async_callback_t callback, void *opaque) +{ + return [callback, opaque](const InputStream::CompletionInfo &completion_info) { + hailo_stream_write_async_completion_info_t c_completion_info{}; + c_completion_info.status = completion_info.status; + c_completion_info.buffer_addr = completion_info.buffer_addr; + c_completion_info.buffer_size = completion_info.buffer_size; + c_completion_info.opaque = opaque; + callback(&c_completion_info); + }; +} + +static OutputStream::TransferDoneCallback wrap_c_user_callback(hailo_stream_read_async_callback_t callback, void *opaque) +{ + return [callback, opaque](const OutputStream::CompletionInfo &completion_info) { + hailo_stream_read_async_completion_info_t c_completion_info{}; + c_completion_info.status = completion_info.status; + c_completion_info.buffer_addr = completion_info.buffer_addr; + c_completion_info.buffer_size = completion_info.buffer_size; + c_completion_info.opaque = opaque; + callback(&c_completion_info); + }; +} + +hailo_status hailo_stream_read_raw_buffer_async(hailo_output_stream 
stream, void *buffer, size_t size, + hailo_stream_read_async_callback_t callback, void *opaque) +{ + CHECK_ARG_NOT_NULL(stream); + CHECK_ARG_NOT_NULL(buffer); + CHECK_ARG_NOT_NULL(callback); + + auto buffer_ref = ExportedBufferManager::get_resource(buffer); + if (HAILO_NOT_FOUND == buffer_ref.status()) { + // User addr (buffer hasn't been allocated by hailo_allocate_buffer) + return (reinterpret_cast(stream))->read_async(buffer, size, + wrap_c_user_callback(callback, opaque)); + } + + // buffer has been allocated by hailo_allocate_buffer + CHECK_EXPECTED_AS_STATUS(buffer_ref); + auto buffer_ptr = buffer_ref->get(); + assert(buffer_ptr != nullptr); + CHECK(size == buffer_ptr->size(), HAILO_INVALID_ARGUMENT); + + return (reinterpret_cast(stream))->read_async(buffer_ptr, + wrap_c_user_callback(callback, opaque)); +} + +hailo_status hailo_stream_write_raw_buffer_async(hailo_input_stream stream, const void *buffer, size_t size, + hailo_stream_write_async_callback_t callback, void *opaque) +{ + CHECK_ARG_NOT_NULL(stream); + CHECK_ARG_NOT_NULL(buffer); + CHECK_ARG_NOT_NULL(callback); + + auto buffer_ref = ExportedBufferManager::get_resource(const_cast(buffer)); + if (HAILO_NOT_FOUND == buffer_ref.status()) { + // User addr (buffer hasn't been allocated by hailo_allocate_buffer) + return (reinterpret_cast(stream))->write_async(buffer, size, + wrap_c_user_callback(callback, opaque)); + } + + // buffer has been allocated by hailo_allocate_buffer + CHECK_EXPECTED_AS_STATUS(buffer_ref); + auto buffer_ptr = buffer_ref->get(); + assert(buffer_ptr != nullptr); + CHECK(size == buffer_ptr->size(), HAILO_INVALID_ARGUMENT); + + return (reinterpret_cast(stream))->write_async(buffer_ptr, + wrap_c_user_callback(callback, opaque)); +} + hailo_status hailo_fuse_nms_frames(const hailo_nms_fuse_input_t *nms_fuse_inputs, uint32_t inputs_count, uint8_t *fused_buffer, size_t fused_buffer_size) { @@ -1328,6 +1625,25 @@ hailo_status hailo_demux_raw_frame_by_output_demuxer(hailo_output_demuxer 
demuxe return HAILO_SUCCESS; } +hailo_status hailo_demux_by_name_raw_frame_by_output_demuxer(hailo_output_demuxer demuxer, const void *src, + size_t src_size, hailo_stream_raw_buffer_by_name_t *raw_buffers_by_name, size_t raw_buffers_count) +{ + CHECK_ARG_NOT_NULL(src); + CHECK_ARG_NOT_NULL(raw_buffers_by_name); + CHECK_ARG_NOT_NULL(demuxer); + + std::map raw_buffers_map; + for (size_t i = 0; i < raw_buffers_count; i++) { + raw_buffers_map.emplace(std::string(raw_buffers_by_name[i].name), + MemoryView(raw_buffers_by_name[i].raw_buffer.buffer, raw_buffers_by_name[i].raw_buffer.size)); + } + auto src_memview = MemoryView::create_const(src, src_size); + auto status = reinterpret_cast(demuxer)->transform_demux(src_memview, raw_buffers_map); + CHECK_SUCCESS(status); + + return HAILO_SUCCESS; +} + hailo_status hailo_get_mux_infos_by_output_demuxer(hailo_output_demuxer demuxer, hailo_stream_info_t *stream_infos, size_t *number_of_streams) { diff --git a/hailort/libhailort/src/hailort_defaults.cpp b/hailort/libhailort/src/hailort_defaults.cpp index c845d3e..527b95e 100644 --- a/hailort/libhailort/src/hailort_defaults.cpp +++ b/hailort/libhailort/src/hailort_defaults.cpp @@ -361,6 +361,14 @@ std::string HailoRTDefaults::get_network_name(const std::string &net_group_name) hailo_format_t HailoRTDefaults::expand_auto_format(const hailo_format_t &host_format, const hailo_format_t &hw_format) { + if (HAILO_FORMAT_ORDER_HAILO_NMS == hw_format.order) { + assert(HAILO_FORMAT_TYPE_UINT16 == hw_format.type); + // TODO (HRT-11082): On NMS, change meaning of auto to float + if (HAILO_FORMAT_TYPE_AUTO == host_format.type) { + LOGGER__WARNING("Received 'HAILO_FORMAT_TYPE_AUTO' for NMS output, which is currently translated as HAILO_FORMAT_TYPE_UINT16. 
"\ + "Starting HailoRT version 4.15, this will change to HAILO_FORMAT_TYPE_FLOAT32"); + } + } auto host_format_copy = host_format; if (HAILO_FORMAT_TYPE_AUTO == host_format_copy.type) { host_format_copy.type = hw_format.type; diff --git a/hailort/libhailort/src/hef/context_switch_actions.cpp b/hailort/libhailort/src/hef/context_switch_actions.cpp index eda6184..0279ef4 100644 --- a/hailort/libhailort/src/hef/context_switch_actions.cpp +++ b/hailort/libhailort/src/hef/context_switch_actions.cpp @@ -16,7 +16,6 @@ namespace hailort { - static uint8_t pack_vdma_channel_id(const vdma::ChannelId &channel_id) { return static_cast(channel_id.channel_index | @@ -83,7 +82,7 @@ Expected ContextSwitchConfigAction::serialize_header() const Expected NoneAction::create() { auto result = ContextSwitchConfigActionPtr(new (std::nothrow) NoneAction()); - CHECK_AS_EXPECTED((nullptr != result), HAILO_OUT_OF_HOST_MEMORY); + CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY); return result; } @@ -113,7 +112,7 @@ Expected ActivateConfigChannelAction::create(uint8 { auto result = ContextSwitchConfigActionPtr(new (std::nothrow) ActivateConfigChannelAction(config_stream_index, channel_id, host_buffer_info)); - CHECK_AS_EXPECTED((nullptr != result), HAILO_OUT_OF_HOST_MEMORY); + CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY); return result; } @@ -146,7 +145,7 @@ Expected DeactivateConfigChannelAction::create(uin { auto result = ContextSwitchConfigActionPtr(new (std::nothrow) DeactivateConfigChannelAction(config_stream_index, channel_id)); - CHECK_AS_EXPECTED((nullptr != result), HAILO_OUT_OF_HOST_MEMORY); + CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY); return result; } @@ -179,7 +178,7 @@ Expected WriteDataCcwAction::create( "Too many ccw burst {} (must fit in uint16)", total_ccw_burst); auto result = ContextSwitchConfigActionPtr(new (std::nothrow) WriteDataCcwAction( std::move(data), config_stream_index, static_cast(total_ccw_burst))); - 
CHECK_AS_EXPECTED((nullptr != result), HAILO_OUT_OF_HOST_MEMORY); + CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY); return result; } @@ -211,7 +210,7 @@ Expected WriteDataCcwAction::serialize_params(const ContextResources &) Expected AddCcwBurstAction::create(uint8_t config_stream_index, uint16_t ccw_bursts) { auto result = ContextSwitchConfigActionPtr(new (std::nothrow) AddCcwBurstAction(config_stream_index, ccw_bursts)); - CHECK_AS_EXPECTED((nullptr != result), HAILO_OUT_OF_HOST_MEMORY); + CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY); return result; } @@ -241,7 +240,7 @@ Expected FetchCfgChannelDescriptorsAction::create( "On cfg with continuous mode, max descriptors size must fit in uint16_t"); auto result = ContextSwitchConfigActionPtr(new (std::nothrow) FetchCfgChannelDescriptorsAction(channel_id, static_cast(desc_count))); - CHECK_AS_EXPECTED((nullptr != result), HAILO_OUT_OF_HOST_MEMORY); + CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY); return result; } @@ -267,7 +266,7 @@ Expected FetchCfgChannelDescriptorsAction::serialize_params(const Contex Expected StartBurstCreditsTaskAction::create() { auto result = ContextSwitchConfigActionPtr(new (std::nothrow) StartBurstCreditsTaskAction()); - CHECK_AS_EXPECTED((nullptr != result), HAILO_OUT_OF_HOST_MEMORY); + CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY); return result; } @@ -290,7 +289,7 @@ Expected StartBurstCreditsTaskAction::serialize_params(const ContextReso Expected WaitForNetworkGroupChangeAction::create() { auto result = ContextSwitchConfigActionPtr(new (std::nothrow) WaitForNetworkGroupChangeAction()); - CHECK_AS_EXPECTED((nullptr != result), HAILO_OUT_OF_HOST_MEMORY); + CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY); return result; } @@ -323,7 +322,7 @@ Expected RepeatedAction::create( "Invalid repeated sub-action type (can't have sub-action with type CONTEXT_SWITCH_DEFS__ACTION_TYPE_COUNT)"); auto result = 
ContextSwitchConfigActionPtr(new (std::nothrow) RepeatedAction(std::move(actions))); - CHECK_AS_EXPECTED((nullptr != result), HAILO_OUT_OF_HOST_MEMORY); + CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY); return result; } @@ -371,7 +370,7 @@ Expected> RepeatedAction::serialize(const ContextResources & Expected DisableLcuAction::create(uint8_t cluster_index, uint8_t lcu_index) { auto result = ContextSwitchConfigActionPtr(new (std::nothrow) DisableLcuAction(cluster_index, lcu_index)); - CHECK_AS_EXPECTED((nullptr != result), HAILO_OUT_OF_HOST_MEMORY); + CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY); return result; } @@ -396,7 +395,7 @@ Expected DisableLcuAction::serialize_params(const ContextResources &) co Expected WaitForLcuAction::create(uint8_t cluster_index, uint8_t lcu_index) { auto result = ContextSwitchConfigActionPtr(new (std::nothrow) WaitForLcuAction(cluster_index, lcu_index)); - CHECK_AS_EXPECTED((nullptr != result), HAILO_OUT_OF_HOST_MEMORY); + CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY); return result; } @@ -426,7 +425,7 @@ Expected EnableLcuAction::create(uint8_t cluster_i (CONTEXT_SWITCH_DEFS__ENABLE_LCU_DEFAULT_KERNEL_COUNT == kernel_done_count); auto result = ContextSwitchConfigActionPtr(new (std::nothrow) EnableLcuAction(cluster_index, lcu_index, network_index, kernel_done_address, kernel_done_count, is_default)); - CHECK_AS_EXPECTED((nullptr != result), HAILO_OUT_OF_HOST_MEMORY); + CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY); return result; } @@ -481,7 +480,7 @@ Expected EnableSequencerAction::create(uint8_t clu { auto result = ContextSwitchConfigActionPtr(new (std::nothrow) EnableSequencerAction(cluster_index, initial_l3_cut, initial_l3_offset, active_apu, active_ia, active_sc, active_l2, l2_offset_0, l2_offset_1)); - CHECK_AS_EXPECTED((nullptr != result), HAILO_OUT_OF_HOST_MEMORY); + CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY); return result; } @@ -523,7 +522,7 @@ 
Expected EnableSequencerAction::serialize_params(const ContextResources Expected WaitForSequencerAction::create(uint8_t cluster_index) { auto result = ContextSwitchConfigActionPtr(new (std::nothrow) WaitForSequencerAction(cluster_index)); - CHECK_AS_EXPECTED((nullptr != result), HAILO_OUT_OF_HOST_MEMORY); + CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY); return result; } @@ -548,7 +547,7 @@ Expected WaitForSequencerAction::serialize_params(const ContextResources Expected AllowInputDataflowAction::create(uint8_t stream_index) { auto result = ContextSwitchConfigActionPtr(new (std::nothrow) AllowInputDataflowAction(stream_index)); - CHECK_AS_EXPECTED((nullptr != result), HAILO_OUT_OF_HOST_MEMORY); + CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY); return result; } @@ -571,7 +570,8 @@ bool AllowInputDataflowAction::supports_repeated_block() const Expected AllowInputDataflowAction::serialize_params(const ContextResources &context_resources) const { - const auto edge_layer = context_resources.get_edge_layer_by_stream_index(m_stream_index); + // H2D direction because it is Input actions + const auto edge_layer = context_resources.get_edge_layer_by_stream_index(m_stream_index, HAILO_H2D_STREAM); CHECK_EXPECTED(edge_layer); CONTEXT_SWITCH_DEFS__fetch_data_action_data_t params{}; @@ -602,7 +602,7 @@ Expected AllowInputDataflowAction::serialize_params(const ContextResourc Expected WaitForModuleConfigDoneAction::create(uint8_t module_index) { auto result = ContextSwitchConfigActionPtr(new (std::nothrow) WaitForModuleConfigDoneAction(module_index)); - CHECK_AS_EXPECTED((nullptr != result), HAILO_OUT_OF_HOST_MEMORY); + CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY); return result; } @@ -629,7 +629,7 @@ Expected DdrPairInfoAction::create(const vdma::Cha { auto result = ContextSwitchConfigActionPtr(new (std::nothrow) DdrPairInfoAction( h2d_channel_id, d2h_channel_id, network_index, descriptors_per_frame, descs_count)); - 
CHECK_AS_EXPECTED((nullptr != result), HAILO_OUT_OF_HOST_MEMORY); + CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY); return result; } @@ -662,7 +662,7 @@ Expected DdrPairInfoAction::serialize_params(const ContextResources &) c Expected StartDdrBufferingTaskAction::create() { auto result = ContextSwitchConfigActionPtr(new (std::nothrow) StartDdrBufferingTaskAction()); - CHECK_AS_EXPECTED((nullptr != result), HAILO_OUT_OF_HOST_MEMORY); + CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY); return result; } @@ -685,7 +685,7 @@ Expected StartDdrBufferingTaskAction::serialize_params(const ContextReso Expected ResetDdrBufferingTaskAction::create() { auto result = ContextSwitchConfigActionPtr(new (std::nothrow) ResetDdrBufferingTaskAction()); - CHECK_AS_EXPECTED((nullptr != result), HAILO_OUT_OF_HOST_MEMORY); + CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY); return result; } @@ -710,7 +710,7 @@ Expected ChangeVdmaToStreamMapping::create(const v { auto result = ContextSwitchConfigActionPtr(new (std::nothrow) ChangeVdmaToStreamMapping(channel_id, stream_index, is_dummy_stream)); - CHECK_AS_EXPECTED((nullptr != result), HAILO_OUT_OF_HOST_MEMORY); + CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY); return result; } @@ -740,7 +740,7 @@ Expected ChangeVdmaToStreamMapping::serialize_params(const ContextResour Expected WaitOutputTransferDoneAction::create(uint8_t stream_index) { auto result = ContextSwitchConfigActionPtr(new (std::nothrow) WaitOutputTransferDoneAction(stream_index)); - CHECK_AS_EXPECTED((nullptr != result), HAILO_OUT_OF_HOST_MEMORY); + CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY); return result; } @@ -757,7 +757,8 @@ bool WaitOutputTransferDoneAction::supports_repeated_block() const Expected WaitOutputTransferDoneAction::serialize_params(const ContextResources &context_resources) const { - const auto edge_layer = context_resources.get_edge_layer_by_stream_index(m_stream_index); + // D2H direction 
because it is output action + const auto edge_layer = context_resources.get_edge_layer_by_stream_index(m_stream_index, HAILO_D2H_STREAM); CHECK_EXPECTED(edge_layer); CONTEXT_SWITCH_DEFS__vdma_dataflow_interrupt_data_t params{}; @@ -770,7 +771,7 @@ Expected OpenBoundaryInputChannelAction::create(co { auto result = ContextSwitchConfigActionPtr(new (std::nothrow) OpenBoundaryInputChannelAction(channel_id, host_buffer_info)); - CHECK_AS_EXPECTED((nullptr != result), HAILO_OUT_OF_HOST_MEMORY); + CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY); return result; } @@ -801,7 +802,7 @@ Expected OpenBoundaryOutputChannelAction::create(c { auto result = ContextSwitchConfigActionPtr(new (std::nothrow) OpenBoundaryOutputChannelAction(channel_id, host_buffer_info)); - CHECK_AS_EXPECTED((nullptr != result), HAILO_OUT_OF_HOST_MEMORY); + CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY); return result; } @@ -847,7 +848,7 @@ Expected ActivateBoundaryInputChannelAction::creat { auto result = ContextSwitchConfigActionPtr(new (std::nothrow) ActivateBoundaryInputChannelAction(channel_id, stream_index, nn_stream_config, host_buffer_info, initial_credit_size)); - CHECK_AS_EXPECTED((nullptr != result), HAILO_OUT_OF_HOST_MEMORY); + CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY); return result; } @@ -886,7 +887,7 @@ Expected ActivateBoundaryOutputChannelAction::crea { auto result = ContextSwitchConfigActionPtr(new (std::nothrow) ActivateBoundaryOutputChannelAction(channel_id, stream_index, nn_stream_config, host_buffer_info)); - CHECK_AS_EXPECTED((nullptr != result), HAILO_OUT_OF_HOST_MEMORY); + CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY); return result; } @@ -923,7 +924,7 @@ Expected ActivateInterContextInputChannelAction::c { auto result = ContextSwitchConfigActionPtr(new (std::nothrow) ActivateInterContextInputChannelAction(channel_id, stream_index, nn_stream_config, host_buffer_info, initial_credit_size)); - CHECK_AS_EXPECTED((nullptr 
!= result), HAILO_OUT_OF_HOST_MEMORY); + CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY); return result; } @@ -962,7 +963,7 @@ Expected ActivateInterContextOutputChannelAction:: { auto result = ContextSwitchConfigActionPtr(new (std::nothrow) ActivateInterContextOutputChannelAction(channel_id, stream_index, network_index, nn_stream_config, host_buffer_info)); - CHECK_AS_EXPECTED((nullptr != result), HAILO_OUT_OF_HOST_MEMORY); + CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY); return result; } @@ -1002,7 +1003,7 @@ Expected ActivateDdrInputChannelAction::create(con { auto result = ContextSwitchConfigActionPtr(new (std::nothrow) ActivateDdrInputChannelAction(channel_id, stream_index, nn_stream_config, host_buffer_info, initial_credit_size, connected_d2h_channel_id)); - CHECK_AS_EXPECTED((nullptr != result), HAILO_OUT_OF_HOST_MEMORY); + CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY); return result; } @@ -1044,7 +1045,7 @@ Expected ActivateDdrOutputChannelAction::create(co { auto result = ContextSwitchConfigActionPtr(new (std::nothrow) ActivateDdrOutputChannelAction(channel_id, stream_index, nn_stream_config, host_buffer_info, buffered_rows_count)); - CHECK_AS_EXPECTED((nullptr != result), HAILO_OUT_OF_HOST_MEMORY); + CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY); return result; } @@ -1084,7 +1085,7 @@ Expected ValidateChannelAction::create(const EdgeL edge_layer.layer_info.direction, is_inter_context, static_cast(edge_layer.buffer_info.buffer_type), edge_layer.layer_info.max_shmifo_size)); - CHECK_AS_EXPECTED((nullptr != result), HAILO_OUT_OF_HOST_MEMORY); + CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY); return result; } @@ -1126,7 +1127,7 @@ Expected DeactivateChannelAction::create(const Edg edge_layer.layer_info.direction, is_inter_context, static_cast(edge_layer.buffer_info.buffer_type), edge_layer.layer_info.max_shmifo_size)); - CHECK_AS_EXPECTED((nullptr != result), HAILO_OUT_OF_HOST_MEMORY); + 
CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY); return result; } @@ -1164,7 +1165,7 @@ Expected DeactivateChannelAction::serialize_params(const ContextResource Expected WaitDmaIdleAction::create(uint8_t stream_index) { auto result = ContextSwitchConfigActionPtr(new (std::nothrow) WaitDmaIdleAction(stream_index)); - CHECK_AS_EXPECTED((nullptr != result), HAILO_OUT_OF_HOST_MEMORY); + CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY); return result; } @@ -1181,7 +1182,8 @@ bool WaitDmaIdleAction::supports_repeated_block() const Expected WaitDmaIdleAction::serialize_params(const ContextResources &context_resources) const { - const auto edge_layer = context_resources.get_edge_layer_by_stream_index(m_stream_index); + // D2H direction because it is output action + const auto edge_layer = context_resources.get_edge_layer_by_stream_index(m_stream_index, HAILO_D2H_STREAM); CHECK_EXPECTED(edge_layer); CONTEXT_SWITCH_DEFS__wait_dma_idle_data_t params{}; @@ -1198,7 +1200,7 @@ Expected WaitNmsIdleAction::create(uint8_t aggrega auto result = ContextSwitchConfigActionPtr(new (std::nothrow) WaitNmsIdleAction(aggregator_index, pred_cluster_ob_index, pred_cluster_ob_cluster_index, pred_cluster_ob_interface, succ_prepost_ob_index, succ_prepost_ob_interface)); - CHECK_AS_EXPECTED((nullptr != result), HAILO_OUT_OF_HOST_MEMORY); + CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY); return result; } @@ -1231,17 +1233,20 @@ Expected WaitNmsIdleAction::serialize_params(const ContextResources &) c return Buffer::create(reinterpret_cast(¶ms), sizeof(params)); } -Expected EnableNmsAction::create(uint8_t nms_unit_index, uint8_t network_index) +Expected EnableNmsAction::create(uint8_t nms_unit_index, uint8_t network_index, uint16_t number_of_classes, + uint16_t burst_size) { - auto result = ContextSwitchConfigActionPtr(new (std::nothrow) EnableNmsAction(nms_unit_index, network_index)); - CHECK_AS_EXPECTED((nullptr != result), HAILO_OUT_OF_HOST_MEMORY); + auto 
result = ContextSwitchConfigActionPtr(new (std::nothrow) EnableNmsAction(nms_unit_index, network_index, number_of_classes, burst_size)); + CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY); return result; } -EnableNmsAction::EnableNmsAction(uint8_t nms_unit_index, uint8_t network_index) : +EnableNmsAction::EnableNmsAction(uint8_t nms_unit_index, uint8_t network_index, uint16_t number_of_classes, uint16_t burst_size) : ContextSwitchConfigAction(ContextSwitchConfigAction::Type::EnableNms, CONTEXT_SWITCH_DEFS__ACTION_TYPE_ENABLE_NMS), m_nms_unit_index(nms_unit_index), - m_network_index(network_index) + m_network_index(network_index), + m_number_of_classes(number_of_classes), + m_burst_size(burst_size) {} Expected EnableNmsAction::serialize_params(const ContextResources &) const @@ -1249,6 +1254,8 @@ Expected EnableNmsAction::serialize_params(const ContextResources &) con CONTEXT_SWITCH_DEFS__enable_nms_action_t params{}; params.nms_unit_index = m_nms_unit_index; params.network_index = m_network_index; + params.number_of_classes = m_number_of_classes; + params.burst_size = m_burst_size; return Buffer::create(reinterpret_cast(¶ms), sizeof(params)); } @@ -1257,4 +1264,70 @@ bool EnableNmsAction::supports_repeated_block() const return true; } +Expected WriteDataByTypeAction::create(uint32_t address, uint8_t data_type, uint32_t data, + uint8_t shift, uint32_t mask, uint8_t network_index) +{ + auto result = ContextSwitchConfigActionPtr(new (std::nothrow) WriteDataByTypeAction(address, data_type, data, shift, mask, network_index)); + CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY); + return result; +} + +WriteDataByTypeAction::WriteDataByTypeAction(uint32_t address, uint8_t data_type, uint32_t data, uint8_t shift, uint32_t mask, uint8_t network_index) : + ContextSwitchConfigAction(ContextSwitchConfigAction::Type::WriteDataByType, CONTEXT_SWITCH_DEFS__ACTION_TYPE_WRITE_DATA_BY_TYPE), + m_address(address), + m_data_type(data_type), + m_data(data), + 
m_shift(shift), + m_mask(mask), + m_network_index(network_index) +{} + +Expected WriteDataByTypeAction::serialize_params(const ContextResources &) const +{ + CONTEXT_SWITCH_DEFS__write_data_by_type_action_t params{}; + params.address = m_address; + params.data_type = m_data_type; + params.data = m_data; + params.shift = m_shift; + params.mask = m_mask; + params.network_index = m_network_index; + + return Buffer::create(reinterpret_cast(¶ms), sizeof(params)); +} + +bool WriteDataByTypeAction::supports_repeated_block() const +{ + return false; +} + +Expected SwitchLcuBatchAction::create(uint8_t cluster_index, uint8_t lcu_index, uint8_t network_index, + uint32_t kernel_done_count) +{ + auto result = ContextSwitchConfigActionPtr(new (std::nothrow) SwitchLcuBatchAction(cluster_index, lcu_index, network_index, kernel_done_count)); + CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY); + return result; +} + +SwitchLcuBatchAction::SwitchLcuBatchAction(uint8_t cluster_index, uint8_t lcu_index, uint8_t network_index, uint32_t kernel_done_count) : + ContextSwitchConfigAction(Type::SwitchLcuBatch, CONTEXT_SWITCH_DEFS__ACTION_TYPE_SWITCH_LCU_BATCH), + m_cluster_index(cluster_index), + m_lcu_index(lcu_index), + m_network_index(network_index), + m_kernel_done_count(kernel_done_count) +{} + +bool SwitchLcuBatchAction::supports_repeated_block() const +{ + return true; +} + +Expected SwitchLcuBatchAction::serialize_params(const ContextResources &) const +{ + CONTEXT_SWITCH_DEFS__switch_lcu_batch_action_data_t params{}; + params.packed_lcu_id = pack_lcu_id(m_cluster_index, m_lcu_index); + params.network_index = m_network_index; + params.kernel_done_count = m_kernel_done_count; + return Buffer::create(reinterpret_cast(¶ms), sizeof(params)); +} + } /* namespace hailort */ diff --git a/hailort/libhailort/src/hef/context_switch_actions.hpp b/hailort/libhailort/src/hef/context_switch_actions.hpp index 155958f..defe31a 100644 --- 
a/hailort/libhailort/src/hef/context_switch_actions.hpp +++ b/hailort/libhailort/src/hef/context_switch_actions.hpp @@ -71,6 +71,8 @@ public: WaitDmaIdle, WaitNmsIdle, EnableNms, + WriteDataByType, + SwitchLcuBatch, }; ContextSwitchConfigAction(ContextSwitchConfigAction &&) = default; @@ -744,7 +746,8 @@ private: class EnableNmsAction : public ContextSwitchConfigAction { public: - static Expected create(uint8_t nms_unit_index, uint8_t network_index); + static Expected create(uint8_t nms_unit_index, uint8_t network_index, uint16_t number_of_classes, + uint16_t burst_size); EnableNmsAction(EnableNmsAction &&) = default; EnableNmsAction(const EnableNmsAction &) = delete; EnableNmsAction &operator=(EnableNmsAction &&) = delete; @@ -754,12 +757,58 @@ public: virtual Expected serialize_params(const ContextResources &context_resources) const override; private: - EnableNmsAction(uint8_t nms_unit_index, uint8_t network_index); + EnableNmsAction(uint8_t nms_unit_index, uint8_t network_index, uint16_t number_of_classes, uint16_t burst_size); const uint8_t m_nms_unit_index; const uint8_t m_network_index; + const uint16_t m_number_of_classes; + const uint16_t m_burst_size; }; +class WriteDataByTypeAction : public ContextSwitchConfigAction +{ +public: + static Expected create(uint32_t address, uint8_t data_type, uint32_t data, + uint8_t shift, uint32_t mask, uint8_t network_index); + + virtual bool supports_repeated_block() const override; + virtual Expected serialize_params(const ContextResources &context_resources) const override; + +private: + WriteDataByTypeAction(uint32_t address, uint8_t data_type, uint32_t data, uint8_t shift, uint32_t mask, uint8_t network_index); + + const uint32_t m_address; + const uint8_t m_data_type; + const uint32_t m_data; + const uint8_t m_shift; + const uint32_t m_mask; + const uint8_t m_network_index; + +}; + +class SwitchLcuBatchAction : public ContextSwitchConfigAction +{ +public: + static Expected create(uint8_t cluster_index, uint8_t 
lcu_index, uint8_t network_index, + uint32_t kernel_done_count); + SwitchLcuBatchAction(SwitchLcuBatchAction &&) = default; + SwitchLcuBatchAction(const SwitchLcuBatchAction &) = delete; + SwitchLcuBatchAction &operator=(SwitchLcuBatchAction &&) = delete; + SwitchLcuBatchAction &operator=(const SwitchLcuBatchAction &) = delete; + virtual ~SwitchLcuBatchAction() = default; + virtual bool supports_repeated_block() const override; + virtual Expected serialize_params(const ContextResources &context_resources) const override; + +private: + SwitchLcuBatchAction(uint8_t cluster_index, uint8_t lcu_index, uint8_t network_index, uint32_t kernel_done_count); + + const uint8_t m_cluster_index; + const uint8_t m_lcu_index; + const uint8_t m_network_index; + const uint32_t m_kernel_done_count; +}; + + } /* namespace hailort */ #endif /* _HAILO_CONTEXT_SWITCH_ACTIONS_HPP_ */ diff --git a/hailort/libhailort/src/hef/core_op_metadata.cpp b/hailort/libhailort/src/hef/core_op_metadata.cpp index 8d148d9..e10f415 100644 --- a/hailort/libhailort/src/hef/core_op_metadata.cpp +++ b/hailort/libhailort/src/hef/core_op_metadata.cpp @@ -8,6 +8,7 @@ **/ #include "core_op_metadata.hpp" +#include "hef_internal.hpp" #include namespace hailort @@ -181,25 +182,13 @@ CoreOpMetadata::CoreOpMetadata(const std::string &core_op_name, ContextMetadata &&preliminary_context, std::vector &&dynamic_contexts, std::vector &&config_channels_info, - std::vector &&sorted_output_names, SupportedFeatures &supported_features, - const std::vector &sorted_network_names) + std::vector sorted_network_names) : m_preliminary_context(std::move(preliminary_context)), m_dynamic_contexts(std::move(dynamic_contexts)), m_config_channels_info(std::move(config_channels_info)), - m_core_op_name(core_op_name), m_sorted_output_names(std::move(sorted_output_names)), - m_supported_features(supported_features), m_sorted_network_names(sorted_network_names) {} - -Expected CoreOpMetadata::get_layer_info_by_stream_name(const std::string 
&stream_name) const -{ - for (auto layer_info : get_all_layer_infos()) { - if (layer_info.name == stream_name) { - return layer_info; - } - } - LOGGER__ERROR("Failed to find layer with name {}", stream_name); - return make_unexpected(HAILO_NOT_FOUND); -} + m_core_op_name(core_op_name), m_supported_features(supported_features), + m_sorted_network_names(sorted_network_names) {} std::vector CoreOpMetadata::get_input_layer_infos() const { @@ -301,18 +290,24 @@ Expected> CoreOpMetadata::get_all_layer_infos(const std:: Expected> CoreOpMetadata::get_input_stream_infos(const std::string &network_name) const { - auto input_layer_infos = get_input_layer_infos(network_name); - CHECK_EXPECTED(input_layer_infos); - - return convert_layer_infos_to_stream_infos(input_layer_infos.value()); + std::vector res; + auto input_layers = get_input_layer_infos(network_name); + CHECK_EXPECTED(input_layers); + for (auto &layer_info : input_layers.value()) { + res.push_back(LayerInfoUtils::get_stream_info_from_layer_info(layer_info)); + } + return res; } Expected> CoreOpMetadata::get_output_stream_infos(const std::string &network_name) const { - auto output_layer_infos = get_output_layer_infos(network_name); - CHECK_EXPECTED(output_layer_infos); - - return convert_layer_infos_to_stream_infos(output_layer_infos.value()); + std::vector res; + auto output_layers = get_output_layer_infos(network_name); + CHECK_EXPECTED(output_layers); + for (auto &layer_info : output_layers.value()) { + res.push_back(LayerInfoUtils::get_stream_info_from_layer_info(layer_info)); + } + return res; } Expected> CoreOpMetadata::get_all_stream_infos(const std::string &network_name) const @@ -331,42 +326,92 @@ Expected> CoreOpMetadata::get_all_stream_infos( return res; } -Expected> CoreOpMetadata::get_input_vstream_infos(const std::string &network_name) const + +size_t CoreOpMetadata::get_contexts_count() { - auto input_layer_infos = get_input_layer_infos(network_name); - CHECK_EXPECTED(input_layer_infos); + return 
(m_dynamic_contexts.size() + CONTROL_PROTOCOL__CONTEXT_SWITCH_NUMBER_OF_NON_DYNAMIC_CONTEXTS); +} - return convert_layer_infos_to_vstream_infos(input_layer_infos.value()); +Expected CoreOpMetadata::get_total_transfer_size() +{ + size_t total_transfer_size = 0; + for (const auto &dynamic_context : m_dynamic_contexts) { + auto context_size = dynamic_context.get_context_transfer_size(); + CHECK_EXPECTED(context_size); + total_transfer_size += context_size.release(); + } + return total_transfer_size; } -Expected> CoreOpMetadata::get_output_vstream_infos(const std::string &network_name) const +Expected CoreOpMetadataPerArch::get_metadata(uint32_t partial_clusters_layout_bitmap) const { - std::vector res; - if (m_supported_features.hailo_net_flow) { - res = m_output_vstreams_infos; - return res; + if (PARTIAL_CLUSTERS_LAYOUT_IGNORE == partial_clusters_layout_bitmap) { + // Passing PARTIAL_CLUSTERS_LAYOUT_IGNORE is magic for getting one of the metadata + assert(0 != m_metadata_per_arch.size()); + auto result = m_metadata_per_arch.begin()->second; + return result; } - auto expected_output_layer_infos = get_output_layer_infos(network_name); - CHECK_EXPECTED(expected_output_layer_infos); - auto output_layer_infos = expected_output_layer_infos.release(); + if (contains(m_metadata_per_arch, partial_clusters_layout_bitmap)) { + auto result = m_metadata_per_arch.at(partial_clusters_layout_bitmap); + return result; + } + LOGGER__ERROR("CoreOpPerArch does not contain metadata for partial_clusters_layout_bitmap {}", partial_clusters_layout_bitmap); + return make_unexpected(HAILO_INTERNAL_FAILURE); +} - res = convert_layer_infos_to_vstream_infos(output_layer_infos); +void CoreOpMetadataPerArch::add_metadata(const CoreOpMetadataPtr &metadata, uint32_t partial_clusters_layout_bitmap) +{ + m_metadata_per_arch[partial_clusters_layout_bitmap] = metadata; +} +Expected NetworkGroupMetadata::create(const std::string &network_group_name, + std::map &&core_ops_metadata_per_arch, std::vector 
&sorted_output_names, + SupportedFeatures &supported_features, const std::vector &sorted_network_names, + std::vector> &net_flow_ops) +{ + auto all_layers_infos = get_all_layer_infos(core_ops_metadata_per_arch); + CHECK_EXPECTED(all_layers_infos); + + std::vector input_vstream_infos; + std::vector output_vstream_infos; + for (auto &layer_info : all_layers_infos.value()) { + if (std::any_of(net_flow_ops.begin(), net_flow_ops.end(), + [&layer_info](auto &op) { return contains(op->input_streams, layer_info.name); })) { + continue; // all output_vstream_infos that relates to the op are coming from the op itself instead of layer_infos + } + auto vstreams_info = LayerInfoUtils::get_vstream_infos_from_layer_info(layer_info); + if (HAILO_D2H_STREAM == layer_info.direction) { + // In case of fused nms layers, several LayerInfos will contain data about the same fused layer + for (auto &vstream_info : vstreams_info) { + if (!LayerInfoUtils::vstream_info_already_in_vector(output_vstream_infos, vstream_info.name)) { + output_vstream_infos.push_back(vstream_info); + } + } + } else { + input_vstream_infos.insert(input_vstream_infos.end(), + std::make_move_iterator(vstreams_info.begin()), std::make_move_iterator(vstreams_info.end())); + } + } + for (auto &op : net_flow_ops) { + output_vstream_infos.push_back(op->output_vstream_info); + } + + // Sort vstream infos by sorted_output_names hailo_status status = HAILO_SUCCESS; - std::sort(res.begin(), res.end(), - [this, &status](const auto &info1, const auto &info2) + std::sort(output_vstream_infos.begin(), output_vstream_infos.end(), + [&sorted_output_names, &status](const auto &info1, const auto &info2) { - const auto index1 = std::find(m_sorted_output_names.begin(), m_sorted_output_names.end(), std::string(info1.name)); - const auto index2 = std::find(m_sorted_output_names.begin(), m_sorted_output_names.end(), std::string(info2.name)); + const auto index1 = std::find(sorted_output_names.begin(), sorted_output_names.end(), 
std::string(info1.name)); + const auto index2 = std::find(sorted_output_names.begin(), sorted_output_names.end(), std::string(info2.name)); - if (m_sorted_output_names.end() == index1) { - LOGGER__ERROR("Stream {} not found in sorted output names", info1.name); + if (sorted_output_names.end() == index1) { + LOGGER__ERROR("VStream {} not found in sorted output names", info1.name); status = HAILO_INTERNAL_FAILURE; return false; } - if (m_sorted_output_names.end() == index2) { - LOGGER__ERROR("Stream {} not found in sorted output names", info2.name); + if (sorted_output_names.end() == index2) { + LOGGER__ERROR("VStream {} not found in sorted output names", info2.name); status = HAILO_INTERNAL_FAILURE; return false; } @@ -375,10 +420,37 @@ Expected> CoreOpMetadata::get_output_vstream_i }); CHECK_SUCCESS_AS_EXPECTED(status); + return NetworkGroupMetadata(network_group_name, std::move(core_ops_metadata_per_arch), sorted_output_names, supported_features, sorted_network_names, + input_vstream_infos, output_vstream_infos, net_flow_ops); +} + +Expected> NetworkGroupMetadata::get_input_vstream_infos(const std::string &network_name) const +{ + std::vector res; + for (auto &vstream_info : m_input_vstreams_infos) { + if ((network_name == std::string(vstream_info.network_name)) || (network_name.empty()) || (network_name == default_network_name())) { + res.push_back(vstream_info); + } + } + CHECK_AS_EXPECTED(0 != res.size(), HAILO_NOT_FOUND, "No VStreams where found for network {}", network_name); + return res; } -Expected> CoreOpMetadata::get_all_vstream_infos(const std::string &network_name) const +Expected> NetworkGroupMetadata::get_output_vstream_infos(const std::string &network_name) const +{ + std::vector res; + for (auto &vstream_info : m_output_vstreams_infos) { + if ((network_name == std::string(vstream_info.network_name)) || (network_name.empty()) || (network_name == default_network_name())) { + res.push_back(vstream_info); + } + } + CHECK_AS_EXPECTED(0 != res.size(), 
HAILO_NOT_FOUND, "No VStreams where found for network {}", network_name); + + return res; +} + +Expected> NetworkGroupMetadata::get_all_vstream_infos(const std::string &network_name) const { auto input_vstream_infos = get_input_vstream_infos(network_name); CHECK_EXPECTED(input_vstream_infos); @@ -394,10 +466,21 @@ Expected> CoreOpMetadata::get_all_vstream_info return res; } -Expected> CoreOpMetadata::get_vstream_names_from_stream_name(const std::string &stream_name) const +Expected> NetworkGroupMetadata::get_vstream_names_from_stream_name(const std::string &stream_name) { std::vector results; - for (auto &layer_info : get_all_layer_infos()) { + for (auto &pp : m_net_flow_ops) { + if (contains(pp->input_streams, stream_name)) { + for (auto &output_metadata : pp->op->outputs_metadata()) { + results.push_back(output_metadata.first); + } + return results; + } + } + + auto all_layers_infos = get_all_layer_infos(m_core_ops_metadata_per_arch); + CHECK_EXPECTED(all_layers_infos); + for (auto &layer_info : all_layers_infos.release()) { if (stream_name == layer_info.name) { if (layer_info.is_defused_nms) { return std::vector (1, layer_info.fused_nms_layer[0].name); @@ -411,10 +494,21 @@ Expected> CoreOpMetadata::get_vstream_names_from_stream return make_unexpected(HAILO_NOT_FOUND); } -Expected> CoreOpMetadata::get_stream_names_from_vstream_name(const std::string &vstream_name) const +Expected> NetworkGroupMetadata::get_stream_names_from_vstream_name(const std::string &vstream_name) { std::vector results; - for (auto &layer_info : get_all_layer_infos()) { + for (auto &pp : m_net_flow_ops) { + if (contains(pp->op->outputs_metadata(), vstream_name)) { + for (auto &input_name : pp->input_streams) { + results.push_back(input_name); + } + return results; + } + } + + auto all_layers_infos = get_all_layer_infos(m_core_ops_metadata_per_arch); + CHECK_EXPECTED(all_layers_infos); + for (auto &layer_info : all_layers_infos.release()) { if (layer_info.is_mux) { if 
(is_edge_under_mux(layer_info, vstream_name)) { // vstream_name is a demux of the layer info @@ -436,31 +530,7 @@ Expected> CoreOpMetadata::get_stream_names_from_vstream return results; } -std::vector CoreOpMetadata::convert_layer_infos_to_stream_infos(const std::vector &layer_infos) const -{ - std::vector res; - for (auto &layer_info : layer_infos) { - res.push_back(LayerInfoUtils::get_stream_info_from_layer_info(layer_info)); - } - return res; -} - -std::vector CoreOpMetadata::convert_layer_infos_to_vstream_infos(const std::vector &layer_infos) const -{ - std::vector res; - for (auto &layer_info : layer_infos) { - auto vstream_infos = LayerInfoUtils::get_vstream_infos_from_layer_info(layer_info); - for (const auto &vstream_info : vstream_infos) { - // In case of fused nms layers, several LayerInfos will contain data about the same fused layer - if (!LayerInfoUtils::vstream_info_already_in_vector(res, vstream_info.name)) { - res.push_back(vstream_info); - } - } - } - return res; -} - -Expected> CoreOpMetadata::get_network_infos() const +Expected> NetworkGroupMetadata::get_network_infos() const { std::vector network_infos; network_infos.reserve(m_sorted_network_names.size()); @@ -476,41 +546,4 @@ Expected> CoreOpMetadata::get_network_infos() return network_infos; } -size_t CoreOpMetadata::get_contexts_count() -{ - return (m_dynamic_contexts.size() + CONTROL_PROTOCOL__CONTEXT_SWITCH_NUMBER_OF_NON_DYNAMIC_CONTEXTS); -} - -Expected CoreOpMetadata::get_total_transfer_size() -{ - size_t total_transfer_size = 0; - for (const auto &dynamic_context : m_dynamic_contexts) { - auto context_size = dynamic_context.get_context_transfer_size(); - CHECK_EXPECTED(context_size); - total_transfer_size += context_size.release(); - } - return total_transfer_size; -} - -Expected CoreOpMetadataPerArch::get_metadata(uint32_t partial_clusters_layout_bitmap) -{ - if (PARTIAL_CLUSTERS_LAYOUT_IGNORE == partial_clusters_layout_bitmap) { - // Passing PARTIAL_CLUSTERS_LAYOUT_IGNORE is magic for 
getting one of the metadata - assert(0 != m_metadata_per_arch.size()); - auto result = m_metadata_per_arch.begin()->second; - return result; - } - if (contains(m_metadata_per_arch, partial_clusters_layout_bitmap)) { - auto result = m_metadata_per_arch[partial_clusters_layout_bitmap]; - return result; - } - LOGGER__ERROR("CoreOpPerArch does not contain metadata for partial_clusters_layout_bitmap {}", partial_clusters_layout_bitmap); - return make_unexpected(HAILO_INTERNAL_FAILURE); -} - -void CoreOpMetadataPerArch::add_metadata(const CoreOpMetadata &metadata, uint32_t partial_clusters_layout_bitmap) -{ - m_metadata_per_arch[partial_clusters_layout_bitmap] = metadata; -} - } /* namespace hailort */ diff --git a/hailort/libhailort/src/hef/core_op_metadata.hpp b/hailort/libhailort/src/hef/core_op_metadata.hpp index d725524..b449679 100644 --- a/hailort/libhailort/src/hef/core_op_metadata.hpp +++ b/hailort/libhailort/src/hef/core_op_metadata.hpp @@ -25,6 +25,10 @@ struct SupportedFeatures { bool multi_context = false; bool preliminary_run_asap = false; bool hailo_net_flow = false; + bool dual_direction_stream_index = false; + bool nms_burst_mode = false; + bool output_scale_by_feature = false; + bool periph_calculation_in_hailort = false; }; // For each config_stream_index we store vector of all ccw write length. 
The vector is used to build the config buffer.g @@ -33,7 +37,6 @@ using ConfigBufferInfoMap = std::unordered_map>; class ContextMetadata final { public: - ContextMetadata() = default; // TODO HRT-8478: remove ContextMetadata(std::vector &&actions, ConfigBufferInfoMap&& config_buffers_info); @@ -74,14 +77,12 @@ struct ConfigChannelInfo { class CoreOpMetadata final { public: - CoreOpMetadata() = default; // TODO HRT-8478: remove CoreOpMetadata(const std::string &core_op_name, ContextMetadata &&preliminary_context, std::vector &&dynamic_contexts, std::vector &&config_channels_info, - std::vector &&sorted_output_names, SupportedFeatures &supported_features, - const std::vector &sorted_network_names); + std::vector sorted_network_names); std::vector get_input_layer_infos() const; std::vector get_output_layer_infos() const; @@ -90,38 +91,112 @@ public: Expected> get_input_layer_infos(const std::string &network_name) const; Expected> get_output_layer_infos(const std::string &network_name) const; Expected> get_all_layer_infos(const std::string &network_name) const; - Expected get_layer_info_by_stream_name(const std::string &stream_name) const; const ContextMetadata &preliminary_context() const; const std::vector &dynamic_contexts() const; const std::vector &config_channels_info() const; + // TODO: Move stream infos into NetworkGroupMetadata Expected> get_input_stream_infos(const std::string &network_name = "") const; Expected> get_output_stream_infos(const std::string &network_name = "") const; Expected> get_all_stream_infos(const std::string &network_name = "") const; - // TODO: HRT-9546 - Remove, should only be in CNG + size_t get_contexts_count(); + + const std::string &core_op_name() const + { + return m_core_op_name; + } + + const SupportedFeatures &supported_features() const + { + return m_supported_features; + } + + Expected get_total_transfer_size(); + + // TODO: Remove + const std::vector &get_network_names() const + { + return m_sorted_network_names; + } + 
+private: + // TODO: Remove + const std::string default_network_name() const + { + return HailoRTDefaults::get_network_name(m_core_op_name); + } + + ContextMetadata m_preliminary_context; + std::vector m_dynamic_contexts; + std::vector m_config_channels_info; + std::string m_core_op_name; + SupportedFeatures m_supported_features; + std::vector m_sorted_network_names; +}; + +using CoreOpMetadataPtr = std::shared_ptr; + +class CoreOpMetadataPerArch final +{ +public: + CoreOpMetadataPerArch() = default; + + Expected get_metadata(uint32_t partial_clusters_layout_bitmap) const; + void add_metadata(const CoreOpMetadataPtr &metadata, uint32_t partial_clusters_layout_bitmap); + +private: + std::map m_metadata_per_arch; +}; + +struct NetFlowElement; + +class NetworkGroupMetadata final { +public: + static Expected create(const std::string &network_group_name, + std::map &&core_ops_metadata_per_arch, + std::vector &sorted_output_names, + SupportedFeatures &supported_features, + const std::vector &sorted_network_names, + std::vector> &net_flow_ops); + + NetworkGroupMetadata(const std::string &network_group_name, + std::map &&core_ops_metadata_per_arch, + std::vector &sorted_output_names, + SupportedFeatures &supported_features, + const std::vector &sorted_network_names, + std::vector &input_vstreams_infos, + std::vector &output_vstreams_infos, + std::vector> &net_flow_ops) : + m_network_group_name(network_group_name), + m_sorted_output_names(sorted_output_names), + m_supported_features(supported_features), + m_sorted_network_names(sorted_network_names), + m_input_vstreams_infos(input_vstreams_infos), + m_output_vstreams_infos(output_vstreams_infos), + m_core_ops_metadata_per_arch(std::move(core_ops_metadata_per_arch)), + m_net_flow_ops(net_flow_ops) + {}; + Expected> get_input_vstream_infos(const std::string &network_name = "") const; Expected> get_output_vstream_infos(const std::string &network_name = "") const; Expected> get_all_vstream_infos(const std::string &network_name 
= "") const; - // TODO: HRT-9546 - Remove, should only be in CNG - need to decide if relevant only for one CoreOp case. - Expected> get_vstream_names_from_stream_name(const std::string &stream_name) const; - Expected> get_stream_names_from_vstream_name(const std::string &vstream_name) const; + Expected> get_vstream_names_from_stream_name(const std::string &stream_name); + Expected> get_stream_names_from_vstream_name(const std::string &vstream_name); Expected> get_network_infos() const; - size_t get_contexts_count(); - - const std::string &core_op_name() const + const std::string &name() const { - return m_core_op_name; + return m_network_group_name; } const std::string default_network_name() const { - return HailoRTDefaults::get_network_name(m_core_op_name); + return HailoRTDefaults::get_network_name(m_network_group_name); } const std::vector get_sorted_output_names() const @@ -129,7 +204,6 @@ public: return m_sorted_output_names; } - // duplicated for each CoreOp const SupportedFeatures &supported_features() const { return m_supported_features; @@ -140,41 +214,31 @@ public: return m_sorted_network_names; } - // TODO: HRT-9546 - Move to CNG - void add_output_vstream_info(const hailo_vstream_info_t &output_vstream_info) { - m_output_vstreams_infos.push_back(output_vstream_info); - } - - Expected get_total_transfer_size(); - private: - std::vector convert_layer_infos_to_stream_infos(const std::vector &layer_infos) const; - std::vector convert_layer_infos_to_vstream_infos(const std::vector &layer_infos) const; + static Expected> get_all_layer_infos(std::map &core_ops_metadata_per_arch) + /* This function is used for names getters (such as get_vstream_names_from_stream_name), + so should be same across all clusters layouts */ + { + CHECK_AS_EXPECTED(1 == core_ops_metadata_per_arch.size(), HAILO_INTERNAL_FAILURE); + auto core_op_metadata = core_ops_metadata_per_arch.begin()->second.get_metadata(PARTIAL_CLUSTERS_LAYOUT_IGNORE); + CHECK_EXPECTED(core_op_metadata); - 
ContextMetadata m_preliminary_context; - std::vector m_dynamic_contexts; - std::vector m_config_channels_info; - std::string m_core_op_name; + return core_op_metadata.value()->get_all_layer_infos(); + } + + std::string m_network_group_name; std::vector m_sorted_output_names; SupportedFeatures m_supported_features; std::vector m_sorted_network_names; - // TODO: remove this from here! NetworkGroupMetadata should be CoreOpMetadata and contain no net_flow information! (HRT-9546) - // To add insult to injury, this is being constructed lazyly by add_output_layer_info - std::vector m_output_vstreams_infos; // Valid only in case of post process -}; + std::vector m_input_vstreams_infos; + std::vector m_output_vstreams_infos; + std::map m_core_ops_metadata_per_arch; // Key is core_op_name + std::vector> m_net_flow_ops; -class CoreOpMetadataPerArch final -{ -public: - CoreOpMetadataPerArch() = default; - - Expected get_metadata(uint32_t partial_clusters_layout_bitmap); - void add_metadata(const CoreOpMetadata &metadata, uint32_t partial_clusters_layout_bitmap); - -private: - std::map m_metadata_per_arch; + friend class Hef; + friend class ConfiguredNetworkGroupBase; }; } /* namespace hailort */ diff --git a/hailort/libhailort/src/hef/hef.cpp b/hailort/libhailort/src/hef/hef.cpp index cd543a0..f719c72 100644 --- a/hailort/libhailort/src/hef/hef.cpp +++ b/hailort/libhailort/src/hef/hef.cpp @@ -23,13 +23,17 @@ #include "net_flow/ops/nms_post_process.hpp" #include "net_flow/ops/yolo_post_process.hpp" +#include "net_flow/ops/yolox_post_process.hpp" #include "net_flow/ops/ssd_post_process.hpp" +#include "net_flow/ops/argmax_post_process.hpp" +#include "net_flow/ops/softmax_post_process.hpp" #include "hef/hef_internal.hpp" #include "vdma/pcie/pcie_device.hpp" #include "vdma/vdma_config_manager.hpp" #include "eth/hcp_config_core_op.hpp" #include "hef/layer_info.hpp" #include "device_common/control.hpp" +#include "stream_common/nms_stream_reader.hpp" #include "byte_order.h" #include 
"context_switch_defs.h" @@ -51,6 +55,8 @@ namespace hailort #define HEF__MD5_BUFFER_SIZE (1024) #define DEFAULT_BATCH_SIZE (1) #define SKIP_SPACE_COMMA_CHARACTERS (2) +#define ALIGNED_TO_4_BYTES (4) +#define DEFAULT_NMS_NO_BURST_SIZE (1) static const uint8_t ENABLE_LCU_CONTROL_WORD[4] = {1, 0, 0, 0}; @@ -487,26 +493,50 @@ hailo_status Hef::Impl::fill_networks_metadata() { fill_extensions_bitset(); - CoreOpMetadataPerArch metadata; + CoreOpMetadataPerArch core_op_metadata; uint32_t partial_clusters_layout_bitmap = 0; for (auto &network_group : m_groups) { + // Prepare core_op_metadata auto network_group_name = HefUtils::get_network_group_name(*network_group, m_supported_features); // TODO: keep metadata per core_op (HRT-9551) const auto &core_ops = m_core_ops_per_group[network_group_name]; assert(core_ops.size() == 1); const auto &core_op = core_ops[0]; + + // TODO: Clean this code after hef.proto refactor + std::vector sorted_network_names; + if (m_supported_features.multi_network_support) { + if (0 != network_group->networks_names_size()) { + sorted_network_names.reserve(core_op.networks_names.size()); + for (auto &partial_network_name : core_op.networks_names) { + auto network_name = HefUtils::get_network_name(network_group_name, partial_network_name); + sorted_network_names.push_back(network_name); + } + } else if (0 != network_group->partial_network_groups_size()) { + sorted_network_names.reserve(network_group->partial_network_groups().begin()->network_group().networks_names_size()); + for (auto &partial_network_name : network_group->partial_network_groups().begin()->network_group().networks_names()) { + auto network_name = HefUtils::get_network_name(network_group_name, partial_network_name); + sorted_network_names.push_back(network_name); + } + } + } + if (sorted_network_names.empty()) { + sorted_network_names.push_back(HailoRTDefaults::get_network_name(network_group_name)); + } + if (ProtoHEFHwArch::PROTO__HW_ARCH__HAILO8L == get_device_arch()) { if 
(m_supported_features.hailo_net_flow) { for (auto &partial_core_op : core_op.partial_core_ops) { partial_clusters_layout_bitmap = partial_core_op->layout.partial_clusters_layout_bitmap(); - auto metadata_per_arch = create_metadata_per_arch(*(partial_core_op->core_op)); - CHECK_EXPECTED_AS_STATUS(metadata_per_arch); - auto &&arch_metadata = metadata_per_arch.release(); - auto expected_net_flow_ops = create_net_flow_ops(*network_group, arch_metadata); + auto metadata_per_arch_exp = create_metadata_per_arch(*(partial_core_op->core_op), sorted_network_names); + CHECK_EXPECTED_AS_STATUS(metadata_per_arch_exp); + auto metadata_per_arch = metadata_per_arch_exp.release(); + + auto expected_net_flow_ops = create_net_flow_ops(*network_group, *metadata_per_arch, get_device_arch()); CHECK_EXPECTED_AS_STATUS(expected_net_flow_ops); - m_post_process_ops_per_group.insert({arch_metadata.core_op_name(), expected_net_flow_ops.value()}); - metadata.add_metadata(arch_metadata, partial_clusters_layout_bitmap); + m_post_process_ops_per_group.insert({metadata_per_arch->core_op_name(), expected_net_flow_ops.value()}); + core_op_metadata.add_metadata(metadata_per_arch, partial_clusters_layout_bitmap); } } else { for (auto &partial_network_group : network_group->partial_network_groups()) { @@ -520,27 +550,72 @@ hailo_status Hef::Impl::fill_networks_metadata() partial_network_group.network_group().networks_names(), {} }; - auto metadata_per_arch = create_metadata_per_arch(partial_core_op); - CHECK_EXPECTED_AS_STATUS(metadata_per_arch); - auto &&arch_metadata = metadata_per_arch.release(); + + auto metadata_per_arch_exp = create_metadata_per_arch(partial_core_op, sorted_network_names); + CHECK_EXPECTED_AS_STATUS(metadata_per_arch_exp); + auto metadata_per_arch = metadata_per_arch_exp.release(); + std::vector> empty_ops; - m_post_process_ops_per_group.insert({arch_metadata.core_op_name(), empty_ops}); - metadata.add_metadata(arch_metadata, partial_clusters_layout_bitmap); + 
m_post_process_ops_per_group.insert({metadata_per_arch->core_op_name(), empty_ops}); + core_op_metadata.add_metadata(metadata_per_arch, partial_clusters_layout_bitmap); } } } else { partial_clusters_layout_bitmap = PARTIAL_CLUSTERS_LAYOUT_IGNORE; - auto metadata_per_arch = create_metadata_per_arch(core_op); - CHECK_EXPECTED_AS_STATUS(metadata_per_arch); - auto &&arch_metadata = metadata_per_arch.release(); - auto expected_net_flow_ops = create_net_flow_ops(*network_group, arch_metadata); + auto metadata_per_arch_exp = create_metadata_per_arch(core_op, sorted_network_names); + CHECK_EXPECTED_AS_STATUS(metadata_per_arch_exp); + auto metadata_per_arch = metadata_per_arch_exp.release(); + + auto expected_net_flow_ops = create_net_flow_ops(*network_group, *metadata_per_arch, get_device_arch()); CHECK_EXPECTED_AS_STATUS(expected_net_flow_ops); - m_post_process_ops_per_group.insert({arch_metadata.core_op_name(), expected_net_flow_ops.value()}); - metadata.add_metadata(arch_metadata, partial_clusters_layout_bitmap); + m_post_process_ops_per_group.insert({metadata_per_arch->core_op_name(), expected_net_flow_ops.value()}); + core_op_metadata.add_metadata(metadata_per_arch, partial_clusters_layout_bitmap); } - CHECK(!contains(m_core_op_per_arch, network_group_name), + + // Taking the full-layout's name (name is same across all layouts) + auto metadata_exp = core_op_metadata.get_metadata(PARTIAL_CLUSTERS_LAYOUT_IGNORE); + CHECK_EXPECTED_AS_STATUS(metadata_exp); + auto core_op_name = metadata_exp.value()->core_op_name(); + std::map core_op_metadata_map; + core_op_metadata_map[core_op_name] = core_op_metadata; + // Prepare network_group_metadata + CHECK(!contains(m_network_group_metadata, network_group_name), HAILO_INVALID_OPERATION, "Network group with the name {} is already configured on the device", network_group_name); - m_core_op_per_arch.emplace(network_group_name, metadata); + + // TODO: Clean this code after hef.proto refactor + std::vector sorted_output_names; + if 
(core_op.fused_layers_metadata.network_has_fused_layers()) { + // If the model has fused layers, updated sorted_output_names is under the fused layer metadata + for (auto &name : core_op.fused_layers_metadata.updated_sorted_output_names()) { + sorted_output_names.push_back(name); + } + } else if(!m_supported_features.hailo_net_flow && (0 != network_group->partial_network_groups_size()) && + (network_group->partial_network_groups().begin()->network_group().sorted_outputs_order_size())) { + // If the model doesnt support net_flow, its possible that sorted output names will be under the partial_network_groups metadata + for (auto &name : network_group->partial_network_groups().begin()->network_group().sorted_outputs_order()) { + sorted_output_names.push_back(name); + } + } else if (0 != network_group->sorted_outputs_order_size()) { + // Most cases should fall here - either net_flow is supported, or network_group->sorted_outputs_order() has values + for (auto &name : network_group->sorted_outputs_order()) { + sorted_output_names.push_back(name); + } + } else { + // For very old HEFs, sorted_output_names might be in the last context's metadata + uint32_t number_of_contexts = core_op.contexts.size(); + const auto& context_metadata = core_op.contexts[number_of_contexts - 1].metadata(); + CHECK(0 < context_metadata.sorted_outputs_order_size(), HAILO_INVALID_HEF, + "Sorted output names is not set up in the HEF."); + for (auto &name : context_metadata.sorted_outputs_order()) { + sorted_output_names.push_back(name); + } + } + + auto network_group_metadata = NetworkGroupMetadata::create(network_group_name, std::move(core_op_metadata_map), + sorted_output_names, m_supported_features, sorted_network_names, m_post_process_ops_per_group.at(network_group_name)); + + CHECK_EXPECTED_AS_STATUS(network_group_metadata); + m_network_group_metadata.emplace(network_group_name, network_group_metadata.release()); } return HAILO_SUCCESS; } @@ -578,36 +653,22 @@ static Expected> 
parse_config_channels_info(const return config_channels_info; } -Expected Hef::Impl::create_metadata_per_arch(const ProtoHEFCoreOpMock &core_op) +Expected Hef::Impl::create_metadata_per_arch(const ProtoHEFCoreOpMock &core_op, const std::vector &sorted_network_names) { auto preliminary_context = HefUtils::parse_preliminary_context(core_op.preliminary_config, m_supported_features); CHECK_EXPECTED(preliminary_context); - auto dynamic_contexts = HefUtils::parse_dynamic_contexts(core_op, m_supported_features); + auto dynamic_contexts = HefUtils::parse_dynamic_contexts(core_op, m_supported_features, get_device_arch()); CHECK_EXPECTED(dynamic_contexts); auto config_channels_info = parse_config_channels_info(core_op); CHECK_EXPECTED(config_channels_info); - auto sorted_output_names = HefUtils::get_sorted_output_names(core_op); - CHECK_EXPECTED(sorted_output_names); - - std::vector sorted_network_names; - if (m_supported_features.multi_network_support) { - sorted_network_names.reserve(core_op.networks_names.size()); - for (auto &partial_network_name : core_op.networks_names) { - auto network_name = HefUtils::get_network_name(core_op, partial_network_name); - sorted_network_names.push_back(network_name); - } - } else { - sorted_network_names.push_back(HailoRTDefaults::get_network_name(core_op.network_group_metadata.network_group_name())); - } - // Currently, CoreOp name is the same as network_group_name, thats why we init it with it. // TODO: HRT-9551 - Change it when supporting multi core ops. 
- CoreOpMetadata metadata_per_arch(core_op.network_group_metadata.network_group_name(), - preliminary_context.release(), dynamic_contexts.release(), config_channels_info.release(), - sorted_output_names.release(), m_supported_features, sorted_network_names); + auto metadata_per_arch = make_shared_nothrow(core_op.network_group_metadata.network_group_name(), + preliminary_context.release(), dynamic_contexts.release(), config_channels_info.release(), m_supported_features, sorted_network_names); + CHECK_NOT_NULL_AS_EXPECTED(metadata_per_arch, HAILO_OUT_OF_HOST_MEMORY); return metadata_per_arch; } @@ -772,6 +833,14 @@ SupportedFeatures Hef::Impl::get_supported_features(const ProtoHEFHeader &header header, hef_extensions, included_features); supported_features.hailo_net_flow = check_hef_extension(ProtoHEFExtensionType::HAILO_NET_FLOW, header, hef_extensions, included_features); + supported_features.dual_direction_stream_index = check_hef_extension(ProtoHEFExtensionType::DUAL_DIRECTION_STREAM_INDEX, + header, hef_extensions, included_features); + supported_features.nms_burst_mode = check_hef_extension(ProtoHEFExtensionType::NMS_OUTPUT_BURST, + header, hef_extensions, included_features); + supported_features.output_scale_by_feature = check_hef_extension(ProtoHEFExtensionType::OUTPUT_SCALE_PER_FEATURE, + header, hef_extensions, included_features); + supported_features.periph_calculation_in_hailort = check_hef_extension(ProtoHEFExtensionType::PERIPH_CALCULATION_IN_HAILORT, + header, hef_extensions, included_features); return supported_features; } @@ -782,7 +851,7 @@ net_flow::NmsPostProcessConfig create_nms_config(const ProtoHEFOp &op_proto) nms_config.nms_score_th = (float32_t)op_proto.nms_op().nms_score_th(); nms_config.nms_iou_th = (float32_t)op_proto.nms_op().nms_iou_th(); nms_config.max_proposals_per_class = op_proto.nms_op().max_proposals_per_class(); - nms_config.classes = op_proto.nms_op().classes(); + nms_config.number_of_classes = op_proto.nms_op().classes(); 
nms_config.background_removal = op_proto.nms_op().background_removal(); nms_config.background_removal_index = op_proto.nms_op().background_removal_index(); @@ -836,15 +905,25 @@ Expected> create_yolox_op(const ProtoHEFOp &op_pro const std::map &pad_index_to_streams_info, const std::map &input_to_output_pads) { auto nms_config = create_nms_config(op_proto); - net_flow::YoloPostProcessConfig yolo_config{}; - yolo_config.image_height = (float32_t)op_proto.nms_op().yolo_nms_op().image_height(); - yolo_config.image_width = (float32_t)op_proto.nms_op().yolo_nms_op().image_width(); + net_flow::YoloxPostProcessConfig yolox_config{}; + yolox_config.image_height = (float32_t)op_proto.nms_op().yolox_nms_op().image_height(); + yolox_config.image_width = (float32_t)op_proto.nms_op().yolox_nms_op().image_width(); std::map inputs_metadata; std::map outputs_metadata; net_flow::BufferMetaData output_metadata{}; output_metadata.format = output_format; outputs_metadata.insert({op_proto.output_pads()[0].name(), output_metadata}); + + for (auto &bbox_proto : op_proto.nms_op().yolox_nms_op().bbox_decoders()) { + assert(contains(pad_index_to_streams_info, static_cast(bbox_proto.reg_pad_index()))); + auto reg_name = pad_index_to_streams_info.at(bbox_proto.reg_pad_index()).name; + assert(contains(pad_index_to_streams_info, static_cast(bbox_proto.cls_pad_index()))); + auto cls_name = pad_index_to_streams_info.at(bbox_proto.cls_pad_index()).name; + assert(contains(pad_index_to_streams_info, static_cast(bbox_proto.obj_pad_index()))); + auto obj_name = pad_index_to_streams_info.at(bbox_proto.obj_pad_index()).name; + yolox_config.input_names.emplace_back(net_flow::MatchingLayersNames{reg_name, obj_name, cls_name}); + } for (auto &input_pad : op_proto.input_pads()) { CHECK_AS_EXPECTED(contains(input_to_output_pads, static_cast(input_pad.index())), HAILO_INVALID_HEF, @@ -861,7 +940,7 @@ Expected> create_yolox_op(const ProtoHEFOp &op_pro input_metadata.padded_shape = op_input_stream.hw_shape; 
inputs_metadata.insert({op_input_stream.name, input_metadata}); } - return net_flow::YOLOXPostProcessOp::create(inputs_metadata, outputs_metadata, nms_config, yolo_config); + return net_flow::YOLOXPostProcessOp::create(inputs_metadata, outputs_metadata, nms_config, yolox_config); } Expected> create_ssd_op(const ProtoHEFOp &op_proto, hailo_format_t output_format, @@ -925,8 +1004,124 @@ Expected> create_ssd_op(const ProtoHEFOp &op_proto return net_flow::SSDPostProcessOp::create(inputs_metadata, outputs_metadata, nms_config, ssd_config); } +Expected> create_argmax_op(const ProtoHEFPad &input_pad, const ProtoHEFPad &output_pad, + const std::string &input_name, const std::string &output_name, const bool &is_hw_padding_supported) +{ + // create input meta + std::map inputs_metadata; + hailort::net_flow::BufferMetaData input_metadata{}; + input_metadata.shape = {input_pad.tensor_shape().height(), input_pad.tensor_shape().width(), input_pad.tensor_shape().features()}; + // If padding is done in HW, the padded shape is as the shape (TODO: Remove once HRT support hw_padding from DFC) + if (is_hw_padding_supported) { + input_metadata.padded_shape = input_metadata.shape; + } else { + input_metadata.padded_shape = {input_pad.tensor_shape().padded_height(), input_pad.tensor_shape().padded_width(), + input_pad.tensor_shape().padded_features()}; + } + + input_metadata.format.type = static_cast(input_pad.format_type()); + input_metadata.format.order = static_cast(input_pad.format_order()); + input_metadata.format.flags = HAILO_FORMAT_FLAGS_NONE; + input_metadata.quant_info.qp_zp = input_pad.numeric_info().qp_zp(); + input_metadata.quant_info.qp_scale = input_pad.numeric_info().qp_scale(); + input_metadata.quant_info.limvals_min = input_pad.numeric_info().limvals_min(); + input_metadata.quant_info.limvals_max = input_pad.numeric_info().limvals_max(); + inputs_metadata.insert({input_name, input_metadata}); + + // create output meta + std::map outputs_metadata; + 
hailort::net_flow::BufferMetaData output_metadata{}; + output_metadata.shape = {input_pad.tensor_shape().height(), input_pad.tensor_shape().width(), hailort::net_flow::ARGMAX_OUTPUT_FEATURES_SIZE}; + output_metadata.padded_shape = output_metadata.shape; // padded_shape is the same as the output_shape in argmax op + output_metadata.format.order = static_cast(output_pad.format_order()); + output_metadata.format.type = static_cast(output_pad.format_type()); + output_metadata.quant_info.qp_zp = output_pad.numeric_info().qp_zp(); + output_metadata.quant_info.qp_scale = output_pad.numeric_info().qp_scale(); + output_metadata.quant_info.limvals_min = output_pad.numeric_info().limvals_min(); + output_metadata.quant_info.limvals_max = output_pad.numeric_info().limvals_max(); + output_metadata.format.flags = HAILO_FORMAT_FLAGS_NONE; + outputs_metadata.insert({output_name, output_metadata}); + return net_flow::ArgmaxPostProcessOp::create(inputs_metadata, outputs_metadata); +} + +Expected> create_softmax_op(const ProtoHEFPad &input_pad, const ProtoHEFPad &output_pad, + const std::string &input_name, const std::string &output_name) +{ + // create input meta + std::map inputs_metadata; + hailort::net_flow::BufferMetaData input_metadata{}; + input_metadata.shape = {input_pad.tensor_shape().height(), input_pad.tensor_shape().width(), input_pad.tensor_shape().features()}; + input_metadata.padded_shape = input_metadata.shape; // since softmax is connected to transform context, shape and padded shape are the same + + input_metadata.format.type = static_cast(input_pad.format_type()); + input_metadata.format.order = static_cast(input_pad.format_order()); + input_metadata.format.flags = HAILO_FORMAT_FLAGS_NONE; + input_metadata.quant_info.qp_zp = input_pad.numeric_info().qp_zp(); + input_metadata.quant_info.qp_scale = input_pad.numeric_info().qp_scale(); + input_metadata.quant_info.limvals_min = input_pad.numeric_info().limvals_min(); + input_metadata.quant_info.limvals_max = 
input_pad.numeric_info().limvals_max(); + inputs_metadata.insert({input_name, input_metadata}); + + // create output meta + std::map outputs_metadata; + hailort::net_flow::BufferMetaData output_metadata{}; + output_metadata.shape = {input_pad.tensor_shape().height(), input_pad.tensor_shape().width(), input_pad.tensor_shape().features()}; + output_metadata.padded_shape = output_metadata.shape; // padded_shape is the same as the output_shape in softmax op + output_metadata.format.order = static_cast(output_pad.format_order()); + output_metadata.format.type = static_cast(output_pad.format_type()); + output_metadata.quant_info.qp_zp = output_pad.numeric_info().qp_zp(); + output_metadata.quant_info.qp_scale = output_pad.numeric_info().qp_scale(); + output_metadata.quant_info.limvals_min = output_pad.numeric_info().limvals_min(); + output_metadata.quant_info.limvals_max = output_pad.numeric_info().limvals_max(); + output_metadata.format.flags = HAILO_FORMAT_FLAGS_NONE; + outputs_metadata.insert({output_name, output_metadata}); + return net_flow::SoftmaxPostProcessOp::create(inputs_metadata, outputs_metadata); +} + +Expected> create_logits_op(const ProtoHEFOp &op_proto, const std::map &input_to_output_pads, + const std::map &pad_index_to_pad_data, NetFlowElement &net_flow_element, + const std::map &pad_index_to_streams_info, const ProtoHEFHwArch &hef_arch) +{ + // connect input_streams to net_flow element + CHECK_AS_EXPECTED(op_proto.input_pads().size() == 1, HAILO_INVALID_HEF, "Logits op must have 1 input only"); + CHECK_AS_EXPECTED(op_proto.output_pads().size() == 1, HAILO_INVALID_HEF, "Logits op must have 1 output only"); + auto input_pad = op_proto.input_pads()[0]; + auto output_pad = op_proto.output_pads()[0]; + CHECK_AS_EXPECTED(contains(input_to_output_pads, static_cast(input_pad.index())), HAILO_INVALID_HEF, + "Logits op is not connected to core-op"); + auto output_pad_index = input_to_output_pads.at(input_pad.index()); + 
CHECK_AS_EXPECTED(contains(pad_index_to_streams_info, output_pad_index), HAILO_INVALID_HEF, + "Pad {} of post-process {} is not connected to any core output stream", input_pad.index(), op_proto.name()); + + // Data of the input_pad is taken from the output_pad of the core op + const auto &connected_output_pad = pad_index_to_pad_data.at(output_pad_index); + net_flow_element.input_streams.insert(connected_output_pad.name()); + // TODO: HRT-10603 + const auto &op_input_stream = pad_index_to_streams_info.at(output_pad_index); + auto max_periph_bytes_from_hef = HefConfigurator::max_periph_bytes_value(DeviceBase::hef_arch_to_device_arch(hef_arch)); + CHECK_EXPECTED(max_periph_bytes_from_hef); + const auto max_periph_bytes = (0 == op_input_stream.max_shmifo_size) ? max_periph_bytes_from_hef.value(): + MIN(max_periph_bytes_from_hef.value(), op_input_stream.max_shmifo_size); + const auto is_hw_padding_supported = HefConfigurator::is_hw_padding_supported(op_input_stream, max_periph_bytes); + net_flow_element.name = op_proto.name(); + + switch (op_proto.logits_op().logits_type()) { + case ProtoHEFLogitsType::PROTO_HEF_ARGMAX_TYPE: { + net_flow_element.op_type = HAILO_NET_FLOW_OP_TYPE_ARGMAX; + return create_argmax_op(connected_output_pad, output_pad, input_pad.name(), output_pad.name(), is_hw_padding_supported); + } + case ProtoHEFLogitsType::PROTO_HEF_SOFTMAX_TYPE: { + net_flow_element.op_type = HAILO_NET_FLOW_OP_TYPE_SOFTMAX; + return create_softmax_op(connected_output_pad, output_pad, input_pad.name(), output_pad.name()); + } + default: { + LOGGER__ERROR("Invalid Net-Flow Logits-Op {}", ProtoHEFLogitsType_Name(op_proto.logits_op().logits_type())); + return make_unexpected(HAILO_INTERNAL_FAILURE); + } + } +} Expected>> Hef::Impl::create_net_flow_ops(const ProtoHEFNetworkGroup &network_group_proto, - CoreOpMetadata &core_op_metadata) const + CoreOpMetadata &core_op_metadata, const ProtoHEFHwArch &hef_arch) const { std::vector> result; if 
(!m_supported_features.hailo_net_flow) { @@ -943,6 +1138,16 @@ Expected>> Hef::Impl::create_net_flo for (auto &pad_edge : network_group_proto.pad_edges()) { input_to_output_pads.insert({pad_edge.dst(), pad_edge.src()}); } + std::map pad_index_to_pad_data; + for (auto &op_proto : network_group_proto.ops()) { + for (auto &output_pad : op_proto.output_pads()) { + pad_index_to_pad_data.insert({output_pad.index(), output_pad}); + } + for (auto &input_pad : op_proto.input_pads()) { + pad_index_to_pad_data.insert({input_pad.index(), input_pad}); + } + } + for (auto &op_proto : network_group_proto.ops()) { switch (op_proto.op_case()) { case ProtoHEFOp::kCoreOp: { @@ -950,10 +1155,10 @@ Expected>> Hef::Impl::create_net_flo } case ProtoHEFOp::kNmsOp: { hailo_format_t output_format{}; - output_format.type = HAILO_FORMAT_TYPE_FLOAT32; - output_format.order = HAILO_FORMAT_ORDER_HAILO_NMS; - output_format.flags = HAILO_FORMAT_FLAGS_QUANTIZED; + output_format.order = HAILO_FORMAT_ORDER_HAILO_NMS; // TODO Remove- HRT-9737 + NetFlowElement net_flow_element{}; + net_flow_element.op_type = HAILO_NET_FLOW_OP_TYPE_NMS; // TODO: HRT-9902 - Move nms_info to be an op member instead of NetFlowElement net_flow_element.nms_info = { @@ -962,7 +1167,9 @@ Expected>> Hef::Impl::create_net_flo sizeof(hailo_bbox_float32_t), 1, // input_division_factor false, - hailo_nms_defuse_info_t() + hailo_nms_defuse_info_t(), + DEFAULT_NMS_NO_BURST_SIZE, + HAILO_BURST_TYPE_NO_BURST }; for (auto &input_pad : op_proto.input_pads()) { CHECK_AS_EXPECTED(contains(input_to_output_pads, static_cast(input_pad.index())), HAILO_INVALID_HEF, @@ -1007,7 +1214,6 @@ Expected>> Hef::Impl::create_net_flo } } net_flow_element.op = post_process_op; - // Fill meta-data output vstream info auto net_group_name = HefUtils::get_network_group_name(network_group_proto, m_supported_features); auto network_name = HailoRTDefaults::get_network_name(net_group_name); @@ -1024,11 +1230,34 @@ Expected>> Hef::Impl::create_net_flo 
net_flow_output_vstream_info.nms_shape.number_of_classes--; net_flow_element.nms_info.number_of_classes--; } + net_flow_element.output_vstream_info = net_flow_output_vstream_info; - result.push_back(std::make_shared(net_flow_element)); + auto net_flow_element_ptr = make_shared_nothrow(net_flow_element); + CHECK_NOT_NULL_AS_EXPECTED(net_flow_element_ptr, HAILO_OUT_OF_HOST_MEMORY); + result.push_back(net_flow_element_ptr); + break; + } + case ProtoHEFOp::kLogitsOp: { + NetFlowElement net_flow_element{}; + auto expected_logits_op = create_logits_op(op_proto, input_to_output_pads, pad_index_to_pad_data, net_flow_element, + pad_index_to_streams_info, hef_arch); + CHECK_EXPECTED(expected_logits_op); + net_flow_element.op = expected_logits_op.release(); + + hailo_vstream_info_t net_flow_output_vstream_info{}; + auto proto_output_pad = op_proto.output_pads()[0]; + auto net_group_name = HefUtils::get_network_group_name(network_group_proto, m_supported_features); + auto network_name = HailoRTDefaults::get_network_name(net_group_name); + strncpy(net_flow_output_vstream_info.name, proto_output_pad.name().c_str(), proto_output_pad.name().length() + 1); + strncpy(net_flow_output_vstream_info.network_name, network_name.c_str(), network_name.length() + 1); + net_flow_output_vstream_info.direction = HAILO_D2H_STREAM; + net_flow_output_vstream_info.format = net_flow_element.op.get()->outputs_metadata().begin()->second.format; + net_flow_output_vstream_info.shape = net_flow_element.op.get()->outputs_metadata().begin()->second.shape; + net_flow_element.output_vstream_info = net_flow_output_vstream_info; - // TODO: HRT-9546 - Move vstreams out of core op - core_op_metadata.add_output_vstream_info(net_flow_output_vstream_info); + auto net_flow_element_ptr = make_shared_nothrow(net_flow_element); + CHECK_NOT_NULL_AS_EXPECTED(net_flow_element_ptr, HAILO_OUT_OF_HOST_MEMORY); + result.push_back(net_flow_element_ptr); break; } default: { @@ -1040,11 +1269,14 @@ Expected>> 
Hef::Impl::create_net_flo return result; } -Expected Hef::Impl::get_core_op_metadata(const std::string &network_group_name, uint32_t partial_clusters_layout_bitmap) +Expected Hef::Impl::get_core_op_metadata(const std::string &network_group_name, uint32_t partial_clusters_layout_bitmap) { - CHECK_AS_EXPECTED(contains(m_core_op_per_arch, network_group_name), HAILO_NOT_FOUND, + CHECK_AS_EXPECTED(contains(m_network_group_metadata, network_group_name), HAILO_NOT_FOUND, "Network group with name {} wasn't found", network_group_name); - auto metadata_per_arch = m_core_op_per_arch.at(network_group_name); + auto &ng_metadata = m_network_group_metadata.at(network_group_name); + CHECK_AS_EXPECTED(contains(ng_metadata.m_core_ops_metadata_per_arch, network_group_name), HAILO_NOT_FOUND, + "Core-op with name {} wasn't found", network_group_name); + auto metadata_per_arch = ng_metadata.m_core_ops_metadata_per_arch.at(network_group_name); auto metadata = metadata_per_arch.get_metadata(partial_clusters_layout_bitmap); return metadata; } @@ -1107,29 +1339,29 @@ hailo_status get_hw_padding_params(hailo_format_order_t format_order, uint32_t w } Expected HefConfigurator::parse_nn_stream_config(hailo_format_order_t format_order, uint32_t width, uint32_t features, - uint32_t hw_data_bytes, uint16_t core_buffers_per_frame, uint16_t core_bytes_per_buffer, bool hw_padding_supported, bool is_ddr) + uint32_t hw_data_bytes, uint16_t core_buffers_per_frame, uint16_t core_bytes_per_buffer, bool hw_padding_supported, bool is_ddr, + uint16_t periph_buffers_per_frame, uint16_t periph_bytes_per_buffer) { CONTROL_PROTOCOL__nn_stream_config_t stream_config = {}; stream_config.core_buffers_per_frame = core_buffers_per_frame; stream_config.core_bytes_per_buffer = core_bytes_per_buffer; - stream_config.periph_buffers_per_frame = core_buffers_per_frame; // periph buffers per frame is the same (even if - // for hw padding each buffer is smaller). 
+ stream_config.periph_buffers_per_frame = periph_buffers_per_frame; + stream_config.periph_bytes_per_buffer = periph_bytes_per_buffer; /* For DDR buffering - core buffers is depended on the amount of buffers per PCIe interrupt. No HW padding required */ if (is_ddr) { stream_config.core_buffers_per_frame = 1; stream_config.feature_padding_payload = 0; - stream_config.periph_bytes_per_buffer = stream_config.core_bytes_per_buffer; } else { if (hw_padding_supported) { auto status = get_hw_padding_params(format_order, width, features, hw_data_bytes, stream_config.feature_padding_payload, stream_config.periph_bytes_per_buffer); CHECK_SUCCESS_AS_EXPECTED(status); + stream_config.periph_buffers_per_frame = core_buffers_per_frame; } else { stream_config.feature_padding_payload = 0; - stream_config.periph_bytes_per_buffer = stream_config.core_bytes_per_buffer; } /* For now, no support for buffer padding */ stream_config.buffer_padding_payload = 0; @@ -1151,24 +1383,72 @@ Expected HefConfigurator::parse_nn_stream_ auto format_order = format_order_exp.release(); auto is_ddr = ProtoHEFEdgeConnectionType::PROTO__EDGE_CONNECTION_TYPE__DDR == edge_connection_type; + CHECK_AS_EXPECTED(IS_FIT_IN_UINT32(edge_layer.padded_width() * edge_layer.padded_features() * + edge_layer.padded_height() * edge_layer.data_bytes()), HAILO_INVALID_HEF, "padded shape too big"); + + // TODO HRT-10993: Remove these parameters for the parse_nn_stream_config function call + // These values will get overrided in update_layer_info in resource_manager_builder - except in case of + // MIPI stream with hw padding supported (HRT-11030) + // TODO HRT-11030 - in MIPI with hw padding supported - in this case because the layer thinks hw padding is + // supported it wont recalculate periph values , but when creating the InputStreamBase - it will not use hw padding + // and then will take the initial values. 
Should fix this behavior + const uint16_t INITIAL_PERIPH_BYTES_PER_BUFFER = static_cast(edge_layer.core_bytes_per_buffer()); + const uint16_t INITIAL_PERIPH_BUFFERS_PER_FRAME = static_cast(edge_layer.core_buffers_per_frame()); + // Width and features only used in case hw_padding is supported. In that case, they represent the HW shape (without padding) return parse_nn_stream_config(format_order, edge_layer.width(), edge_layer.features(), edge_layer.data_bytes(), static_cast(edge_layer.core_buffers_per_frame()), - static_cast(edge_layer.core_bytes_per_buffer()), hw_padding_supported, is_ddr); + static_cast(edge_layer.core_bytes_per_buffer()), hw_padding_supported, is_ddr, + INITIAL_PERIPH_BUFFERS_PER_FRAME, INITIAL_PERIPH_BYTES_PER_BUFFER); } Expected HefConfigurator::parse_nn_stream_config(const LayerInfo &edge_layer, bool hw_padding_supported) { // TODO HRT-7177 - pass interface to layer info instead of re-calculated Layer info from stream_internal.hpp // After passing stream interface, there is no need for this function. Just use CONTROL_PROTOCOL__nn_stream_config_t from layer info. 
- auto is_ddr = false; // This function is called only on boundary layers, so no DDR + assert(LayerType::BOUNDARY == edge_layer.type); + const auto is_ddr = false; // This function is called only on boundary layers, so no DDR + return parse_nn_stream_config(edge_layer.format.order, edge_layer.hw_shape.width, edge_layer.hw_shape.features, edge_layer.hw_data_bytes, edge_layer.nn_stream_config.core_buffers_per_frame, - edge_layer.nn_stream_config.core_bytes_per_buffer, hw_padding_supported, is_ddr); + edge_layer.nn_stream_config.core_bytes_per_buffer, hw_padding_supported, is_ddr, edge_layer.nn_stream_config.periph_buffers_per_frame, + edge_layer.nn_stream_config.periph_bytes_per_buffer); +} + +Expected HefConfigurator::max_periph_bytes_value(const hailo_device_architecture_t hw_arch) +{ + switch (hw_arch) { + case HAILO_ARCH_HAILO8_A0: + case HAILO_ARCH_HAILO8: + case HAILO_ARCH_HAILO8L: + return HAILO8_INBOUND_DATA_STREAM_SIZE; + case HAILO_ARCH_HAILO15: + return HAILO15_PERIPH_BYTES_PER_BUFFER_MAX_SIZE; + default: + LOGGER__ERROR("Unknown device architecture!"); + return make_unexpected(HAILO_INVALID_ARGUMENT); + } +} + +// TODO HRT-11006: remove this function when hw padding is removed from InputStreamBase / OutputStreamBase constructor +Expected HefConfigurator::max_periph_bytes_value(const hailo_stream_interface_t interface) +{ + switch (interface) { + case HAILO_STREAM_INTERFACE_ETH: + case HAILO_STREAM_INTERFACE_MIPI: + case HAILO_STREAM_INTERFACE_PCIE: + return HAILO8_INBOUND_DATA_STREAM_SIZE; + case HAILO_STREAM_INTERFACE_INTEGRATED: + return HAILO15_PERIPH_BYTES_PER_BUFFER_MAX_SIZE; + default: + LOGGER__ERROR("Unknown stream interface!"); + return make_unexpected(HAILO_INVALID_ARGUMENT); + } } bool HefConfigurator::is_hw_padding_supported(bool is_boundary, bool is_mux, hailo_format_order_t format_order, - uint16_t core_buffers_per_frame, uint32_t height, uint32_t width, uint32_t features, uint32_t hw_data_bytes) + uint16_t core_buffers_per_frame, uint32_t 
height, uint32_t width, uint32_t features, uint32_t hw_data_bytes, + const uint32_t max_periph_bytes_value) { if (!is_boundary || is_mux) { return false; @@ -1196,16 +1476,15 @@ bool HefConfigurator::is_hw_padding_supported(bool is_boundary, bool is_mux, hai return false; } - if ((width * features * hw_data_bytes) > - (HAILO8_INBOUND_DATA_STREAM_SIZE - 1)) { + if ((width * features * hw_data_bytes) > (max_periph_bytes_value - 1)) { // TODO: HRT-4177 - LOGGER__DEBUG("HW padding is supported only on layers with features * width * data size > stream size"); + LOGGER__DEBUG("HW padding is supported only on layers with shape size < stream size"); return false; } return true; } -bool HefConfigurator::is_hw_padding_supported(const LayerInfo &layer_info) +bool HefConfigurator::is_hw_padding_supported(const LayerInfo &layer_info, const uint32_t max_periph_bytes_value) { /* If the network is transposed, the width and height are swapped in LayerInfo c'tor, so need to swap it again for calculations */ auto height = layer_info.shape.height; @@ -1214,13 +1493,13 @@ bool HefConfigurator::is_hw_padding_supported(const LayerInfo &layer_info) std::swap(height, width); } - auto is_boundary = true; // This function is called only on boundary layers + auto is_boundary = (LayerType::BOUNDARY == layer_info.type); return is_hw_padding_supported(is_boundary, layer_info.is_mux, layer_info.format.order, layer_info.nn_stream_config.core_buffers_per_frame, height, width, - layer_info.shape.features, layer_info.hw_data_bytes); + layer_info.shape.features, layer_info.hw_data_bytes, max_periph_bytes_value); } -bool HefConfigurator::is_hw_padding_supported(const ProtoHEFEdgeLayer &edge_layer) +bool HefConfigurator::is_hw_padding_supported(const ProtoHEFEdgeLayer &edge_layer, const uint32_t max_periph_bytes_value) { auto is_boundary = (ProtoHEFEdgeConnectionType::PROTO__EDGE_CONNECTION_TYPE__BOUNDARY == edge_layer.context_switch_info().edge_connection_type()); auto is_mux = 
(ProtoHEFEdgeLayerType::PROTO__EDGE_LAYER_TYPE__MUX == edge_layer.edge_layer_type()); @@ -1238,48 +1517,51 @@ bool HefConfigurator::is_hw_padding_supported(const ProtoHEFEdgeLayer &edge_laye auto format_order = format_order_exp.release(); return is_hw_padding_supported(is_boundary, is_mux, format_order, static_cast(edge_layer_base.core_buffers_per_frame()), - edge_layer_base.height(), edge_layer_base.width(), edge_layer_base.features(), edge_layer_base.data_bytes()); + edge_layer_base.height(), edge_layer_base.width(), edge_layer_base.features(), edge_layer_base.data_bytes(), + max_periph_bytes_value); } Expected> Hef::Impl::get_input_stream_infos(const std::string &net_group_name, const std::string &network_name) { - auto network_group_metadata = get_core_op_metadata(net_group_name); - CHECK_EXPECTED(network_group_metadata); - return network_group_metadata->get_input_stream_infos(network_name); + auto core_op_metadata = get_core_op_metadata(net_group_name); + CHECK_EXPECTED(core_op_metadata); + + return core_op_metadata.value()->get_input_stream_infos(network_name); } Expected> Hef::Impl::get_output_stream_infos(const std::string &net_group_name, const std::string &network_name) { - auto network_group_metadata = get_core_op_metadata(net_group_name); - CHECK_EXPECTED(network_group_metadata); - return network_group_metadata->get_output_stream_infos(network_name); + auto core_op_metadata = get_core_op_metadata(net_group_name); + CHECK_EXPECTED(core_op_metadata); + + return core_op_metadata.value()->get_output_stream_infos(network_name); } Expected> Hef::Impl::get_all_stream_infos(const std::string &net_group_name, const std::string &network_name) { - auto network_group_metadata = get_core_op_metadata(net_group_name); - CHECK_EXPECTED(network_group_metadata); - return network_group_metadata->get_all_stream_infos(network_name); + auto core_op_metadata = get_core_op_metadata(net_group_name); + CHECK_EXPECTED(core_op_metadata); + + return 
core_op_metadata.value()->get_all_stream_infos(network_name); } Expected> Hef::Impl::get_network_infos(const std::string &net_group_name) { - auto network_group_metadata = get_core_op_metadata(net_group_name); - CHECK_EXPECTED(network_group_metadata); - return network_group_metadata->get_network_infos(); + CHECK_AS_EXPECTED(contains(m_network_group_metadata, net_group_name), HAILO_NOT_FOUND); + return m_network_group_metadata.at(net_group_name).get_network_infos(); } Expected Hef::Impl::get_stream_info_by_name(const std::string &stream_name, hailo_stream_direction_t stream_direction, const std::string &net_group_name) { - auto network_group_metadata = get_core_op_metadata(net_group_name); - CHECK_EXPECTED(network_group_metadata); + auto core_op_metadata = get_core_op_metadata(net_group_name); + CHECK_EXPECTED(core_op_metadata); if (HAILO_H2D_STREAM == stream_direction) { - auto stream_infos = network_group_metadata->get_input_stream_infos(); + auto stream_infos = core_op_metadata.value()->get_input_stream_infos(); CHECK_EXPECTED(stream_infos); for (auto &stream_info : stream_infos.value()) { if (stream_name == stream_info.name) { @@ -1287,7 +1569,7 @@ Expected Hef::Impl::get_stream_info_by_name(const std::stri } } } else { - auto stream_infos = network_group_metadata->get_output_stream_infos(); + auto stream_infos = core_op_metadata.value()->get_output_stream_infos(); CHECK_EXPECTED(stream_infos); for (auto &stream_info : stream_infos.value()) { if (stream_name == stream_info.name) { @@ -1302,25 +1584,22 @@ Expected Hef::Impl::get_stream_info_by_name(const std::stri Expected> Hef::Impl::get_input_vstream_infos(const std::string &net_group_name, const std::string &network_name) { - auto network_group_metadata = get_core_op_metadata(net_group_name); - CHECK_EXPECTED(network_group_metadata); - return network_group_metadata->get_input_vstream_infos(network_name); + CHECK_AS_EXPECTED(contains(m_network_group_metadata, net_group_name), HAILO_NOT_FOUND); + return 
m_network_group_metadata.at(net_group_name).get_input_vstream_infos(network_name); } Expected> Hef::Impl::get_output_vstream_infos(const std::string &net_group_name, const std::string &network_name) { - auto network_group_metadata = get_core_op_metadata(net_group_name); - CHECK_EXPECTED(network_group_metadata); - return network_group_metadata->get_output_vstream_infos(network_name); + CHECK_AS_EXPECTED(contains(m_network_group_metadata, net_group_name), HAILO_NOT_FOUND); + return m_network_group_metadata.at(net_group_name).get_output_vstream_infos(network_name); } Expected> Hef::Impl::get_all_vstream_infos(const std::string &net_group_name, const std::string &network_name) { - auto network_group_metadata = get_core_op_metadata(net_group_name); - CHECK_EXPECTED(network_group_metadata); - return network_group_metadata->get_all_vstream_infos(network_name); + CHECK_AS_EXPECTED(contains(m_network_group_metadata, net_group_name), HAILO_NOT_FOUND); + return m_network_group_metadata.at(net_group_name).get_all_vstream_infos(network_name); } const std::vector& Hef::Impl::network_groups() const @@ -1334,10 +1613,11 @@ const std::vector& Hef::Impl::core_ops(const std::string &ne return m_core_ops_per_group.at(net_group_name); }; -const std::vector> Hef::Impl::post_process_ops(const std::string &net_group_name) const +const NetworkGroupMetadata Hef::Impl::network_group_metadata(const std::string &net_group_name) const { - assert(contains(m_post_process_ops_per_group, net_group_name)); - return m_post_process_ops_per_group.at(net_group_name); + assert(contains(m_network_group_metadata, net_group_name)); + auto metadata = m_network_group_metadata.at(net_group_name); + return metadata; } bool Hef::Impl::check_hef_extension(const ProtoHEFExtensionType &extension, const ProtoHEFHeader &header, @@ -1456,8 +1736,9 @@ Expected Hef::Impl::get_number_of_input_streams(const std::string &net_g auto core_op_metadata = get_core_op_metadata(net_group_name); CHECK_EXPECTED(core_op_metadata); - 
auto input_layer_infos = core_op_metadata->get_input_layer_infos(); - return input_layer_infos.size(); + auto input_stream_infos = core_op_metadata.value()->get_input_stream_infos(); + CHECK_EXPECTED(input_stream_infos); + return input_stream_infos->size(); } Expected Hef::Impl::get_number_of_output_streams(const std::string &net_group_name) @@ -1465,8 +1746,9 @@ Expected Hef::Impl::get_number_of_output_streams(const std::string &net_ auto core_op_metadata = get_core_op_metadata(net_group_name); CHECK_EXPECTED(core_op_metadata); - auto output_layer_infos = core_op_metadata->get_output_layer_infos(); - return output_layer_infos.size(); + auto output_stream_infos = core_op_metadata.value()->get_output_stream_infos(); + CHECK_EXPECTED(output_stream_infos); + return output_stream_infos->size(); } static Expected get_layer_type(const ProtoHEFEdgeConnectionType &edge_connection_type) @@ -1484,20 +1766,7 @@ static Expected get_layer_type(const ProtoHEFEdgeConnectionType &edge } } -hailo_status HefUtils::fill_layer_info_with_base_info(const ProtoHEFEdgeLayerBase &base_info, - const ProtoHEFEdgeConnectionType &edge_connection_type, const ProtoHEFNetworkGroupMetadata &network_group_proto, - bool hw_padding_supported, bool transposed, const uint8_t context_index, const uint8_t network_index, - LayerInfo &layer_info) -{ - auto format_order_exp = HailoRTDefaults::get_device_format_order(base_info.format()); - CHECK_EXPECTED_AS_STATUS(format_order_exp); - - auto format_oder = format_order_exp.release(); - - auto layer_type = get_layer_type(edge_connection_type); - CHECK_EXPECTED_AS_STATUS(layer_type); - layer_info.type = layer_type.value(); - +static void parse_layer_shape(LayerInfo &layer_info, const ProtoHEFEdgeLayerBase &base_info, const bool hw_padding_supported) { if (HEF__FORMAT__NMS != base_info.format()) { layer_info.shape.height = base_info.height(); layer_info.shape.width = base_info.width(); @@ -1519,6 +1788,23 @@ hailo_status 
HefUtils::fill_layer_info_with_base_info(const ProtoHEFEdgeLayerBas layer_info.hw_shape.features = base_info.padded_features(); } layer_info.hw_data_bytes = base_info.data_bytes(); +} + +hailo_status HefUtils::fill_layer_info_with_base_info(const ProtoHEFEdgeLayerBase &base_info, + const ProtoHEFEdgeConnectionType &edge_connection_type, const ProtoHEFNetworkGroupMetadata &network_group_proto, + bool hw_padding_supported, bool transposed, const uint8_t context_index, const uint8_t network_index, + LayerInfo &layer_info, const SupportedFeatures &supported_features, const ProtoHEFHwArch &hef_arch) +{ + auto format_order_exp = HailoRTDefaults::get_device_format_order(base_info.format()); + CHECK_EXPECTED_AS_STATUS(format_order_exp); + + auto format_oder = format_order_exp.release(); + + auto layer_type = get_layer_type(edge_connection_type); + CHECK_EXPECTED_AS_STATUS(layer_type); + layer_info.type = layer_type.value(); + + parse_layer_shape(layer_info, base_info, hw_padding_supported); // TODO: remove duplications with stream info parse layer_info.format.order = format_oder; @@ -1539,7 +1825,7 @@ hailo_status HefUtils::fill_layer_info_with_base_info(const ProtoHEFEdgeLayerBas CHECK_EXPECTED_AS_STATUS(type); layer_info.format.type = type.value(); - auto nn_stream_config = HefConfigurator::parse_nn_stream_config(base_info, hw_padding_supported, + auto nn_stream_config = HefConfigurator::parse_nn_stream_config(base_info, hw_padding_supported, edge_connection_type); CHECK_EXPECTED_AS_STATUS(nn_stream_config, "Failed parse nn stream config"); layer_info.nn_stream_config = nn_stream_config.release(); @@ -1554,7 +1840,8 @@ hailo_status HefUtils::fill_layer_info_with_base_info(const ProtoHEFEdgeLayerBas layer_info.dma_engine_index = static_cast(base_info.engine_id()); if (HAILO_FORMAT_ORDER_HAILO_NMS == layer_info.format.order) { - auto expected_nms_info = parse_proto_nms_info(base_info.additional_info().nms_info()); + auto expected_nms_info = 
parse_proto_nms_info(base_info.additional_info().nms_info(), supported_features.nms_burst_mode, + hef_arch); CHECK_EXPECTED_AS_STATUS(expected_nms_info); layer_info.nms_info = expected_nms_info.release(); } @@ -1568,10 +1855,10 @@ hailo_status HefUtils::fill_layer_info(const ProtoHEFEdgeLayerInfo &info, const ProtoHEFEdgeConnectionType &edge_connection_type, const ProtoHEFCoreOpMock &core_op, hailo_stream_direction_t direction, bool hw_padding_supported, const uint8_t context_index, const std::string &partial_network_name, - uint8_t network_index, LayerInfo &layer_info) + uint8_t network_index, LayerInfo &layer_info, const SupportedFeatures &supported_features, const ProtoHEFHwArch &hef_arch) { auto status = fill_layer_info_with_base_info(info.edge_layer_base(), edge_connection_type, core_op.network_group_metadata, - hw_padding_supported, info.transposed(), context_index, network_index, layer_info); + hw_padding_supported, info.transposed(), context_index, network_index, layer_info, supported_features, hef_arch); CHECK_SUCCESS(status); if (HAILO_MAX_STREAM_NAME_SIZE < (info.name().length() + 1)) { @@ -1591,6 +1878,21 @@ hailo_status HefUtils::fill_layer_info(const ProtoHEFEdgeLayerInfo &info, layer_info.quant_info.limvals_min = info.numeric_info().limvals_min(); layer_info.quant_info.qp_scale = info.numeric_info().qp_scale(); layer_info.quant_info.qp_zp = info.numeric_info().qp_zp(); + + for (uint32_t i = 0; i < layer_info.shape.features; i++) { + hailo_quant_info_t quant_info = {}; + if (supported_features.output_scale_by_feature) { + quant_info.qp_zp = static_cast(info.numeric_info().qp_zps()[i]); + quant_info.qp_scale = static_cast(info.numeric_info().qp_scales()[i]); + } else { + quant_info.qp_zp = info.numeric_info().qp_zp(); + quant_info.qp_scale = info.numeric_info().qp_scale(); + } + quant_info.limvals_min = info.numeric_info().limvals_min(); + quant_info.limvals_max = info.numeric_info().limvals_max(); + 
layer_info.quant_infos.push_back(std::move(quant_info)); + } + // Simulation info assert (1 == info.edge_layer_base().buffer_indices_size()); layer_info.buffer_indices.cluster_index = info.edge_layer_base().buffer_indices(0).cluster_index(); @@ -1605,7 +1907,8 @@ hailo_status HefUtils::fill_layer_info(const ProtoHEFEdgeLayerInfo &info, // This creates a new LayerInfo for the fused layer *for each defused layer*, even though they all share the same fused layer. // TODO Make it so all defused layer reference the same LayerInfo of the fused layer. LayerInfo fused_layer_info = {}; - status = fill_fused_nms_info(fused_layer, fused_layer_info, layer_info.quant_info, layer_info.network_name); + status = fill_fused_nms_info(fused_layer, fused_layer_info, layer_info.quant_info, layer_info.network_name, + supported_features.nms_burst_mode, hef_arch); CHECK_SUCCESS(status); layer_info.fused_nms_layer.push_back(fused_layer_info); break; @@ -1618,7 +1921,8 @@ hailo_status HefUtils::fill_layer_info(const ProtoHEFEdgeLayerInfo &info, } hailo_status HefUtils::fill_fused_nms_info(const ProtoHEFEdgeLayerFused &info, LayerInfo &layer_info, - hailo_quant_info_t &defuse_quant_info, const std::string &network_name) + hailo_quant_info_t &defuse_quant_info, const std::string &network_name, const bool burst_mode_enabled, + const ProtoHEFHwArch &hef_arch) { auto base_info = info.layer_info().edge_layer_base(); auto format_order_exp = HailoRTDefaults::get_device_format_order(base_info.format()); @@ -1637,7 +1941,7 @@ hailo_status HefUtils::fill_fused_nms_info(const ProtoHEFEdgeLayerFused &info, L CHECK_EXPECTED_AS_STATUS(type); layer_info.format.type = type.value(); - auto expected_nms_info = parse_proto_nms_info(info.nms_info()); + auto expected_nms_info = parse_proto_nms_info(info.nms_info(), burst_mode_enabled, hef_arch); CHECK_EXPECTED_AS_STATUS(expected_nms_info); layer_info.nms_info = expected_nms_info.release(); @@ -1664,11 +1968,11 @@ hailo_status HefUtils::fill_mux_info(const 
ProtoHEFEdgeLayerMux &info, const ProtoHEFEdgeConnectionType &edge_connection_type, const ProtoHEFCoreOpMock &core_op, hailo_stream_direction_t direction, bool hw_padding_supported, const uint8_t context_index, const std::string &partial_network_name, - uint8_t network_index, LayerInfo &layer_info) + uint8_t network_index, LayerInfo &layer_info, const SupportedFeatures &supported_features, const ProtoHEFHwArch &hef_arch) { const bool transposed = false; auto status = fill_layer_info_with_base_info(info.edge_layer_base(), edge_connection_type, core_op.network_group_metadata, - hw_padding_supported, transposed, context_index, network_index, layer_info); + hw_padding_supported, transposed, context_index, network_index, layer_info, supported_features, hef_arch); CHECK_SUCCESS(status); if (HAILO_MAX_STREAM_NAME_SIZE < (info.name().length() + 1)) { @@ -1699,7 +2003,8 @@ hailo_status HefUtils::fill_mux_info(const ProtoHEFEdgeLayerMux &info, switch (info.predecessors(i).edge_case()) { case ProtoHefEdge::kLayerInfo: status = fill_layer_info(info.predecessors(i).layer_info(), edge_connection_type, core_op, - direction, hw_padding_supported, context_index, partial_network_name, network_index, temp_layer); + direction, hw_padding_supported, context_index, partial_network_name, network_index, temp_layer, + supported_features, hef_arch); if (HAILO_SUCCESS != status) { return status; } @@ -1707,7 +2012,8 @@ hailo_status HefUtils::fill_mux_info(const ProtoHEFEdgeLayerMux &info, break; case ProtoHefEdge::kLayerMux: status = fill_mux_info(info.predecessors(i).layer_mux(), edge_connection_type, core_op, - direction, hw_padding_supported, context_index, partial_network_name, network_index, temp_layer); + direction, hw_padding_supported, context_index, partial_network_name, network_index, temp_layer, + supported_features, hef_arch); if (HAILO_SUCCESS != status) { return status; } @@ -1728,9 +2034,10 @@ hailo_status HefUtils::fill_boundary_layers_info( const uint8_t context_index, const 
ProtoHEFEdgeLayer &layer, const SupportedFeatures &supported_features, - ContextMetadata &context_metadata) + ContextMetadata &context_metadata, + const ProtoHEFHwArch &hef_arch) { - auto layer_info = get_boundary_layer_info(core_op, context_index, layer, supported_features); + auto layer_info = get_boundary_layer_info(core_op, context_index, layer, supported_features, hef_arch); CHECK_EXPECTED_AS_STATUS(layer_info); context_metadata.add_boundary_layer(layer_info.release()); @@ -1743,9 +2050,9 @@ hailo_status HefUtils::fill_inter_context_layers_info( const uint8_t context_index, const ProtoHEFEdgeLayer &layer, const SupportedFeatures &supported_features, - ContextMetadata &context_metadata) + ContextMetadata &context_metadata, const ProtoHEFHwArch &hef_arch) { - auto layer_info = get_inter_context_layer_info(core_op, context_index, layer, supported_features); + auto layer_info = get_inter_context_layer_info(core_op, context_index, layer, supported_features, hef_arch); CHECK_EXPECTED_AS_STATUS(layer_info); context_metadata.add_inter_context_layer(layer_info.release()); @@ -1757,9 +2064,9 @@ hailo_status HefUtils::fill_ddr_layers_info( const uint8_t context_index, const ProtoHEFEdgeLayer &layer, const SupportedFeatures &supported_features, - ContextMetadata &context_metadata) + ContextMetadata &context_metadata, const ProtoHEFHwArch &hef_arch) { - auto layer_info = get_ddr_layer_info(core_op, context_index, layer, supported_features); + auto layer_info = get_ddr_layer_info(core_op, context_index, layer, supported_features, hef_arch); CHECK_EXPECTED_AS_STATUS(layer_info); context_metadata.add_ddr_layer(layer_info.release()); @@ -1987,14 +2294,57 @@ static Expected parse_action(const ProtoHEFAction CHECK_AS_EXPECTED(IS_FIT_IN_UINT8(proto_action.enable_nms().network_index()), HAILO_INVALID_HEF, "Failed to parse HEF. 
Invalid network_index: {}.", proto_action.enable_nms().network_index()); + uint16_t number_of_classes = 0; + uint16_t burst_size = 0; + // TODO: HRT-10750 - change to error and failure in case of old enable nms action + if (0 == proto_action.enable_nms().number_of_classes() || 0 == proto_action.enable_nms().burst_size()) { + LOGGER__WARNING("Enable NMS Action must have number of classes and burst size, Please update Hef to SDK version newer than 3.24"); + number_of_classes = 1; + burst_size = 1; + } else { + number_of_classes = static_cast(proto_action.enable_nms().number_of_classes()); + burst_size = static_cast(proto_action.enable_nms().burst_size()); + } + auto support_multi_networks = supported_features.multi_network_support; auto network_index = static_cast((support_multi_networks) ? proto_action.enable_nms().network_index() : 0); const auto nms_unit_index = static_cast(proto_action.enable_nms().nms_unit_index()); - return EnableNmsAction::create(nms_unit_index, network_index); + return EnableNmsAction::create(nms_unit_index, network_index, number_of_classes, burst_size); } + case ProtoHEFAction::kWriteDataByType: + { + CHECK_AS_EXPECTED(IS_FIT_IN_UINT32(proto_action.write_data_by_type().address()), HAILO_INVALID_HEF, + "Failed to parse HEF. Invalid write_data_by_type address: {} (should fit uint32_t).", + proto_action.write_data_by_type().address()); + CHECK_AS_EXPECTED((0 == (proto_action.write_data_by_type().address() % ALIGNED_TO_4_BYTES)), HAILO_INVALID_HEF, + "Failed to parse HEF. Invalid write_data_by_type address. Address should be aligned to 4 bytes: {}.", + proto_action.write_data_by_type().address()); + CHECK_AS_EXPECTED(proto_action.write_data_by_type().data_type() == ProtoHEFWriteDataType::DATA_FROM_ACTION || + proto_action.write_data_by_type().data_type() == ProtoHEFWriteDataType::BATCH_SIZE, HAILO_INVALID_HEF, + "Failed to parse HEF. 
Invalid write_data_by_type data_type: {} ", proto_action.write_data_by_type().data_type()); + CHECK_AS_EXPECTED(proto_action.write_data_by_type().data().length() <= CONTEXT_SWITCH_DEFS__WRITE_ACTION_BY_TYPE_MAX_SIZE, HAILO_INVALID_HEF, + "Failed to parse HEF. Invalid write_data_by_type data size: {} ", proto_action.write_data_by_type().data().length()); + CHECK_AS_EXPECTED(IS_FIT_IN_UINT8(proto_action.write_data_by_type().shift()), HAILO_INVALID_HEF, + "Failed to parse HEF. Invalid write_data_by_type shift: {} (should fit uint8_t).", + proto_action.write_data_by_type().shift()); + + uint32_t data = 0x0; + memcpy(&data, proto_action.write_data_by_type().data().data(), + /* Limit the data to one register */ + MIN(CONTEXT_SWITCH_DEFS__WRITE_ACTION_BY_TYPE_MAX_SIZE, proto_action.write_data_by_type().data().length())); + + const auto address = static_cast(proto_action.write_data_by_type().address()); + const auto data_type = static_cast(proto_action.write_data_by_type().data_type()); + const auto mask = proto_action.write_data_by_type().mask(); + auto support_multi_networks = supported_features.multi_network_support; + const auto network_index = static_cast((support_multi_networks) ? 
proto_action.write_data_by_type().network_index() : 0); + const auto shift = static_cast(proto_action.write_data_by_type().shift()); + + return WriteDataByTypeAction::create(address, data_type, data, shift, mask, network_index); + } default: LOGGER__ERROR("Action {} not implemented", proto_action.action_case()); break; @@ -2119,7 +2469,8 @@ Expected HefUtils::parse_preliminary_context(const ProtoHEFPrel } Expected HefUtils::parse_single_dynamic_context(const ProtoHEFCoreOpMock &core_op, - const ProtoHEFContext &context_proto, uint8_t context_index, const SupportedFeatures &supported_features) + const ProtoHEFContext &context_proto, uint8_t context_index, const SupportedFeatures &supported_features, + const ProtoHEFHwArch &hef_arch) { auto context_metadata_exp = parse_operations(context_proto.operations(), supported_features); CHECK_EXPECTED(context_metadata_exp); @@ -2129,17 +2480,17 @@ Expected HefUtils::parse_single_dynamic_context(const ProtoHEFC if (ProtoHEFEdgeConnectionType::PROTO__EDGE_CONNECTION_TYPE__BOUNDARY == edge_layer.context_switch_info().edge_connection_type()) { auto status = fill_boundary_layers_info(core_op, context_index, edge_layer, - supported_features, context_metadata); + supported_features, context_metadata, hef_arch); CHECK_SUCCESS_AS_EXPECTED(status); } else if (ProtoHEFEdgeConnectionType::PROTO__EDGE_CONNECTION_TYPE__INTERMEDIATE == edge_layer.context_switch_info().edge_connection_type()) { auto status = fill_inter_context_layers_info(core_op, context_index, edge_layer, - supported_features, context_metadata); + supported_features, context_metadata, hef_arch); CHECK_SUCCESS_AS_EXPECTED(status); } else if (ProtoHEFEdgeConnectionType::PROTO__EDGE_CONNECTION_TYPE__DDR == edge_layer.context_switch_info().edge_connection_type()) { auto status = fill_ddr_layers_info(core_op, context_index, edge_layer, - supported_features, context_metadata); + supported_features, context_metadata, hef_arch); CHECK_SUCCESS_AS_EXPECTED(status); } } @@ -2170,12 
+2521,13 @@ static hailo_status validate_unique_boundary_names(const std::vector> HefUtils::parse_dynamic_contexts(const ProtoHEFCoreOpMock &core_op, const SupportedFeatures &supported_features) +Expected> HefUtils::parse_dynamic_contexts(const ProtoHEFCoreOpMock &core_op, const SupportedFeatures &supported_features, + const ProtoHEFHwArch &hef_arch) { std::vector contexts_metadata; for (uint8_t context_index = 0; context_index < core_op.contexts.size(); context_index++) { auto &context_proto = core_op.contexts[context_index]; - auto context_metadata = parse_single_dynamic_context(core_op, context_proto, context_index, supported_features); + auto context_metadata = parse_single_dynamic_context(core_op, context_proto, context_index, supported_features, hef_arch); CHECK_EXPECTED(context_metadata); contexts_metadata.emplace_back(context_metadata.release()); } @@ -2186,13 +2538,39 @@ Expected> HefUtils::parse_dynamic_contexts(const Pr return contexts_metadata; } -Expected HefUtils::parse_proto_nms_info(const ProtoHEFNmsInfo &proto_nms_info) +Expected HefUtils::parse_proto_nms_info(const ProtoHEFNmsInfo &proto_nms_info, const bool burst_mode_enabled, + const ProtoHEFHwArch &hef_arch) { hailo_nms_info_t nms_info = {}; nms_info.number_of_classes = static_cast(proto_nms_info.number_of_classes()); nms_info.bbox_size = static_cast(proto_nms_info.bbox_size()); nms_info.max_bboxes_per_class = static_cast(proto_nms_info.max_output_size()); nms_info.chunks_per_frame = static_cast(proto_nms_info.input_division_factor()); + + if (burst_mode_enabled) { + nms_info.burst_size = static_cast(proto_nms_info.burst_size()); + nms_info.burst_type = static_cast(proto_nms_info.burst_type()); + + CHECK_AS_EXPECTED(nms_info.burst_type != HAILO_BURST_TYPE_NO_BURST, HAILO_INVALID_HEF, + "Invalid HEF, nms burst type is no burst but burst extension is enabled"); + + CHECK_AS_EXPECTED((nms_info.burst_size * nms_info.bbox_size) <= MAX_NMS_BURST_SIZE, + HAILO_INVALID_HEF, "Invalid HEF, nms burst size 
{} larger than maximum burst size {}", + (nms_info.burst_size * nms_info.bbox_size), MAX_NMS_BURST_SIZE); + + // Validate that burst type matches architecture + const auto dev_arch = DeviceBase::hef_arch_to_device_arch(hef_arch); + CHECK_AS_EXPECTED(LayerInfoUtils::validate_nms_burst_type(nms_info.burst_type, dev_arch), HAILO_INVALID_HEF, + "Invalid HEF, nms burst type {} on device architecture {}", nms_info.burst_type, dev_arch); + } else { + CHECK_AS_EXPECTED(HAILO_BURST_TYPE_NO_BURST == static_cast(proto_nms_info.burst_type()), + HAILO_INVALID_HEF, "Invalid HEF, nms burst extension is disabled yet burst type is {}", nms_info.burst_type); + + // In case of HAILO_BURST_TYPE_NO_BURST make burst size DEFAULT_NMS_NO_BURST_SIZE + nms_info.burst_size = DEFAULT_NMS_NO_BURST_SIZE; + nms_info.burst_type = static_cast(proto_nms_info.burst_type()); + } + if (nms_info.chunks_per_frame == 0) { // Old hef, use default value 1 nms_info.chunks_per_frame = 1; @@ -2213,7 +2591,8 @@ Expected HefUtils::parse_proto_nms_info(const ProtoHEFNmsInfo } Expected HefUtils::get_boundary_layer_info(const ProtoHEFCoreOpMock &core_op, - const uint8_t context_index, const ProtoHEFEdgeLayer &layer, const SupportedFeatures &supported_features) + const uint8_t context_index, const ProtoHEFEdgeLayer &layer, const SupportedFeatures &supported_features, + const ProtoHEFHwArch &hef_arch) { // We parse only boundary layers for user usage CHECK_AS_EXPECTED( @@ -2228,16 +2607,22 @@ Expected HefUtils::get_boundary_layer_info(const ProtoHEFCoreOpMock & auto network_index = static_cast((support_multi_networks) ? 
layer.network_index() : 0); auto partial_network_name = HefUtils::get_partial_network_name_by_index(core_op, network_index, supported_features); CHECK_EXPECTED(partial_network_name); - const bool hw_padding_supported = HefConfigurator::is_hw_padding_supported(layer); + auto max_periph_bytes_from_hef = HefConfigurator::max_periph_bytes_value(DeviceBase::hef_arch_to_device_arch(hef_arch)); + CHECK_EXPECTED(max_periph_bytes_from_hef); + const auto max_periph_bytes = (0 == layer.layer_info().edge_layer_base().max_shmifo_size()) ? max_periph_bytes_from_hef.value(): + MIN(max_periph_bytes_from_hef.value(), layer.layer_info().edge_layer_base().max_shmifo_size()); + const bool hw_padding_supported = HefConfigurator::is_hw_padding_supported(layer, max_periph_bytes); if (ProtoHEFEdgeLayerType::PROTO__EDGE_LAYER_TYPE__INFO == layer.edge_layer_type()) { // TODO: return LayerInfo auto status = fill_layer_info(layer.layer_info(), layer.context_switch_info().edge_connection_type(), - core_op, direction, hw_padding_supported, context_index, partial_network_name.value(), network_index, result); + core_op, direction, hw_padding_supported, context_index, partial_network_name.value(), network_index, result, + supported_features, hef_arch); CHECK_SUCCESS_AS_EXPECTED(status); } else if (ProtoHEFEdgeLayerType::PROTO__EDGE_LAYER_TYPE__MUX == layer.edge_layer_type()) { // TODO: return LayerInfo auto status = fill_mux_info(layer.layer_mux(), layer.context_switch_info().edge_connection_type(), - core_op, direction, hw_padding_supported, context_index, partial_network_name.value(), network_index, result); + core_op, direction, hw_padding_supported, context_index, partial_network_name.value(), network_index, result, + supported_features, hef_arch); CHECK_SUCCESS_AS_EXPECTED(status); } else { LOGGER__ERROR("Invalid layer type"); @@ -2272,7 +2657,8 @@ static Expected parse_connected_context_info( } Expected HefUtils::get_inter_context_layer_info(const ProtoHEFCoreOpMock &core_op, - const uint8_t 
context_index, const ProtoHEFEdgeLayer &layer, const SupportedFeatures &supported_features) + const uint8_t context_index, const ProtoHEFEdgeLayer &layer, const SupportedFeatures &supported_features, + const ProtoHEFHwArch &hef_arch) { LayerInfo result = {}; CHECK_AS_EXPECTED(PROTO__EDGE_LAYER_TYPE__INFO == layer.edge_layer_type(), HAILO_INVALID_HEF, "Inter-context layer can't be mux."); @@ -2284,9 +2670,14 @@ Expected HefUtils::get_inter_context_layer_info(const ProtoHEFCoreOpM CHECK_EXPECTED(partial_network_name); result.network_name = HefUtils::get_network_name(core_op, partial_network_name.release()); result.context_index = context_index; - const bool hw_padding_supported = HefConfigurator::is_hw_padding_supported(layer); + auto max_periph_bytes_from_hef = HefConfigurator::max_periph_bytes_value(DeviceBase::hef_arch_to_device_arch(hef_arch)); + CHECK_EXPECTED(max_periph_bytes_from_hef); + const auto max_periph_bytes = (0 == layer.layer_info().edge_layer_base().max_shmifo_size()) ? max_periph_bytes_from_hef.value(): + MIN(max_periph_bytes_from_hef.value(), layer.layer_info().edge_layer_base().max_shmifo_size()); + const bool hw_padding_supported = HefConfigurator::is_hw_padding_supported(layer, max_periph_bytes); result.name = layer.layer_info().name(); - auto nn_stream_config_exp = HefConfigurator::parse_nn_stream_config(layer.layer_info().edge_layer_base(), + + auto nn_stream_config_exp = HefConfigurator::parse_nn_stream_config(layer.layer_info().edge_layer_base(), hw_padding_supported, layer.context_switch_info().edge_connection_type()); CHECK_EXPECTED(nn_stream_config_exp); result.nn_stream_config = nn_stream_config_exp.release(); @@ -2299,6 +2690,8 @@ Expected HefUtils::get_inter_context_layer_info(const ProtoHEFCoreOpM result.max_shmifo_size = layer.layer_info().edge_layer_base().max_shmifo_size(); + parse_layer_shape(result, layer.layer_info().edge_layer_base(), hw_padding_supported); + result.direction = 
(ProtoHEFEdgeLayerDirection::PROTO__EDGE_LAYER_DIRECTION__DEVICE_TO_HOST == layer.direction()) ? HAILO_D2H_STREAM : HAILO_H2D_STREAM; @@ -2313,7 +2706,8 @@ Expected HefUtils::get_inter_context_layer_info(const ProtoHEFCoreOpM } Expected HefUtils::get_ddr_layer_info(const ProtoHEFCoreOpMock &core_op, - const uint8_t context_index, const ProtoHEFEdgeLayer &layer, const SupportedFeatures &supported_features) + const uint8_t context_index, const ProtoHEFEdgeLayer &layer, const SupportedFeatures &supported_features, + const ProtoHEFHwArch &hef_arch) { LayerInfo result = {}; CHECK_AS_EXPECTED(PROTO__EDGE_LAYER_TYPE__INFO == layer.edge_layer_type(), HAILO_INVALID_HEF, "DDR layer can't be mux."); @@ -2326,9 +2720,13 @@ Expected HefUtils::get_ddr_layer_info(const ProtoHEFCoreOpMock &core_ CHECK_EXPECTED(partial_network_name); result.network_name = HefUtils::get_network_name(core_op, partial_network_name.release()); result.context_index = context_index; - const bool hw_padding_supported = HefConfigurator::is_hw_padding_supported(layer); + auto max_periph_bytes_from_hef = HefConfigurator::max_periph_bytes_value(DeviceBase::hef_arch_to_device_arch(hef_arch)); + CHECK_EXPECTED(max_periph_bytes_from_hef); + const auto max_periph_bytes = (0 == layer.layer_info().edge_layer_base().max_shmifo_size()) ? 
max_periph_bytes_from_hef.value(): + MIN(max_periph_bytes_from_hef.value(), layer.layer_info().edge_layer_base().max_shmifo_size()); + const bool hw_padding_supported = HefConfigurator::is_hw_padding_supported(layer, max_periph_bytes); result.name = layer.layer_info().name(); - auto nn_stream_config_exp = HefConfigurator::parse_nn_stream_config(layer.layer_info().edge_layer_base(), + auto nn_stream_config_exp = HefConfigurator::parse_nn_stream_config(layer.layer_info().edge_layer_base(), hw_padding_supported, layer.context_switch_info().edge_connection_type()); CHECK_EXPECTED(nn_stream_config_exp); result.nn_stream_config = nn_stream_config_exp.release(); @@ -2351,6 +2749,8 @@ Expected HefUtils::get_ddr_layer_info(const ProtoHEFCoreOpMock &core_ result.direction = (ProtoHEFEdgeLayerDirection::PROTO__EDGE_LAYER_DIRECTION__DEVICE_TO_HOST == layer.direction()) ? HAILO_D2H_STREAM : HAILO_H2D_STREAM; + parse_layer_shape(result, layer.layer_info().edge_layer_base(), hw_padding_supported); + CHECK_AS_EXPECTED(IS_FIT_IN_UINT16(layer.layer_info().edge_layer_base().core_buffers_per_frame()), HAILO_INVALID_HEF, "Failed to parse HEF. 
Invalid core_buffers_per_frame: {}.", layer.layer_info().edge_layer_base().core_buffers_per_frame()); result.ddr_info.total_buffers_per_frame = static_cast(layer.layer_info().edge_layer_base().core_buffers_per_frame()); @@ -2362,28 +2762,6 @@ Expected HefUtils::get_ddr_layer_info(const ProtoHEFCoreOpMock &core_ return result; } -Expected> HefUtils::get_sorted_output_names(const ProtoHEFCoreOpMock &core_op) -{ - if (core_op.fused_layers_metadata.network_has_fused_layers()) { - return std::vector(std::begin(core_op.fused_layers_metadata.updated_sorted_output_names()), - std::end(core_op.fused_layers_metadata.updated_sorted_output_names())); - } else if (0 != core_op.sorted_outputs_order.size()) { - // For backwards compatibility before we've added updated_sorted_output_names - return std::vector(std::begin(core_op.sorted_outputs_order), - std::end(core_op.sorted_outputs_order)); - } else { - // For backwards compatibility before we've added this field - uint32_t number_of_contexts = core_op.contexts.size(); - const auto& context_metadata = core_op.contexts[number_of_contexts - 1].metadata(); - - CHECK_AS_EXPECTED(0 < context_metadata.sorted_outputs_order_size(), HAILO_INVALID_HEF, - "Sorted output names is not set up in the HEF."); - - return std::vector(std::begin(context_metadata.sorted_outputs_order()), - std::end(context_metadata.sorted_outputs_order())); - } -} - Expected HefUtils::get_partial_network_name_by_index(const ProtoHEFCoreOpMock &core_op, uint8_t network_index, const SupportedFeatures &supported_features) { @@ -2436,25 +2814,8 @@ Expected> Hef::Impl::get_core_op_per_arch(co Expected> Hef::Impl::get_sorted_output_names(const std::string &net_group_name) { - if (m_supported_features.hailo_net_flow) { - std::vector res; - for (const auto &net_group : m_groups) { - auto curr_name = HefUtils::get_network_group_name(*net_group, m_supported_features); - if (curr_name == net_group_name) { - res.reserve(net_group->sorted_outputs_order().size()); - for (auto 
&name : net_group->sorted_outputs_order()) { - res.push_back(name); - } - return res; - } - } - LOGGER__ERROR("Did not find network group of name {}", net_group_name); - return make_unexpected(HAILO_INVALID_HEF); - } - auto core_op_metadata = get_core_op_metadata(net_group_name); - CHECK_EXPECTED(core_op_metadata); - - auto res = core_op_metadata->get_sorted_output_names(); + CHECK_AS_EXPECTED(contains(m_network_group_metadata, net_group_name), HAILO_NOT_FOUND); + auto res = m_network_group_metadata.at(net_group_name).get_sorted_output_names(); return res; } @@ -2587,19 +2948,15 @@ bool Hef::Impl::contains_ddr_layers(const ProtoHEFCoreOpMock& core_op) Expected> Hef::Impl::get_stream_names_from_vstream_name(const std::string &vstream_name, const std::string &net_group_name) { - auto core_op_metadata = get_core_op_metadata(net_group_name); - CHECK_EXPECTED(core_op_metadata); - - return core_op_metadata->get_stream_names_from_vstream_name(vstream_name); + CHECK_AS_EXPECTED(contains(m_network_group_metadata, net_group_name), HAILO_NOT_FOUND); + return m_network_group_metadata.at(net_group_name).get_stream_names_from_vstream_name(vstream_name); } Expected> Hef::Impl::get_vstream_names_from_stream_name(const std::string &stream_name, const std::string &net_group_name) { - auto core_op_metadata = get_core_op_metadata(net_group_name); - CHECK_EXPECTED(core_op_metadata); - - return core_op_metadata->get_vstream_names_from_stream_name(stream_name); + CHECK_AS_EXPECTED(contains(m_network_group_metadata, net_group_name), HAILO_NOT_FOUND); + return m_network_group_metadata.at(net_group_name).get_vstream_names_from_stream_name(stream_name); } Expected Hef::Impl::get_vstream_name_from_original_name_mux(const std::string &original_name, const ProtoHefEdge &layer) @@ -2864,11 +3221,16 @@ Expected> Hef::Impl::get_post_processes_infos_descripti std::vector infos_strings; std::string infos_string; - auto post_process = post_process_ops(network_group_name); + 
CHECK_AS_EXPECTED(contains(m_network_group_metadata, network_group_name), HAILO_INTERNAL_FAILURE); + + auto post_process = m_network_group_metadata.at(network_group_name).m_net_flow_ops; for (const auto &post_process_info : post_process) { infos_string = post_process_info->op->get_op_description(); - infos_string += ", Bbox size: " + std::to_string(post_process_info->nms_info.bbox_size) + - ", Max bboxes per class: " + std::to_string(post_process_info->nms_info.max_bboxes_per_class); + if (HAILO_NET_FLOW_OP_TYPE_NMS == post_process_info->op_type) { + + infos_string += ", Bbox size: " + std::to_string(post_process_info->nms_info.bbox_size) + + ", Max bboxes per class: " + std::to_string(post_process_info->nms_info.max_bboxes_per_class); + } } /* If the string is empty there is no need to continue. */ if (infos_string.empty()) { @@ -2890,14 +3252,14 @@ Expected> Hef::Impl::get_post_processes_infos_descripti return infos_strings; } -Expected Hef::get_hef_description(bool stream_infos, bool vstream_infos) +Expected Hef::get_description(bool stream_infos, bool vstream_infos) { auto arch = get_hef_device_arch(); CHECK_EXPECTED(arch); - return pimpl->get_hef_description(stream_infos, vstream_infos, arch.value()); + return pimpl->get_description(stream_infos, vstream_infos, arch.value()); } -Expected Hef::Impl::get_hef_description(bool stream_infos, bool vstream_infos, hailo_device_architecture_t device_arch) +Expected Hef::Impl::get_description(bool stream_infos, bool vstream_infos, hailo_device_architecture_t device_arch) { std::string hef_infos; auto hef_arch_str = HailoRTCommon::get_device_arch_str(device_arch); @@ -2906,9 +3268,9 @@ Expected Hef::Impl::get_hef_description(bool stream_infos, bool vst auto network_group_infos = get_network_groups_infos(); CHECK_EXPECTED(network_group_infos); for (const auto &network_group_info : network_group_infos.release()) { - auto core_op_meta_data = get_core_op_metadata(network_group_info.name); - CHECK_EXPECTED(core_op_meta_data); 
- auto number_of_contexts = core_op_meta_data->get_contexts_count(); + auto core_op_metadata = get_core_op_metadata(network_group_info.name); + CHECK_EXPECTED(core_op_metadata); + auto number_of_contexts = core_op_metadata.value()->get_contexts_count(); auto contexts_str = (network_group_info.is_multi_context ? "Multi Context - Number of contexts: " + std::to_string(number_of_contexts) : "Single Context"); hef_infos += "Network group name: " + std::string(network_group_info.name) + ", " + contexts_str + "\n"; @@ -3020,9 +3382,8 @@ hailo_status Hef::Impl::fill_missing_input_vstream_params_with_default(const std const std::string &network_name, std::map &input_vstreams_params, bool quantized, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size) { - auto core_op_metadata = get_core_op_metadata(net_group_name); - CHECK_EXPECTED_AS_STATUS(core_op_metadata); - auto input_vstream_infos = core_op_metadata->get_input_vstream_infos(network_name); + CHECK(contains(m_network_group_metadata, net_group_name), HAILO_NOT_FOUND); + auto input_vstream_infos = m_network_group_metadata.at(net_group_name).get_input_vstream_infos(network_name); CHECK_EXPECTED_AS_STATUS(input_vstream_infos); return fill_missing_vstream_params_with_default(input_vstreams_params, input_vstream_infos.value(), @@ -3033,9 +3394,8 @@ hailo_status Hef::Impl::fill_missing_output_vstream_params_with_default(const st const std::string &network_name, std::map &output_vstream_params, bool quantized, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size) { - auto core_op_metadata = get_core_op_metadata(net_group_name); - CHECK_EXPECTED_AS_STATUS(core_op_metadata); - auto output_vstream_infos = core_op_metadata->get_output_vstream_infos(network_name); + CHECK(contains(m_network_group_metadata, net_group_name), HAILO_NOT_FOUND); + auto output_vstream_infos = m_network_group_metadata.at(net_group_name).get_output_vstream_infos(network_name); 
CHECK_EXPECTED_AS_STATUS(output_vstream_infos); return fill_missing_vstream_params_with_default(output_vstream_params, output_vstream_infos.value(), @@ -3110,12 +3470,16 @@ Expected> Hef::Impl::create_str CHECK_EXPECTED(core_op_metadata); std::map results; - for (auto &input_layer : core_op_metadata->get_input_layer_infos()) { + auto input_stream_infos = core_op_metadata.value()->get_input_stream_infos(); + CHECK_EXPECTED(input_stream_infos); + for (auto &input_layer : input_stream_infos.value()) { auto params = HailoRTDefaults::get_stream_parameters(stream_interface, HAILO_H2D_STREAM); CHECK_EXPECTED(params); results.emplace(std::make_pair(input_layer.name, params.release())); } - for (auto &output_layer : core_op_metadata->get_output_layer_infos()) { + auto output_stream_infos = core_op_metadata.value()->get_output_stream_infos(); + CHECK_EXPECTED(output_stream_infos); + for (auto &output_layer : output_stream_infos.value()) { auto params = HailoRTDefaults::get_stream_parameters(stream_interface, HAILO_D2H_STREAM); CHECK_EXPECTED(params); results.emplace(std::make_pair(output_layer.name, params.release())); @@ -3141,7 +3505,7 @@ Expected> Hef::Impl::create_ne std::map results; - if (core_op_metadata->supported_features().multi_network_support) { + if (core_op_metadata.value()->supported_features().multi_network_support) { CHECK_AS_EXPECTED((core_op.value()->networks_names.size() != 0), HAILO_INTERNAL_FAILURE, "Hef support multiple networks, but no networks found in the proto"); for (const auto &partial_network_name : core_op.value()->networks_names) { @@ -3178,14 +3542,18 @@ Expected> Hef::Impl::create_str CHECK_EXPECTED(core_op_metadata); std::map results; - for (auto &input_layer : core_op_metadata->get_input_layer_infos()) { + auto input_stream_infos = core_op_metadata.value()->get_input_stream_infos(); + CHECK_EXPECTED(input_stream_infos); + for (auto &input_layer : input_stream_infos.value()) { hailo_stream_parameters_t params = {}; params.direction = 
HAILO_H2D_STREAM; params.stream_interface = HAILO_STREAM_INTERFACE_MIPI; params.mipi_input_params = mipi_params; results.emplace(std::make_pair(input_layer.name, params)); } - for (auto &output_layer : core_op_metadata->get_output_layer_infos()) { + auto output_stream_infos = core_op_metadata.value()->get_output_stream_infos(); + CHECK_EXPECTED(output_stream_infos); + for (auto &output_layer : output_stream_infos.value()) { auto params = HailoRTDefaults::get_stream_parameters(output_interface, HAILO_D2H_STREAM); CHECK_EXPECTED(params); results.emplace(std::make_pair(output_layer.name, params.release())); diff --git a/hailort/libhailort/src/hef/hef_internal.hpp b/hailort/libhailort/src/hef/hef_internal.hpp index f2a3b53..bdee745 100644 --- a/hailort/libhailort/src/hef/hef_internal.hpp +++ b/hailort/libhailort/src/hef/hef_internal.hpp @@ -130,12 +130,23 @@ typedef enum { HEF__FORMAT__F8CR, } HEF__net_io_formatter_type_t; +typedef enum { + HAILO_NET_FLOW_OP_TYPE_NMS = 0, + HAILO_NET_FLOW_OP_TYPE_ARGMAX = 1, + HAILO_NET_FLOW_OP_TYPE_SOFTMAX = 2, + + /** Max enum value to maintain ABI Integrity */ + HAILO_NET_FLOW_OP_TYPE_MAX_ENUM = HAILO_MAX_ENUM +} hailo_net_flow_op_type_t; + struct NetFlowElement { std::string name; std::shared_ptr op; std::set input_streams; hailo_nms_info_t nms_info; + hailo_net_flow_op_type_t op_type; + hailo_vstream_info_t output_vstream_info; // Should be vector? 
}; const static uint32_t SUPPORTED_EXTENSIONS_BITSET_SIZE = 1000; @@ -151,7 +162,17 @@ static const std::vector SUPPORTED_EXTENSIONS = { OFFLOAD_ARGMAX, KO_RUN_ASAP, HAILO_NET_FLOW, - HAILO_NET_FLOW_YOLO_NMS // Extention added in platform 4.12 release + HAILO_NET_FLOW_YOLO_NMS, // Extention added in platform 4.12 release + HAILO_NET_FLOW_SSD_NMS, // Extention added in platform 4.14 release + WRITE_DATA_BY_TYPE, // Extention added in platform 4.14 release + NMS_OUTPUT_BURST, // Extention added in platform 4.14 release + DUAL_DIRECTION_STREAM_INDEX, // Extention added in platform 4.14 release + HAILO_NET_FLOW_ARGMAX, // Extention added in platform 4.14 release + HAILO_NET_FLOW_SOFTMAX, // Extention added in platform 4.14 release + ALIGNED_FORMAT_TYPE, // Extention added in platform 4.14 release + HAILO_NET_FLOW_YOLOX_NMS, // Extention added in platform 4.14 release + OUTPUT_SCALE_PER_FEATURE, // Extension added in platform 4.14 release + PERIPH_CALCULATION_IN_HAILORT, // Extension added in platform 4.14 release }; static inline bool is_h2d_boundary_info_layer(const ProtoHEFEdgeLayer& layer) @@ -209,7 +230,7 @@ public: const std::vector& network_groups() const; const std::vector& core_ops(const std::string &net_group_name) const; - const std::vector> post_process_ops(const std::string &net_group_name) const; + const NetworkGroupMetadata network_group_metadata(const std::string &net_group_name) const; Expected> get_network_group_and_network_name(const std::string &name); @@ -292,12 +313,12 @@ public: // Also adds information to CoreOpMetadata // TODO: When supporting multiple core ops in same netflow - Change metadata param to a map of core_ops_metadata. Expected>> create_net_flow_ops(const ProtoHEFNetworkGroup &network_group_proto, - CoreOpMetadata &core_op_metadata) const; + CoreOpMetadata &core_op_metadata, const ProtoHEFHwArch &hef_arch) const; // TODO: Should return map of NG's core_ops metadata? 
- Expected get_core_op_metadata(const std::string &network_group_name, uint32_t partial_clusters_layout_bitmap = PARTIAL_CLUSTERS_LAYOUT_IGNORE); + Expected get_core_op_metadata(const std::string &network_group_name, uint32_t partial_clusters_layout_bitmap = PARTIAL_CLUSTERS_LAYOUT_IGNORE); - Expected get_hef_description(bool stream_infos, bool vstream_infos, hailo_device_architecture_t device_arch); + Expected get_description(bool stream_infos, bool vstream_infos, hailo_device_architecture_t device_arch); const MD5_SUM_t &md5() const { @@ -371,7 +392,7 @@ private: static Expected get_vstream_name_from_original_name_mux(const std::string &original_name, const ProtoHefEdge &layer); static Expected> get_original_names_from_vstream_name_mux(const std::string &vstream_name, const ProtoHefEdge &layer); - Expected create_metadata_per_arch(const ProtoHEFCoreOpMock &core_op); + Expected create_metadata_per_arch(const ProtoHEFCoreOpMock &core_op, const std::vector &sorted_network_names); // TODO: Remove sorted_network_names Expected> get_stream_infos_description(const std::string &network_group_name, const std::string &network_name); Expected> get_vstream_infos_description(const std::string &network_group_name, const std::string &network_name); Expected> get_post_processes_infos_description(const std::string &network_group_name); @@ -392,8 +413,7 @@ private: Buffer m_hef_buffer; #endif // HAILO_SUPPORT_MULTI_PROCESS - // CoreOps information - TODO: Should be a map of map, mapping network_groups to it's core ops (second map is mapping core op name to its metadata). - std::map m_core_op_per_arch; + std::map m_network_group_metadata; // Key is NG name }; // TODO: Make this part of a namespace? 
(HRT-2881) @@ -409,15 +429,20 @@ public: static Expected parse_nn_stream_config(const LayerInfo &edge_layer, bool hw_padding_supported); - static bool is_hw_padding_supported(const ProtoHEFEdgeLayer &edge_layer); - static bool is_hw_padding_supported(const LayerInfo &layer_info); + static Expected max_periph_bytes_value(const hailo_device_architecture_t hw_arch); + static Expected max_periph_bytes_value(const hailo_stream_interface_t interface); + + static bool is_hw_padding_supported(const ProtoHEFEdgeLayer &edge_layer, const uint32_t max_periph_bytes_value); + static bool is_hw_padding_supported(const LayerInfo &layer_info, const uint32_t max_periph_bytes_value); private: static Expected parse_nn_stream_config(hailo_format_order_t format_order, uint32_t width, uint32_t features, uint32_t hw_data_bytes, uint16_t core_buffers_per_frame, - uint16_t core_bytes_per_buffer, bool hw_padding_supported, bool is_ddr); + uint16_t core_bytes_per_buffer, bool hw_padding_supported, bool is_ddr, uint16_t periph_buffers_per_frame, + uint16_t periph_bytes_per_buffer); static bool is_hw_padding_supported(bool is_boundary, bool is_mux, hailo_format_order_t format_order, - uint16_t core_buffers_per_frame, uint32_t height, uint32_t width, uint32_t features, uint32_t hw_data_bytes); + uint16_t core_buffers_per_frame, uint32_t height, uint32_t width, uint32_t features, uint32_t hw_data_bytes, + const uint32_t max_periph_bytes_value); }; class HefUtils final @@ -430,25 +455,26 @@ public: const uint8_t context_index, const ProtoHEFEdgeLayer &layer, const SupportedFeatures &supported_features, - ContextMetadata &context_metadata); + ContextMetadata &context_metadata, + const ProtoHEFHwArch &hef_arch); static Expected get_inter_context_layer_info( const ProtoHEFCoreOpMock &core_op, const uint8_t context_index, - const ProtoHEFEdgeLayer &layer, const SupportedFeatures &supported_features); + const ProtoHEFEdgeLayer &layer, const SupportedFeatures &supported_features, const ProtoHEFHwArch 
&hef_arch); static hailo_status fill_inter_context_layers_info( const ProtoHEFCoreOpMock &core_op, const uint8_t context_index, const ProtoHEFEdgeLayer &layer, const SupportedFeatures &supported_features, - ContextMetadata &context_metadata); + ContextMetadata &context_metadata, const ProtoHEFHwArch &hef_arch); static Expected get_ddr_layer_info( const ProtoHEFCoreOpMock &core_op, const uint8_t context_index, - const ProtoHEFEdgeLayer &layer, const SupportedFeatures &supported_features); + const ProtoHEFEdgeLayer &layer, const SupportedFeatures &supported_features, const ProtoHEFHwArch &hef_arch); static hailo_status fill_ddr_layers_info( const ProtoHEFCoreOpMock &core_op, const uint8_t context_index, const ProtoHEFEdgeLayer &layer, const SupportedFeatures &supported_features, - ContextMetadata &context_metadata); + ContextMetadata &context_metadata, const ProtoHEFHwArch &hef_arch); static hailo_status check_ddr_pairs_match( const std::vector &context_ddr_input_layers, const std::vector &context_ddr_output_layers, @@ -456,19 +482,18 @@ public: static Expected parse_preliminary_context(const ProtoHEFPreliminaryConfig &preliminary_proto, const SupportedFeatures &supported_features); static Expected parse_single_dynamic_context(const ProtoHEFCoreOpMock &core_op, - const ProtoHEFContext &context_proto, uint8_t context_index, const SupportedFeatures &supported_features); + const ProtoHEFContext &context_proto, uint8_t context_index, const SupportedFeatures &supported_features, + const ProtoHEFHwArch &hef_arch); static Expected> parse_dynamic_contexts(const ProtoHEFCoreOpMock &core_op, - const SupportedFeatures &supported_features); - static Expected parse_proto_nms_info(const ProtoHEFNmsInfo &proto_nms_info); + const SupportedFeatures &supported_features, const ProtoHEFHwArch &hef_arch); + static Expected parse_proto_nms_info(const ProtoHEFNmsInfo &proto_nms_info, + const bool burst_mode_enabled, const ProtoHEFHwArch &hef_arch); static Expected 
get_boundary_layer_info(const ProtoHEFCoreOpMock &core_op, - const uint8_t context_index, const ProtoHEFEdgeLayer &layer, const SupportedFeatures &supported_features); - static Expected> get_sorted_output_names(const ProtoHEFCoreOpMock &core_op); + const uint8_t context_index, const ProtoHEFEdgeLayer &layer, const SupportedFeatures &supported_features, + const ProtoHEFHwArch &hef_arch); static Expected get_partial_network_name_by_index(const ProtoHEFCoreOpMock &core_op, uint8_t network_index, const SupportedFeatures &supported_features); - static Expected> get_network_infos(const ProtoHEFNetworkGroup &net_group, - const std::string &net_group_name, const SupportedFeatures &supported_features); - static std::string get_network_group_name(const ProtoHEFNetworkGroup &net_group, const SupportedFeatures &supported_features); static std::string get_network_name(const ProtoHEFCoreOpMock &core_op, const std::string &partial_network_name); static std::string get_network_name(const std::string &net_group_name, const std::string &partial_network_name); @@ -477,19 +502,23 @@ private: static hailo_status fill_layer_info_with_base_info(const ProtoHEFEdgeLayerBase &base_info, const ProtoHEFEdgeConnectionType &edge_connection_type, const ProtoHEFNetworkGroupMetadata &network_group_proto, bool hw_padding_supported, bool transposed, - const uint8_t context_index, const uint8_t network_index, LayerInfo &layer_info); + const uint8_t context_index, const uint8_t network_index, LayerInfo &layer_info, + const SupportedFeatures &supported_features, const ProtoHEFHwArch &hef_arch); static hailo_status fill_layer_info(const ProtoHEFEdgeLayerInfo &info, const ProtoHEFEdgeConnectionType &edge_connection_type, const ProtoHEFCoreOpMock &core_op, hailo_stream_direction_t direction, bool hw_padding_supported, const uint8_t context_index, const std::string &partial_network_name, - uint8_t network_index, LayerInfo &layer_info); + uint8_t network_index, LayerInfo &layer_info, const SupportedFeatures 
&supported_features, + const ProtoHEFHwArch &hef_arch); static hailo_status fill_fused_nms_info(const ProtoHEFEdgeLayerFused &info, - LayerInfo &layer_info, hailo_quant_info_t &defuse_quant_info, const std::string &network_name); + LayerInfo &layer_info, hailo_quant_info_t &defuse_quant_info, const std::string &network_name, + const bool burst_mode_enabled, const ProtoHEFHwArch &hef_arch); static hailo_status fill_mux_info(const ProtoHEFEdgeLayerMux &info, const ProtoHEFEdgeConnectionType &edge_connection_type, const ProtoHEFCoreOpMock &core_op, hailo_stream_direction_t direction, bool hw_padding_supported, const uint8_t context_index, const std::string &partial_network_name, - uint8_t network_index, LayerInfo &layer_info); + uint8_t network_index, LayerInfo &layer_info, const SupportedFeatures &supported_features, + const ProtoHEFHwArch &hef_arch); }; } /* namespace hailort */ diff --git a/hailort/libhailort/src/hef/layer_info.hpp b/hailort/libhailort/src/hef/layer_info.hpp index af1ed07..2d0769f 100644 --- a/hailort/libhailort/src/hef/layer_info.hpp +++ b/hailort/libhailort/src/hef/layer_info.hpp @@ -26,6 +26,8 @@ namespace hailort { #define INVALID_PAD_INDEX (UINT32_MAX) +#define PERIPH_BYTES_PER_BUFFER_ALIGNMENT_SIZE (8) +#define PERIPH_BYTES_PER_BUFFER_DDR_ALIGNMENT_SIZE (512) enum class LayerType { @@ -54,7 +56,6 @@ struct DdrInfo { uint16_t min_buffered_rows; }; - struct LayerInfo { LayerType type = LayerType::NOT_SET; hailo_stream_direction_t direction; @@ -73,7 +74,8 @@ struct LayerInfo { hailo_3d_image_shape_t hw_shape; uint32_t hw_data_bytes; hailo_format_t format; - hailo_quant_info_t quant_info; + hailo_quant_info_t quant_info; // TODO: Remove, use vector + std::vector quant_infos; hailo_nms_info_t nms_info; // Mux info @@ -95,12 +97,12 @@ struct LayerInfo { DdrInfo ddr_info; }; -// LayerIdentifier = -using LayerIdentifier = std::tuple; +// LayerIdentifier = +using LayerIdentifier = std::tuple; inline LayerIdentifier to_layer_identifier(const LayerInfo 
&info) { - return std::make_tuple(info.type, info.name, info.stream_index); + return std::make_tuple(info.type, info.direction, info.name, info.stream_index); } class LayerInfoUtils { @@ -171,6 +173,10 @@ public: static Expected get_transfer_size(const LayerInfo &layer_info) { switch (layer_info.type) { case LayerType::BOUNDARY: + if (is_nms_burst_layer(layer_info)) { + return get_nms_layer_transfer_size(layer_info); + } + return layer_info.nn_stream_config.periph_bytes_per_buffer * layer_info.nn_stream_config.periph_buffers_per_frame; case LayerType::INTER_CONTEXT: return layer_info.nn_stream_config.periph_bytes_per_buffer * layer_info.nn_stream_config.periph_buffers_per_frame; case LayerType::DDR: @@ -180,6 +186,104 @@ public: } } + /** + * Validate nms burst type vs device architecture + * + * @param[in] burst_type A hailo_nms_burst_type_t burst_type. + * @param[in] arch A ::hailo_device_architecture_t architecture. + * @return true if the burst type matches the device architecture, otherwise false. + */ + static bool validate_nms_burst_type(const hailo_nms_burst_type_t burst_type, const hailo_device_architecture_t arch) + { + switch (arch) + { + case HAILO_ARCH_HAILO8_A0: + case HAILO_ARCH_HAILO8: + case HAILO_ARCH_HAILO8L: + return (HAILO_BURST_TYPE_H8_PER_CLASS == burst_type); + case HAILO_ARCH_HAILO15: + return ((HAILO_BURST_TYPE_H15_PER_CLASS == burst_type) || (HAILO_BURST_TYPE_H15_PER_FRAME == burst_type)); + default: + return false; + } + } + + /** + * Gets stream's transfer size in bytes by stream info and layer info params. + * + * @param[in] stream_info A ::hailo_stream_info_t object. + * @param[in] layer_info A ::LayerInfo object. + * @return The streams's transfer size in bytes. 
+ */ + static constexpr uint32_t get_stream_transfer_size(const hailo_stream_info_t &stream_info, const LayerInfo &layer_info) + { + if (HAILO_FORMAT_ORDER_HAILO_NMS == layer_info.format.order) { + return get_nms_layer_transfer_size(layer_info); + } + return stream_info.hw_frame_size; + } + + /** + * Get NMS layers's transfer size in bytes by NMS. + * + * @param[in] layer_info A ::LayerInfo object. + * @return The layer's transfer size in bytes. + */ + static constexpr uint32_t get_nms_layer_transfer_size(const LayerInfo &layer_info) + { + switch (layer_info.nms_info.burst_type) { + // If No Burst mode - size of transfer is size of bbox + case HAILO_BURST_TYPE_NO_BURST: + return layer_info.nms_info.bbox_size; + // In hailo8 per class and hailo15 per class mode - check if can support interrupt per frame and if not do interrupt per burst + case HAILO_BURST_TYPE_H8_PER_CLASS: + case HAILO_BURST_TYPE_H15_PER_CLASS: + { + // In case of hailo8 - nn-core adds one delimeter per burst - in case of hailo15 nn-core adds delimeter and image delimeter per class + const size_t bboxes_needed_for_delimeter = (HAILO_BURST_TYPE_H8_PER_CLASS == layer_info.nms_info.burst_type) ? + 1 : 2; + // If burst size is bigger than max bboxes per class + bboxes_needed_for_delimeter - we can enable 1 interrupt per frame + // Becasue we know output size will be burst size * num classes + if (layer_info.nms_info.burst_size >= (layer_info.nms_info.max_bboxes_per_class + bboxes_needed_for_delimeter)) { + return layer_info.nms_info.burst_size * layer_info.nms_info.bbox_size * layer_info.nms_info.number_of_classes; + } else { + // support regular interrupt per burst + return layer_info.nms_info.burst_size * layer_info.nms_info.bbox_size; + } + } + // Currently HAILO_BURST_TYPE_H15_PER_FRAME mode isnt supported - Shouldn't reach here + case HAILO_BURST_TYPE_H15_PER_FRAME: + default: + assert(false); + return 0; + } + } + + /** + * Return if layer is NMS Burst layers. 
+ * + * @param[in] layer_info A ::LayerInfo object. + * @return True if the layer is NMS layer with burst mode - false otherwise. + */ + static constexpr uint32_t is_nms_burst_layer(const LayerInfo &layer_info) + { + return (1 < layer_info.nms_info.burst_size); + } + + /** + * Get layers's transfer size. + * + * @param[in] layer_info A ::LayerInfo object. + * @return The layer's transfer size in bytes. + */ + static constexpr uint32_t get_layer_transfer_size(const LayerInfo &layer_info) + { + if (HAILO_FORMAT_ORDER_HAILO_NMS == layer_info.format.order) { + return get_nms_layer_transfer_size(layer_info); + } + return (layer_info.hw_shape.width * layer_info.hw_shape.features * layer_info.hw_shape.height * layer_info.hw_data_bytes); + } + private: static hailo_vstream_info_t get_vstream_info_from_layer_info_impl(const LayerInfo &layer_info) { diff --git a/hailort/libhailort/src/hw_consts.hpp b/hailort/libhailort/src/hw_consts.hpp index 4faec77..3acd38f 100644 --- a/hailort/libhailort/src/hw_consts.hpp +++ b/hailort/libhailort/src/hw_consts.hpp @@ -14,6 +14,9 @@ /** Package constants *********************************************************/ #define HAILO8_INBOUND_DATA_STREAM_SIZE (0x00010000L) +// Max periph bytes per buffer for hailo15 because (we use its value shifted right by 3 - according to the spec) to +// configure shmifo credit size - which in hailo15 only has a width of 10 bits +#define HAILO15_PERIPH_BYTES_PER_BUFFER_MAX_SIZE (0x00002000L) /** PCIe constants and macors ************************************************/ #define PCIE_CONFIG_BASE_ADDRESS (0x00200000L) // ::HW_BASE_ADDRESSES__PCIE_CONFIG(0, 0, 0) diff --git a/hailort/libhailort/src/mipi/mipi_stream.cpp b/hailort/libhailort/src/mipi/mipi_stream.cpp index e7c92fe..36007ec 100644 --- a/hailort/libhailort/src/mipi/mipi_stream.cpp +++ b/hailort/libhailort/src/mipi/mipi_stream.cpp @@ -128,17 +128,9 @@ hailo_status MipiInputStream::activate_stream(uint16_t /* dynamic_batch_size */, return HAILO_SUCCESS; 
} -Expected MipiInputStream::sync_write_raw_buffer(const MemoryView &buffer) +hailo_status MipiInputStream::write_impl(const MemoryView &buffer) { (void)buffer; - return make_unexpected(HAILO_INVALID_OPERATION); -} - -hailo_status MipiInputStream::sync_write_all_raw_buffer_no_transform_impl(void *buffer, size_t offset, size_t size) -{ - (void)buffer; - (void)offset; - (void)size; return HAILO_INVALID_OPERATION; } diff --git a/hailort/libhailort/src/mipi/mipi_stream.hpp b/hailort/libhailort/src/mipi/mipi_stream.hpp index b52597f..73178e1 100644 --- a/hailort/libhailort/src/mipi/mipi_stream.hpp +++ b/hailort/libhailort/src/mipi/mipi_stream.hpp @@ -35,8 +35,7 @@ private: CONTROL_PROTOCOL__mipi_input_config_params_t m_mipi_input_params; protected: - virtual Expected sync_write_raw_buffer(const MemoryView &buffer) override; - virtual hailo_status sync_write_all_raw_buffer_no_transform_impl(void *buffer, size_t offset, size_t size) override; + virtual hailo_status write_impl(const MemoryView &buffer) override; virtual hailo_status set_timeout(std::chrono::milliseconds timeout) { (void)timeout; return HAILO_INVALID_OPERATION; }; public: @@ -51,7 +50,6 @@ public: virtual std::chrono::milliseconds get_timeout() const override; virtual hailo_status abort() override; virtual hailo_status clear_abort() override; - }; } /* namespace hailort */ diff --git a/hailort/libhailort/src/net_flow/CMakeLists.txt b/hailort/libhailort/src/net_flow/CMakeLists.txt index ece9aa3..dd4a2c5 100644 --- a/hailort/libhailort/src/net_flow/CMakeLists.txt +++ b/hailort/libhailort/src/net_flow/CMakeLists.txt @@ -1,16 +1,16 @@ cmake_minimum_required(VERSION 3.0.0) -set(HAILORT_OPS_CPP_SOURCES +set(SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/ops/nms_post_process.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ops/yolo_post_process.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/ops/yolox_post_process.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ops/ssd_post_process.cpp -) + ${CMAKE_CURRENT_SOURCE_DIR}/ops/argmax_post_process.cpp + 
${CMAKE_CURRENT_SOURCE_DIR}/ops/softmax_post_process.cpp -set(SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/pipeline/pipeline.cpp ${CMAKE_CURRENT_SOURCE_DIR}/pipeline/inference_pipeline.cpp ${CMAKE_CURRENT_SOURCE_DIR}/pipeline/vstream.cpp ) -set(HAILORT_CPP_SOURCES ${HAILORT_CPP_SOURCES} ${SRC_FILES} ${HAILORT_OPS_CPP_SOURCES} PARENT_SCOPE) -set(HAILORT_OPS_CPP_SOURCES ${HAILORT_OPS_CPP_SOURCES} PARENT_SCOPE) +set(HAILORT_CPP_SOURCES ${HAILORT_CPP_SOURCES} ${SRC_FILES} PARENT_SCOPE) diff --git a/hailort/libhailort/src/net_flow/ops/argmax_post_process.cpp b/hailort/libhailort/src/net_flow/ops/argmax_post_process.cpp new file mode 100644 index 0000000..b7e2df9 --- /dev/null +++ b/hailort/libhailort/src/net_flow/ops/argmax_post_process.cpp @@ -0,0 +1,219 @@ +/** + * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) +**/ +/** + * @file argmax_post_process.cpp + * @brief: Argsmax op + **/ + +#include "argmax_post_process.hpp" +#include "hailo/hailort.h" +#include "hailo/hailort_common.hpp" +#include "common/utils.hpp" + +#include + + +namespace hailort +{ +namespace net_flow +{ + +// Source https://stackoverflow.com/questions/3793838/which-is-the-first-integer-that-an-ieee-754-float-is-incapable-of-representing-e +#define FLOAT_LAST_CONSECUTIVE_REPRESENTABLE_INT (1 << std::numeric_limits::digits) + +hailo_status ArgmaxPostProcessOp::execute_not_supported(const BufferMetaData &input_metadata, const BufferMetaData &output_metadata, + const std::map &inputs, std::map &outputs) + { + (void)inputs; + (void)outputs; + LOGGER__ERROR("Argmax post-process not supported with params: input_order {}, input_type {}, output_type {}", + HailoRTCommon::get_format_order_str(input_metadata.format.order), + HailoRTCommon::get_format_type_str(input_metadata.format.type), + HailoRTCommon::get_format_type_str(output_metadata.format.type)); + return HAILO_INVALID_ARGUMENT; + } + +ArgmaxFunction 
ArgmaxPostProcessOp::m_argmax_function_array[ARGMAX_NUM_OF_POSSIBLE_FORMAT_ORDERS][ARGMAX_NUM_OF_POSSIBLE_FORMAT_TYPES][ARGMAX_NUM_OF_POSSIBLE_FORMAT_TYPES] +{ + { + { + // NHCW x AUTO + // We don't support input_format_type to be auto + ArgmaxPostProcessOp::execute_not_supported, + ArgmaxPostProcessOp::execute_not_supported, + ArgmaxPostProcessOp::execute_not_supported, + ArgmaxPostProcessOp::execute_not_supported + }, + { + // NHCW x UINT8 + ArgmaxPostProcessOp::execute_not_supported, // We don't support output_format_type to be auto + ArgmaxPostProcessOp::NHCW_to_NHW_feature_axis, + ArgmaxPostProcessOp::NHCW_to_NHW_feature_axis, + ArgmaxPostProcessOp::NHCW_to_NHW_feature_axis + }, + { + // NHCW x UINT16 + ArgmaxPostProcessOp::execute_not_supported, // We don't support output_format_type to be auto + ArgmaxPostProcessOp::NHCW_to_NHW_feature_axis, + ArgmaxPostProcessOp::NHCW_to_NHW_feature_axis, + ArgmaxPostProcessOp::NHCW_to_NHW_feature_axis + }, + { + // NHCW x FLOAT32 + // We don't support input_format_type to be float32 + ArgmaxPostProcessOp::execute_not_supported, + ArgmaxPostProcessOp::execute_not_supported, + ArgmaxPostProcessOp::execute_not_supported, + ArgmaxPostProcessOp::execute_not_supported + } + }, + { + { + // NHWC x AUTO + // We don't support input_format_type to be auto + ArgmaxPostProcessOp::execute_not_supported, + ArgmaxPostProcessOp::execute_not_supported, + ArgmaxPostProcessOp::execute_not_supported, + ArgmaxPostProcessOp::execute_not_supported + }, + { + // NHWC x UINT8 + ArgmaxPostProcessOp::execute_not_supported, // We don't support output_format_type to be auto + ArgmaxPostProcessOp::NHWC_to_NHW_feature_axis, + ArgmaxPostProcessOp::NHWC_to_NHW_feature_axis, + ArgmaxPostProcessOp::NHWC_to_NHW_feature_axis + }, + { + // NHWC x UINT16 + ArgmaxPostProcessOp::execute_not_supported, // We don't support output_format_type to be auto + ArgmaxPostProcessOp::NHWC_to_NHW_feature_axis, + ArgmaxPostProcessOp::NHWC_to_NHW_feature_axis, + 
ArgmaxPostProcessOp::NHWC_to_NHW_feature_axis, + }, + { + // NHWC x FLOAT32 + ArgmaxPostProcessOp::execute_not_supported, + ArgmaxPostProcessOp::execute_not_supported, + ArgmaxPostProcessOp::execute_not_supported, + ArgmaxPostProcessOp::execute_not_supported + } + }, + { + { + // NC x AUTO + // We don't support input_format_type to be auto + ArgmaxPostProcessOp::execute_not_supported, + ArgmaxPostProcessOp::execute_not_supported, + ArgmaxPostProcessOp::execute_not_supported, + ArgmaxPostProcessOp::execute_not_supported + }, + { + // NC x UINT8 + ArgmaxPostProcessOp::execute_not_supported, // We don't support output_format_type to be auto + ArgmaxPostProcessOp::NC_to_N, + ArgmaxPostProcessOp::NC_to_N, + ArgmaxPostProcessOp::NC_to_N, + }, + { + // NC x UINT16 + ArgmaxPostProcessOp::execute_not_supported, // We don't support output_format_type to be auto + ArgmaxPostProcessOp::NC_to_N, + ArgmaxPostProcessOp::NC_to_N, + ArgmaxPostProcessOp::NC_to_N, + }, + { + // NC x FLOAT32 + // We don't support input_format_type to be float32 + ArgmaxPostProcessOp::execute_not_supported, + ArgmaxPostProcessOp::execute_not_supported, + ArgmaxPostProcessOp::execute_not_supported, + ArgmaxPostProcessOp::execute_not_supported + } + } +}; + +hailo_status ArgmaxPostProcessOp::execute(const std::map &inputs, + std::map &outputs) +{ + auto &input_name = inputs.begin()->first; + auto &output_name = outputs.begin()->first; + auto &input_metadata = m_inputs_metadata[input_name]; + auto &output_metadata = m_outputs_metadata[output_name]; + + uint8_t format_index = UINT8_MAX; + switch (input_metadata.format.order) { + case HAILO_FORMAT_ORDER_NHCW: + format_index = 0; + break; + case HAILO_FORMAT_ORDER_NHWC: + format_index = 1; + break; + case HAILO_FORMAT_ORDER_NC: + format_index = 2; + break; + default: + LOGGER__ERROR("Argmax post-process received invalid input order {}", + HailoRTCommon::get_format_order_str(input_metadata.format.order)); + return HAILO_INVALID_ARGUMENT; + } + return 
ArgmaxPostProcessOp::m_argmax_function_array[format_index][input_metadata.format.type][output_metadata.format.type](input_metadata, output_metadata, inputs, outputs); +} + +std::string ArgmaxPostProcessOp::get_op_description() +{ + auto config_info = fmt::format("ArgmaxPostProcess Op, Name: {}", m_name); + return config_info; +} + +hailo_status ArgmaxPostProcessOp::validate_metadata() +{ + assert(m_inputs_metadata.size() == hailort::net_flow::ARGMAX_NUMBER_OF_SRCS); + assert(m_outputs_metadata.size() == hailort::net_flow::ARGMAX_NUMBER_OF_DSTS); + + auto &input_metadata = m_inputs_metadata.begin()->second; + auto &output_metadata = m_outputs_metadata.begin()->second; + + CHECK(( + ((output_metadata.format.type == HAILO_FORMAT_TYPE_UINT8) && (input_metadata.shape.features <= std::numeric_limits::max())) || + ((output_metadata.format.type == HAILO_FORMAT_TYPE_UINT16) && (input_metadata.shape.features <= std::numeric_limits::max())) || + ((output_metadata.format.type == HAILO_FORMAT_TYPE_FLOAT32) && (input_metadata.shape.features <= FLOAT_LAST_CONSECUTIVE_REPRESENTABLE_INT))), + HAILO_INVALID_OPERATION, "Dst format type {} can't represent possible range {} for Argmax op", + HailoRTCommon::get_format_type_str(output_metadata.format.type), input_metadata.shape.features); + CHECK( + ((input_metadata.format.order == HAILO_FORMAT_ORDER_NHCW) && (output_metadata.format.order == HAILO_FORMAT_ORDER_NHW)) || + ((input_metadata.format.order == HAILO_FORMAT_ORDER_NHWC) && (output_metadata.format.order == HAILO_FORMAT_ORDER_NHW)) || + ((input_metadata.format.order == HAILO_FORMAT_ORDER_NC) && (output_metadata.format.order == HAILO_FORMAT_ORDER_NC)), + HAILO_INVALID_OPERATION, "Argmax op is not supported for src format order ({}) and dst format order ({})", + HailoRTCommon::get_format_order_str(input_metadata.format.order), + HailoRTCommon::get_format_order_str(output_metadata.format.order)); + + CHECK(output_metadata.shape.features == 
hailort::net_flow::ARGMAX_OUTPUT_FEATURES_SIZE, HAILO_INVALID_OPERATION, + "Dst features ({}) must be 1 on Argmax op", output_metadata.shape.features); + CHECK(input_metadata.shape.height == output_metadata.shape.height, HAILO_INVALID_OPERATION, + "Argmax op is supported only when src height ({}) is equal to dst height ({})", + input_metadata.shape.height, output_metadata.shape.height); + CHECK(input_metadata.shape.width == output_metadata.shape.width, HAILO_INVALID_OPERATION, + "Argmax op is supported only when src width ({}) is equal to dst width ({})", + input_metadata.shape.width, output_metadata.shape.width); + CHECK(( + (input_metadata.format.type == HAILO_FORMAT_TYPE_UINT8) || (input_metadata.format.type == HAILO_FORMAT_TYPE_UINT16)), + HAILO_INVALID_OPERATION, "Src format type {} is not valid. Must be either {} or {}", + HailoRTCommon::get_format_type_str(input_metadata.format.type), HailoRTCommon::get_format_type_str(HAILO_FORMAT_TYPE_UINT8), + HailoRTCommon::get_format_type_str(HAILO_FORMAT_TYPE_UINT16)); + + return HAILO_SUCCESS; +} + +Expected> ArgmaxPostProcessOp::create(const std::map &inputs_metadata, + std::map &outputs_metadata) +{ + auto op = std::shared_ptr(new (std::nothrow) ArgmaxPostProcessOp(inputs_metadata, outputs_metadata)); + CHECK_AS_EXPECTED(op != nullptr, HAILO_OUT_OF_HOST_MEMORY); + + return std::shared_ptr(std::move(op)); +} + +} /* namespace net_flow */ +} /* namespace hailort */ diff --git a/hailort/libhailort/src/net_flow/ops/argmax_post_process.hpp b/hailort/libhailort/src/net_flow/ops/argmax_post_process.hpp new file mode 100644 index 0000000..23dd6b4 --- /dev/null +++ b/hailort/libhailort/src/net_flow/ops/argmax_post_process.hpp @@ -0,0 +1,151 @@ +/** + * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. 
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT) +**/ +/** + * @file argmax_post_process.hpp + * @brief: Argmax op perform argmax op as described: https://www.tensorflow.org/api_docs/python/tf/math/argmax + * A few notes: + * - Support only on features axis + * - Support only on NHWC, NHCW and NC input data order + * - In case of 2 maximal values - the lower index one will be given. + **/ + +#ifndef _HAILO_ARGMAX_POST_PROCESS_HPP_ +#define _HAILO_ARGMAX_POST_PROCESS_HPP_ + + +#include "hailo/hailort.h" +#include "net_flow/ops/op.hpp" +#include "common/utils.hpp" + +#include + +namespace hailort +{ +namespace net_flow +{ + +#define ARGMAX_NUM_OF_POSSIBLE_FORMAT_ORDERS (3) +#define ARGMAX_NUM_OF_POSSIBLE_FORMAT_TYPES (4) + +constexpr std::size_t ARGMAX_OUTPUT_FEATURES_SIZE {1}; +constexpr std::size_t ARGMAX_NUMBER_OF_SRCS {1}; +constexpr std::size_t ARGMAX_NUMBER_OF_DSTS {1}; + +typedef hailo_status (*ArgmaxFunction)(const BufferMetaData &input_metadata, const BufferMetaData &output_metadata, + const std::map &inputs, std::map &outputs); + +class ArgmaxPostProcessOp : public Op +{ + +private: + ArgmaxPostProcessOp(const std::map &inputs_metadata, + const std::map &outputs_metadata) + : Op(inputs_metadata, outputs_metadata, "Argmax-Post-Process") + {} + + template + static hailo_status NHCW_to_NHW_feature_axis(const BufferMetaData &input_metadata, const BufferMetaData &output_metadata, + const std::map &inputs, std::map &outputs) + { + auto src_ptr = (DeviceType*)inputs.begin()->second.data(); + auto dst_ptr = (HostType*)outputs.begin()->second.data(); + const auto src_row_size = input_metadata.padded_shape.width * input_metadata.padded_shape.features; + const auto dst_row_size = output_metadata.shape.width; + + for (uint32_t r = 0; r < input_metadata.shape.height; r++) { + const DeviceType *src_row = src_ptr + (r * src_row_size); + HostType *dst_row = dst_ptr + (r * dst_row_size); + for (uint32_t w = 0; w < input_metadata.shape.width; w++) 
{ + const DeviceType *offset_in_row = src_row + w; + HostType max_index = 0; + auto max_value = *offset_in_row; + for (uint32_t c = 1; c < input_metadata.shape.features; c++) { + offset_in_row += input_metadata.padded_shape.width; + const auto ¤t_value = *offset_in_row; + if (current_value > max_value) { + max_index = static_cast(c); + max_value = current_value; + } + } + dst_row[w] = max_index; + } + } + return HAILO_SUCCESS; + } + + template + static hailo_status NHWC_to_NHW_feature_axis(const BufferMetaData &input_metadata, const BufferMetaData &output_metadata, + const std::map &inputs, std::map &outputs) + { + auto src_ptr = (DeviceType*)inputs.begin()->second.data(); + auto dst_ptr = (HostType*)outputs.begin()->second.data(); + const auto src_row_size = input_metadata.padded_shape.width * input_metadata.padded_shape.features; + const auto dst_row_size = output_metadata.shape.width; + + for (uint32_t r = 0; r < input_metadata.shape.height; r++) { + const DeviceType *src_row = src_ptr + (r * src_row_size); + HostType *dst_row = dst_ptr + (r * dst_row_size); + for (uint32_t w = 0; w < input_metadata.shape.width; w++) { + const DeviceType *offset_in_row = src_row + (w * input_metadata.padded_shape.features); + HostType max_index = 0; + auto max_value = *offset_in_row; + for (uint32_t c = 1; c < input_metadata.shape.features; c++) { + const auto ¤t_value = *(offset_in_row + c); + if (current_value > max_value) { + max_index = static_cast(c); + max_value = current_value; + } + } + dst_row[w] = max_index; + } + } + return HAILO_SUCCESS; + } + + template + static hailo_status NC_to_N(const BufferMetaData &input_metadata, const BufferMetaData &output_metadata, + const std::map &inputs, std::map &outputs) + { + (void) output_metadata; // only reason to have output_metadata is so that the function array will work + auto src_ptr = (DeviceType*)inputs.begin()->second.data(); + auto dst_ptr = (HostType*)outputs.begin()->second.data(); + HostType max_index = 0; + DeviceType 
max_value = 0; + + for (uint32_t c = 0; c < input_metadata.shape.features; c++) { + const auto ¤t_value = *(src_ptr + c); + if (current_value > max_value) { + max_index = static_cast(c); + max_value = current_value; + } + } + *dst_ptr = max_index; + return HAILO_SUCCESS; + } + + static hailo_status execute_not_supported(const BufferMetaData &input_metadata, const BufferMetaData &output_metadata, + const std::map &inputs, std::map &outputs); + +public: + static Expected> create(const std::map &inputs_metadata, + std::map &outputs_metadata); + virtual hailo_status execute(const std::map &inputs, + std::map &outputs) override; + virtual std::string get_op_description() override; + hailo_status validate_metadata() override; + + // A 3D array of argmax functions to call: + // 1st dim represent the data format order + // 2nd dim represent the input data type (only uint8 or uint16 are valid) + // 3rd dim represent the output data type + // Note: Assumption here the ordering of the enum hailo_format_type_t doesn't change + static ArgmaxFunction m_argmax_function_array[ARGMAX_NUM_OF_POSSIBLE_FORMAT_ORDERS][ARGMAX_NUM_OF_POSSIBLE_FORMAT_TYPES][ARGMAX_NUM_OF_POSSIBLE_FORMAT_TYPES]; + +}; + +} /* namespace net_flow */ +} /* namespace hailort */ + +#endif /* _HAILO_ARGMAX_POST_PROCESS_HPP_ */ diff --git a/hailort/libhailort/src/net_flow/ops/nms_post_process.cpp b/hailort/libhailort/src/net_flow/ops/nms_post_process.cpp index 1bf1859..8fced3b 100644 --- a/hailort/libhailort/src/net_flow/ops/nms_post_process.cpp +++ b/hailort/libhailort/src/net_flow/ops/nms_post_process.cpp @@ -15,6 +15,44 @@ namespace hailort { namespace net_flow { + + hailo_status NmsPostProcessOp::validate_metadata() + { + for (const auto& output_metadata : m_outputs_metadata) { + CHECK(HAILO_FORMAT_ORDER_HAILO_NMS == output_metadata.second.format.order, HAILO_INVALID_ARGUMENT, "The given output format order {} is not supported, " + "should be HAILO_FORMAT_ORDER_HAILO_NMS", 
HailoRTCommon::get_format_order_str(output_metadata.second.format.order)); + + CHECK(HAILO_FORMAT_TYPE_FLOAT32 == output_metadata.second.format.type, HAILO_INVALID_ARGUMENT, "The given output format type {} is not supported, " + "should be HAILO_FORMAT_TYPE_FLOAT32", HailoRTCommon::get_format_type_str(output_metadata.second.format.type)); + + CHECK(!(HAILO_FORMAT_FLAGS_TRANSPOSED & output_metadata.second.format.flags), HAILO_INVALID_ARGUMENT, "Output {} is marked as transposed, which is not supported for this model.", + output_metadata.first); + CHECK(!(HAILO_FORMAT_FLAGS_HOST_ARGMAX & output_metadata.second.format.flags), HAILO_INVALID_ARGUMENT, "Output {} is marked as argmax, which is not supported for this model.", + output_metadata.first); + CHECK(!(HAILO_FORMAT_FLAGS_QUANTIZED & output_metadata.second.format.flags), HAILO_INVALID_ARGUMENT, "Output {} is marked as quantized, which is not supported for this model.", + output_metadata.first); + } + + assert(1 <= m_inputs_metadata.size()); + const hailo_format_type_t& first_input_type = m_inputs_metadata.begin()->second.format.type; + for (const auto& input_metadata : m_inputs_metadata) { + CHECK(HAILO_FORMAT_ORDER_NHCW == input_metadata.second.format.order, HAILO_INVALID_ARGUMENT, "The given input format order {} is not supported, " + "should be HAILO_FORMAT_ORDER_NHCW", HailoRTCommon::get_format_order_str(input_metadata.second.format.order)); + + CHECK((HAILO_FORMAT_TYPE_UINT8 == input_metadata.second.format.type) || + (HAILO_FORMAT_TYPE_UINT16 == input_metadata.second.format.type), + HAILO_INVALID_ARGUMENT, "The given input format type {} is not supported, should be HAILO_FORMAT_TYPE_UINT8 or HAILO_FORMAT_TYPE_UINT16", + HailoRTCommon::get_format_type_str(input_metadata.second.format.type)); + + CHECK(input_metadata.second.format.type == first_input_type, HAILO_INVALID_ARGUMENT,"All inputs format type should be the same"); + + CHECK(HAILO_FORMAT_FLAGS_QUANTIZED == input_metadata.second.format.flags, 
HAILO_INVALID_ARGUMENT, "The given input format flag is not supported," + "should be HAILO_FORMAT_FLAGS_QUANTIZED"); + } + + return HAILO_SUCCESS; + } + float NmsPostProcessOp::compute_iou(const hailo_bbox_float32_t &box_1, const hailo_bbox_float32_t &box_2) { const float overlap_area_width = std::min(box_1.x_max, box_2.x_max) - std::max(box_1.x_min, box_2.x_min); @@ -64,10 +102,9 @@ namespace net_flow std::vector &classes_detections_count) { // Calculate the number of detections before each class, to help us later calculate the buffer_offset for it's detections. - std::vector num_of_detections_before; - num_of_detections_before.reserve(m_nms_config.classes); + std::vector num_of_detections_before(m_nms_config.number_of_classes, 0); uint32_t ignored_detections_count = 0; - for (size_t class_idx = 0; class_idx < m_nms_config.classes; class_idx++) { + for (size_t class_idx = 0; class_idx < m_nms_config.number_of_classes; class_idx++) { if (classes_detections_count[class_idx] > m_nms_config.max_proposals_per_class) { ignored_detections_count += (classes_detections_count[class_idx] - m_nms_config.max_proposals_per_class); classes_detections_count[class_idx] = m_nms_config.max_proposals_per_class; @@ -123,7 +160,7 @@ namespace net_flow std::string NmsPostProcessOp::get_nms_config_description() { auto config_info = fmt::format("Score threshold: {:.3f}, Iou threshold: {:.2f}, Classes: {}, Cross classes: {}", - m_nms_config.nms_score_th, m_nms_config.nms_iou_th, m_nms_config.classes, m_nms_config.cross_classes); + m_nms_config.nms_score_th, m_nms_config.nms_iou_th, m_nms_config.number_of_classes, m_nms_config.cross_classes); if (m_nms_config.background_removal) { config_info += fmt::format(", Background removal index: {}", m_nms_config.background_removal_index); } diff --git a/hailort/libhailort/src/net_flow/ops/nms_post_process.hpp b/hailort/libhailort/src/net_flow/ops/nms_post_process.hpp index 8b95a84..e7c9d59 100644 --- 
a/hailort/libhailort/src/net_flow/ops/nms_post_process.hpp +++ b/hailort/libhailort/src/net_flow/ops/nms_post_process.hpp @@ -66,7 +66,7 @@ struct NmsPostProcessConfig uint32_t max_proposals_per_class = 0; // The model's number of classes. (This depends on the dataset that the model trained on). - uint32_t classes = 0; + uint32_t number_of_classes = 0; // Toggle background class removal from results bool background_removal = false; @@ -107,7 +107,7 @@ protected: float32_t objectness, hailo_quant_info_t quant_info, uint32_t width) { std::pair max_id_score_pair; - for (uint32_t class_index = 0; class_index < m_nms_config.classes; class_index++) { + for (uint32_t class_index = 0; class_index < m_nms_config.number_of_classes; class_index++) { auto class_id = class_index; if (m_nms_config.background_removal) { if (m_nms_config.background_removal_index == class_index) { @@ -158,6 +158,8 @@ protected: std::string get_nms_config_description(); + hailo_status validate_metadata() override; + }; } diff --git a/hailort/libhailort/src/net_flow/ops/op.hpp b/hailort/libhailort/src/net_flow/ops/op.hpp index d6a02b3..cd8b3ae 100644 --- a/hailort/libhailort/src/net_flow/ops/op.hpp +++ b/hailort/libhailort/src/net_flow/ops/op.hpp @@ -50,6 +50,8 @@ public: */ virtual hailo_status execute(const std::map &inputs, std::map &outputs) = 0; + virtual hailo_status validate_metadata() = 0; + const std::map &inputs_metadata() const { return m_inputs_metadata; @@ -60,6 +62,16 @@ public: return m_outputs_metadata; } + void set_outputs_metadata(std::map &outputs_metadata) + { + m_outputs_metadata = outputs_metadata; + } + + void set_inputs_metadata(std::map &inputs_metadata) + { + m_inputs_metadata = inputs_metadata; + } + std::string get_name() { return m_name; } diff --git a/hailort/libhailort/src/net_flow/ops/softmax_post_process.cpp b/hailort/libhailort/src/net_flow/ops/softmax_post_process.cpp new file mode 100644 index 0000000..97fb1e3 --- /dev/null +++ 
b/hailort/libhailort/src/net_flow/ops/softmax_post_process.cpp @@ -0,0 +1,192 @@ +/** + * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) +**/ +/** + * @file softmax_post_process.cpp + * @brief: Softmax op + **/ + +#include "softmax_post_process.hpp" +#include "hailo/hailort.h" +#include "hailo/hailort_common.hpp" +#include "common/utils.hpp" + +#include + +namespace hailort +{ +namespace net_flow +{ + +// This function is for when trying to perform softmax op for unsupported formats +hailo_status SoftmaxPostProcessOp::execute_not_supported(const BufferMetaData &input_metadata, const BufferMetaData &output_metadata, + const std::map &inputs, std::map &outputs) + { + (void)inputs; + (void)outputs; + LOGGER__ERROR("Softmax post-process not supported with params: input_order {}, input_type {}, output_type {}", + HailoRTCommon::get_format_order_str(input_metadata.format.order), + HailoRTCommon::get_format_type_str(input_metadata.format.type), + HailoRTCommon::get_format_type_str(output_metadata.format.type)); + return HAILO_INVALID_ARGUMENT; + } + +SoftmaxFunction SoftmaxPostProcessOp::m_softmax_function_array[SOFTMAX_NUM_OF_POSSIBLE_FORMAT_ORDERS][SOFTMAX_NUM_OF_POSSIBLE_FORMAT_TYPES][SOFTMAX_NUM_OF_POSSIBLE_FORMAT_TYPES] +{ + // Currently supported on: + // NC, float_32 to NC, float_32 + // NHWC, float_32 to NHWC, float_32 + { + { + // NHWC x AUTO + // We don't support input_format_type to be auto + SoftmaxPostProcessOp::execute_not_supported, + SoftmaxPostProcessOp::execute_not_supported, + SoftmaxPostProcessOp::execute_not_supported, + SoftmaxPostProcessOp::execute_not_supported + }, + { + // NHWC x UINT8 + // We don't support input_format_type to be UINT8 + SoftmaxPostProcessOp::execute_not_supported, + SoftmaxPostProcessOp::execute_not_supported, + SoftmaxPostProcessOp::execute_not_supported, + SoftmaxPostProcessOp::execute_not_supported + }, + { + // NHWC x UINT16 + // We 
don't support input_format_type to be UINT16 + SoftmaxPostProcessOp::execute_not_supported, + SoftmaxPostProcessOp::execute_not_supported, + SoftmaxPostProcessOp::execute_not_supported, + SoftmaxPostProcessOp::execute_not_supported + }, + { + // NHWC x FLOAT32 + SoftmaxPostProcessOp::execute_not_supported, // We don't support output_format_type format of AUTO + SoftmaxPostProcessOp::execute_not_supported, // We don't support output_format_type format of UINT8 + SoftmaxPostProcessOp::execute_not_supported, // We don't support output_format_type format of UINT16 + SoftmaxPostProcessOp::NHWC_to_NHWC_feature_axis + } + }, + { + { + // NC x AUTO + // We don't support input_format_type to be auto + SoftmaxPostProcessOp::execute_not_supported, + SoftmaxPostProcessOp::execute_not_supported, + SoftmaxPostProcessOp::execute_not_supported, + SoftmaxPostProcessOp::execute_not_supported + }, + { + // NC x UINT8 + // We don't support input_format_type to be UINT8 + SoftmaxPostProcessOp::execute_not_supported, + SoftmaxPostProcessOp::execute_not_supported, + SoftmaxPostProcessOp::execute_not_supported, + SoftmaxPostProcessOp::execute_not_supported, + }, + { + // NC x UINT16 + // We don't support input_format_type to be UINT16 + SoftmaxPostProcessOp::execute_not_supported, + SoftmaxPostProcessOp::execute_not_supported, + SoftmaxPostProcessOp::execute_not_supported, + SoftmaxPostProcessOp::execute_not_supported, + }, + { + // NC x FLOAT32 + SoftmaxPostProcessOp::execute_not_supported, // We don't support output_format_type format of AUTO + SoftmaxPostProcessOp::execute_not_supported, // We don't support output_format_type format of UINT8 + SoftmaxPostProcessOp::execute_not_supported, // We don't support output_format_type format of UINT16 + SoftmaxPostProcessOp::NC_to_NC, + } + } +}; + +hailo_status SoftmaxPostProcessOp::execute(const std::map &inputs, + std::map &outputs) +{ + auto &input_name = inputs.begin()->first; + auto &output_name = outputs.begin()->first; + auto 
&input_metadata = m_inputs_metadata[input_name]; + auto &output_metadata = m_outputs_metadata[output_name]; + + uint8_t format_index = UINT8_MAX; + switch (input_metadata.format.order) { + case HAILO_FORMAT_ORDER_NHWC: + format_index = 0; + break; + case HAILO_FORMAT_ORDER_NC: + format_index = 1; + break; + default: + LOGGER__ERROR("Softmax post-process received invalid input order {}", + HailoRTCommon::get_format_order_str(input_metadata.format.order)); + return HAILO_INVALID_ARGUMENT; + } + return SoftmaxPostProcessOp::m_softmax_function_array[format_index][input_metadata.format.type][output_metadata.format.type](input_metadata, output_metadata, inputs, outputs); +} + +std::string SoftmaxPostProcessOp::get_op_description() +{ + auto config_info = fmt::format("SoftmaxPostProcess Op, Name: {}", m_name); + return config_info; +} + +hailo_status SoftmaxPostProcessOp::validate_metadata() +{ + assert(m_inputs_metadata.size() == hailort::net_flow::SOFTMAX_NUMBER_OF_SRCS); + assert(m_outputs_metadata.size() == hailort::net_flow::SOFTMAX_NUMBER_OF_DSTS); + + auto &input_metadata = m_inputs_metadata.begin()->second; + auto &output_metadata = m_outputs_metadata.begin()->second; + + CHECK( + ((input_metadata.format.flags & HAILO_FORMAT_FLAGS_QUANTIZED) == 0) && ((output_metadata.format.flags & HAILO_FORMAT_FLAGS_QUANTIZED) == 0), + HAILO_INVALID_OPERATION, "Softmax op is supported only on dequantized data"); + + CHECK( + ((input_metadata.format.order == HAILO_FORMAT_ORDER_NHWC) && (output_metadata.format.order == HAILO_FORMAT_ORDER_NHWC)) || + ((input_metadata.format.order == HAILO_FORMAT_ORDER_NC) && (output_metadata.format.order == HAILO_FORMAT_ORDER_NC)), + HAILO_INVALID_OPERATION, "Softmax op is not supported for src format order ({}) and dst format order ({})", + HailoRTCommon::get_format_order_str(input_metadata.format.order), + HailoRTCommon::get_format_order_str(output_metadata.format.order)); + + CHECK(input_metadata.shape.features == output_metadata.shape.features, 
HAILO_INVALID_OPERATION, + "Softmax op is supported only when src num of features ({}) is equal to dst num of features ({})", + input_metadata.shape.features, output_metadata.shape.features); + CHECK(input_metadata.shape.height == output_metadata.shape.height, HAILO_INVALID_OPERATION, + "Softmax op is supported only when src height ({}) is equal to dst height ({})", + input_metadata.shape.height, output_metadata.shape.height); + CHECK(input_metadata.shape.width == output_metadata.shape.width, HAILO_INVALID_OPERATION, + "Softmax op is supported only when src width ({}) is equal to dst width ({})", + input_metadata.shape.width, output_metadata.shape.width); + CHECK(input_metadata.format.type == HAILO_FORMAT_TYPE_FLOAT32, + HAILO_INVALID_OPERATION, "Src format type {} is not valid. Must be {}", + HailoRTCommon::get_format_type_str(input_metadata.format.type), + HailoRTCommon::get_format_type_str(HAILO_FORMAT_TYPE_FLOAT32)); + CHECK(output_metadata.format.type == HAILO_FORMAT_TYPE_FLOAT32, + HAILO_INVALID_OPERATION, "Dst format type {} is not valid. 
Must be {}", + HailoRTCommon::get_format_type_str(output_metadata.format.type), + HailoRTCommon::get_format_type_str(HAILO_FORMAT_TYPE_FLOAT32)); + CHECK(!(HAILO_FORMAT_FLAGS_HOST_ARGMAX & output_metadata.format.flags), HAILO_INVALID_ARGUMENT, "Output {} is marked as argmax, which is not supported for this model.", + m_outputs_metadata.begin()->first); + CHECK(!(HAILO_FORMAT_FLAGS_QUANTIZED & output_metadata.format.flags), HAILO_INVALID_ARGUMENT, "Output {} is marked as quantized, which is not supported for this model.", + m_outputs_metadata.begin()->first); + + return HAILO_SUCCESS; +} + +Expected> SoftmaxPostProcessOp::create(const std::map &inputs_metadata, + std::map &outputs_metadata) +{ + auto op = std::shared_ptr(new (std::nothrow) SoftmaxPostProcessOp(inputs_metadata, outputs_metadata)); + CHECK_AS_EXPECTED(op != nullptr, HAILO_OUT_OF_HOST_MEMORY); + + return std::shared_ptr(std::move(op)); +} + +} /* namespace net_flow */ +} /* namespace hailort */ \ No newline at end of file diff --git a/hailort/libhailort/src/net_flow/ops/softmax_post_process.hpp b/hailort/libhailort/src/net_flow/ops/softmax_post_process.hpp new file mode 100644 index 0000000..9ebdb8a --- /dev/null +++ b/hailort/libhailort/src/net_flow/ops/softmax_post_process.hpp @@ -0,0 +1,139 @@ +/** + * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. 
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT) +**/ +/** + * @file softmax_post_process.hpp + * @brief: Softmax op perform softmax op as described: https://www.tensorflow.org/api_docs/python/tf/nn/softmax + * A few notes: + * - Support only on features axis + * - Support only on NHWC and NC input data order + **/ + +#ifndef _HAILO_SOFTMAX_POST_PROCESS_HPP_ +#define _HAILO_SOFTMAX_POST_PROCESS_HPP_ + +#include "hailo/hailort.h" +#include "net_flow/ops/op.hpp" +#include "common/utils.hpp" +#include "hailo/quantization.hpp" + +#include + +namespace hailort +{ +namespace net_flow +{ + +#define SOFTMAX_NUM_OF_POSSIBLE_FORMAT_ORDERS (2) // NHWC, NC +#define SOFTMAX_NUM_OF_POSSIBLE_FORMAT_TYPES (4) // Auto, UINT8, UINT16, FLOAT32 + +constexpr std::size_t SOFTMAX_NUMBER_OF_SRCS {1}; +constexpr std::size_t SOFTMAX_NUMBER_OF_DSTS {1}; + +typedef hailo_status (*SoftmaxFunction)(const BufferMetaData &input_metadata, const BufferMetaData &output_metadata, + const std::map &inputs, std::map &outputs); + +class SoftmaxPostProcessOp : public Op +{ + +private: + SoftmaxPostProcessOp(const std::map &inputs_metadata, + const std::map &outputs_metadata) + : Op(inputs_metadata, outputs_metadata, "Softmax-Post-Process") + {} + + template + static hailo_status NHWC_to_NHWC_feature_axis(const BufferMetaData &input_metadata, const BufferMetaData &output_metadata, + const std::map &inputs, std::map &outputs) + { + auto src_ptr = (dst_type*)inputs.begin()->second.data(); + auto dst_ptr = (src_type*)outputs.begin()->second.data(); + const auto src_row_size = input_metadata.shape.width * input_metadata.shape.features; + const auto dst_row_size = output_metadata.shape.width * output_metadata.shape.features; + const auto src_width_size = input_metadata.shape.features; + const auto dst_width_size = output_metadata.shape.features; + + for (uint32_t r = 0; r < input_metadata.shape.height; r++) { // H axis - rows + dst_type *src_row = src_ptr + (r * src_row_size); + 
src_type *dst_row = dst_ptr + (r * dst_row_size); + for (uint32_t w = 0; w < input_metadata.shape.width; w++) { // W axis - coloums + dst_type *src_col = src_row + (w * src_width_size); + src_type *dst_col = dst_row + (w * dst_width_size); + // In order to avoid overflows, we will perform the following: + // For each HW, we will find the maximal c value and then we will substract this value from + // all of the values in this HW. This will preserve the original softmax values + prevent overflows + src_type max_val = std::numeric_limits::min(); + for (uint32_t c = 0; c < input_metadata.shape.features; c++) { + auto ¤t_value = *(src_col + c); + if (current_value > max_val) + max_val = current_value; + } + dst_type sum_exp = 0; // denominator + for (uint32_t c = 0; c < input_metadata.shape.features; c++) { // C axis - features + auto ¤t_value = *(src_col + c); + current_value -= max_val; // This step preserves the original softmax values + prevent overflows + current_value = std::exp(static_cast(current_value)); // Set src_ptr[c] to e^(src_ptr[c]) so that we only calculate it once + sum_exp += current_value; + } + for (uint32_t c = 0; c < input_metadata.shape.features; c++) { + const auto ¤t_value = *(src_col + c); + dst_col[c] = static_cast(current_value / sum_exp); + } + } + } + return HAILO_SUCCESS; + } + + template + static hailo_status NC_to_NC(const BufferMetaData &input_metadata, const BufferMetaData &output_metadata, + const std::map &inputs, std::map &outputs) + { + (void) output_metadata; + auto src_ptr = (src_type*)inputs.begin()->second.data(); + auto dst_ptr = (dst_type*)outputs.begin()->second.data(); + // In order to avoid overflows, we will perform the following: + // For each HW, we will find the maximal c value and then we will substract this value from + // all of the values in this HW. 
This will preserve the original softmax values + prevent overflows + src_type max_val = std::numeric_limits::min(); + for (uint32_t c = 0; c < input_metadata.shape.features; c++) { + auto ¤t_value = *(src_ptr + c); + if (current_value > max_val) + max_val = current_value; + } + dst_type sum_exp = 0; + for (uint32_t c = 0; c < input_metadata.shape.features; c++) { + auto ¤t_value = *(src_ptr + c); + current_value -= max_val; // This step preserves the original softmax values + prevent overflows + current_value = std::exp(static_cast(current_value)); // Set src_ptr[c] to e^(src_ptr[c]) + sum_exp += current_value; + } + for (uint32_t c = 0; c < input_metadata.shape.features; c++) { + dst_ptr[c] = static_cast(src_ptr[c] / sum_exp); + } + return HAILO_SUCCESS; + } + + static hailo_status execute_not_supported(const BufferMetaData &input_metadata, const BufferMetaData &output_metadata, + const std::map &inputs, std::map &outputs); + + public: + static Expected> create(const std::map &inputs_metadata, + std::map &outputs_metadata); + virtual hailo_status execute(const std::map &inputs, + std::map &outputs) override; + virtual std::string get_op_description() override; + hailo_status validate_metadata() override; + + // A 3D array of softmax functions to call: + // 1st dim represent the data format order (NHWC and NC are supported) + // 2nd dim represent the input data type (only float_32 is supported) + // 3rd dim represent the output data type (only float_32 is supported) + static SoftmaxFunction m_softmax_function_array[SOFTMAX_NUM_OF_POSSIBLE_FORMAT_ORDERS][SOFTMAX_NUM_OF_POSSIBLE_FORMAT_TYPES][SOFTMAX_NUM_OF_POSSIBLE_FORMAT_TYPES]; + +}; + +} /* namespace net_flow */ +} /* namespace hailort */ + +#endif /* _HAILO_SOFTMAX_POST_PROCESS_HPP_ */ \ No newline at end of file diff --git a/hailort/libhailort/src/net_flow/ops/ssd_post_process.cpp b/hailort/libhailort/src/net_flow/ops/ssd_post_process.cpp index 12816c1..4504630 100644 --- 
a/hailort/libhailort/src/net_flow/ops/ssd_post_process.cpp +++ b/hailort/libhailort/src/net_flow/ops/ssd_post_process.cpp @@ -16,16 +16,21 @@ namespace hailort namespace net_flow { +hailo_status SSDPostProcessOp::validate_metadata() +{ + auto status = NmsPostProcessOp::validate_metadata(); + if (HAILO_SUCCESS != status) { + return status; + } + + return HAILO_SUCCESS; +} + Expected> SSDPostProcessOp::create(const std::map &inputs_metadata, const std::map &outputs_metadata, const NmsPostProcessConfig &nms_post_process_config, const SSDPostProcessConfig &ssd_post_process_config) { - for (auto &name_to_inputs_metadata : inputs_metadata) { - CHECK_AS_EXPECTED(name_to_inputs_metadata.second.format.order == HAILO_FORMAT_ORDER_NHCW, HAILO_INVALID_ARGUMENT, - "SSDPostProcessOp: Unexpected input format {}", name_to_inputs_metadata.second.format.order); - } - // Validate each anchor is mapped by reg and cls inputs for (const auto ®_to_cls_name : ssd_post_process_config.reg_to_cls_inputs) { CHECK_AS_EXPECTED(ssd_post_process_config.anchors.count(reg_to_cls_name.first), HAILO_INVALID_ARGUMENT, @@ -63,6 +68,7 @@ Expected> SSDPostProcessOp::create(const std::map(new (std::nothrow) SSDPostProcessOp(inputs_metadata, outputs_metadata, nms_post_process_config, ssd_post_process_config)); CHECK_AS_EXPECTED(op != nullptr, HAILO_OUT_OF_HOST_MEMORY); + return std::shared_ptr(std::move(op)); } @@ -73,8 +79,8 @@ hailo_status SSDPostProcessOp::execute(const std::map & m_ssd_config.anchors.size(), inputs.size()); std::vector detections; - std::vector classes_detections_count(m_nms_config.classes, 0); - detections.reserve(m_nms_config.max_proposals_per_class * m_nms_config.classes); + std::vector classes_detections_count(m_nms_config.number_of_classes, 0); + detections.reserve(m_nms_config.max_proposals_per_class * m_nms_config.number_of_classes); for (const auto ®_to_cls : m_ssd_config.reg_to_cls_inputs) { assert(inputs.count(reg_to_cls.first)); assert(inputs.count(reg_to_cls.second)); @@ 
-113,18 +119,20 @@ hailo_status SSDPostProcessOp::extract_detections(const std::string ®_input_n const auto &layer_anchors = m_ssd_config.anchors[reg_input_name]; assert(layer_anchors.size() % 2 == 0); const size_t num_of_anchors = (layer_anchors.size() / 2); + // TODO: HRT-11044 support mixed data types + auto data_size_in_bytes = HailoRTCommon::get_data_bytes(m_inputs_metadata.begin()->second.format.type); // Validate reg buffer size static const uint32_t reg_entry_size = 4; auto number_of_entries = reg_padded_shape.height * reg_padded_shape.width * num_of_anchors; - auto buffer_size = number_of_entries * reg_entry_size; + auto buffer_size = number_of_entries * reg_entry_size * data_size_in_bytes; CHECK(buffer_size == reg_buffer.size(), HAILO_INVALID_ARGUMENT, "Failed to extract_detections, reg {} buffer_size should be {}, but is {}", reg_input_name, buffer_size, reg_buffer.size()); // Validate cls buffer size - const uint32_t cls_entry_size = m_nms_config.classes; + const uint32_t cls_entry_size = m_nms_config.number_of_classes; number_of_entries = cls_padded_shape.height * cls_padded_shape.width * num_of_anchors; - buffer_size = number_of_entries * cls_entry_size; + buffer_size = number_of_entries * cls_entry_size * data_size_in_bytes; CHECK(buffer_size == cls_buffer.size(), HAILO_INVALID_ARGUMENT, "Failed to extract_detections, cls {} buffer_size should be {}, but is {}", cls_input_name, buffer_size, cls_buffer.size()); @@ -167,7 +175,7 @@ hailo_status SSDPostProcessOp::extract_detections(const std::string ®_input_n detections, classes_detections_count); CHECK_SUCCESS(status); } else if (m_inputs_metadata[reg_input_name].format.type == HAILO_FORMAT_TYPE_FLOAT32) { - // For testing - TODO: Remove after generator tests are in, and return error. + // For testing - TODO: HRT-9341 - Remove after generator tests are in, and return error. 
auto status = extract_bbox_detections( reg_input_name, cls_input_name, reg_buffer, cls_buffer, diff --git a/hailort/libhailort/src/net_flow/ops/ssd_post_process.hpp b/hailort/libhailort/src/net_flow/ops/ssd_post_process.hpp index 5bc9b10..bdce014 100644 --- a/hailort/libhailort/src/net_flow/ops/ssd_post_process.hpp +++ b/hailort/libhailort/src/net_flow/ops/ssd_post_process.hpp @@ -60,6 +60,7 @@ public: hailo_status execute(const std::map &inputs, std::map &outputs) override; std::string get_op_description() override; + hailo_status validate_metadata() override; // TODO: HRT-10676 static const uint32_t DEFAULT_Y_OFFSET_IDX = 0; static const uint32_t DEFAULT_X_OFFSET_IDX = 1; @@ -92,7 +93,7 @@ private: classes_detections_count[max_id_score_pair.first]++; } } else { - for (uint32_t class_index = 0; class_index < m_nms_config.classes; class_index++) { + for (uint32_t class_index = 0; class_index < m_nms_config.number_of_classes; class_index++) { auto class_id = class_index; if (m_nms_config.background_removal) { if (m_nms_config.background_removal_index == class_index) { diff --git a/hailort/libhailort/src/net_flow/ops/yolo_post_process.cpp b/hailort/libhailort/src/net_flow/ops/yolo_post_process.cpp index 2f6a118..2c4b90a 100644 --- a/hailort/libhailort/src/net_flow/ops/yolo_post_process.cpp +++ b/hailort/libhailort/src/net_flow/ops/yolo_post_process.cpp @@ -17,6 +17,17 @@ namespace hailort namespace net_flow { +hailo_status YOLOv5PostProcessOp::validate_metadata() +{ + auto status = NmsPostProcessOp::validate_metadata(); + if (HAILO_SUCCESS != status) { + return status; + } + + return HAILO_SUCCESS; +} + +//TODO- move to a dedicated module and maybe convert all yolo function to yolov5, HRT-10858 Expected> YOLOv5PostProcessOp::create(const std::map &inputs_metadata, const std::map &outputs_metadata, const NmsPostProcessConfig &nms_post_process_config, @@ -28,26 +39,7 @@ Expected> YOLOv5PostProcessOp::create(const std::map(new (std::nothrow) 
YOLOv5PostProcessOp(inputs_metadata, outputs_metadata, nms_post_process_config, yolo_post_process_config)); CHECK_AS_EXPECTED(op != nullptr, HAILO_OUT_OF_HOST_MEMORY); - return std::shared_ptr(std::move(op)); -} -Expected> YOLOXPostProcessOp::create(const std::map &inputs_metadata, - const std::map &outputs_metadata, - const NmsPostProcessConfig &nms_post_process_config, - const YoloPostProcessConfig &yolo_post_process_config) -{ - for (auto &name_to_inputs_metadata : inputs_metadata) { - CHECK_AS_EXPECTED(name_to_inputs_metadata.second.format.order == HAILO_FORMAT_ORDER_NHCW, HAILO_INVALID_ARGUMENT, - "YOLOv5PostProcessOp: Unexpected input format {}", name_to_inputs_metadata.second.format.order); - } - auto modified_yolo_post_process_config = yolo_post_process_config; - for (auto &name_to_meta : inputs_metadata) { - std::vector anchors = {1, 1}; - modified_yolo_post_process_config.anchors.insert({name_to_meta.first, anchors}); - } - auto op = std::shared_ptr(new (std::nothrow) YOLOXPostProcessOp(inputs_metadata, outputs_metadata, nms_post_process_config, - modified_yolo_post_process_config)); - CHECK_AS_EXPECTED(op != nullptr, HAILO_OUT_OF_HOST_MEMORY); return std::shared_ptr(std::move(op)); } @@ -58,8 +50,8 @@ hailo_status YOLOPostProcessOp::execute(const std::map m_yolo_config.anchors.size(), inputs.size()); std::vector detections; - std::vector classes_detections_count(m_nms_config.classes, 0); - detections.reserve(m_nms_config.max_proposals_per_class * m_nms_config.classes); + std::vector classes_detections_count(m_nms_config.number_of_classes, 0); + detections.reserve(m_nms_config.max_proposals_per_class * m_nms_config.number_of_classes); for (const auto &name_to_input : inputs) { hailo_status status; auto &name = name_to_input.first; @@ -71,7 +63,7 @@ hailo_status YOLOPostProcessOp::execute(const std::map status = extract_detections(name_to_input.second, input_metadata.quant_info, input_metadata.shape, input_metadata.padded_shape, 
m_yolo_config.anchors[name], detections, classes_detections_count); } else { - CHECK_SUCCESS(HAILO_INVALID_ARGUMENT, "YOLOv5 post-process received invalid input type"); + CHECK_SUCCESS(HAILO_INVALID_ARGUMENT, "YOLO post-process received invalid input type {}", input_metadata.format.type); } CHECK_SUCCESS(status); } @@ -100,18 +92,6 @@ hailo_bbox_float32_t YOLOv5PostProcessOp::decode(float32_t tx, float32_t ty, flo return hailo_bbox_float32_t{y_min, x_min, (y_min+h), (x_min+w), 0}; } -hailo_bbox_float32_t YOLOXPostProcessOp::decode(float32_t tx, float32_t ty, float32_t tw, float32_t th, - int wa, int ha, uint32_t col, uint32_t row, uint32_t w_stride, uint32_t h_stride) const -{ - auto w = exp(tw) * static_cast(wa) / m_yolo_config.image_width; - auto h = exp(th) * static_cast(ha) / m_yolo_config.image_height; - auto x_center = (tx + static_cast(col)) / static_cast(w_stride); - auto y_center = (ty + static_cast(row)) / static_cast(h_stride); - auto x_min = (x_center - (w / 2.0f)); - auto y_min = (y_center - (h / 2.0f)); - return hailo_bbox_float32_t{y_min, x_min, (y_min+h), (x_min+w), 0}; -} - } // namespace net_flow } // namespace hailort diff --git a/hailort/libhailort/src/net_flow/ops/yolo_post_process.hpp b/hailort/libhailort/src/net_flow/ops/yolo_post_process.hpp index 0c61ca1..049d587 100644 --- a/hailort/libhailort/src/net_flow/ops/yolo_post_process.hpp +++ b/hailort/libhailort/src/net_flow/ops/yolo_post_process.hpp @@ -39,6 +39,7 @@ class YOLOPostProcessOp : public NmsPostProcessOp public: hailo_status execute(const std::map &inputs, std::map &outputs) override; std::string get_op_description() override; + virtual hailo_status validate_metadata() = 0; // TODO: HRT-10676 protected: virtual hailo_bbox_float32_t decode(float32_t tx, float32_t ty, float32_t tw, float32_t th, @@ -92,10 +93,10 @@ private: assert(layer_anchors.size() % 2 == 0); const size_t num_of_anchors = (layer_anchors.size() / 2); - uint32_t entry_size = (uint32_t)((CLASSES_START_INDEX + 
m_nms_config.classes) * sizeof(DeviceType)); + uint32_t entry_size = (uint32_t)(CLASSES_START_INDEX + m_nms_config.number_of_classes); auto number_of_entries = padded_shape.height * padded_shape.width * num_of_anchors; // TODO: this can also be part of the Op configuration - auto buffer_size = number_of_entries * entry_size; + auto buffer_size = number_of_entries * entry_size * sizeof(DeviceType); CHECK(buffer_size == buffer.size(), HAILO_INVALID_ARGUMENT, "Failed to extract_detections, buffer_size should be {}, but is {}", buffer_size, buffer.size()); @@ -105,7 +106,6 @@ private: for (uint32_t col = 0; col < shape.width; col++) { for (uint32_t anchor = 0; anchor < num_of_anchors; anchor++) { auto entry_idx = (row_size * row) + col + ((anchor * entry_size) * padded_shape.width); - auto objectness = Quantization::dequantize_output(data[entry_idx + OBJECTNESS_OFFSET], quant_info); if (objectness < m_nms_config.nms_score_th) { continue; @@ -130,7 +130,7 @@ private: } } else { - for (uint32_t class_index = 0; class_index < m_nms_config.classes; class_index++) { + for (uint32_t class_index = 0; class_index < m_nms_config.number_of_classes; class_index++) { auto class_entry_idx = entry_idx + ((CLASSES_START_INDEX + class_index) * padded_shape.width); auto class_confidence = Quantization::dequantize_output( data[class_entry_idx], quant_info); @@ -157,6 +157,7 @@ public: const std::map &outputs_metadata, const NmsPostProcessConfig &nms_post_process_config, const YoloPostProcessConfig &yolo_post_process_config); + hailo_status validate_metadata() override; // TODO: HRT-10676 protected: virtual hailo_bbox_float32_t decode(float32_t tx, float32_t ty, float32_t tw, float32_t th, @@ -171,27 +172,6 @@ private: {} }; -class YOLOXPostProcessOp : public YOLOPostProcessOp -{ -public: - static Expected> create(const std::map &inputs_metadata, - const std::map &outputs_metadata, - const NmsPostProcessConfig &nms_post_process_config, - const YoloPostProcessConfig 
&yolo_post_process_config); - -protected: - virtual hailo_bbox_float32_t decode(float32_t tx, float32_t ty, float32_t tw, float32_t th, - int wa, int ha, uint32_t col, uint32_t row, uint32_t w_stride, uint32_t h_stride) const override; - -private: - YOLOXPostProcessOp(const std::map &inputs_metadata, - const std::map &outputs_metadata, - const NmsPostProcessConfig &nms_post_process_config, - const YoloPostProcessConfig &yolo_post_process_config) - : YOLOPostProcessOp(inputs_metadata, outputs_metadata, nms_post_process_config, yolo_post_process_config, "YOLOX-Post-Process") - {} -}; - } // namespace net_flow } // namespace hailort diff --git a/hailort/libhailort/src/net_flow/ops/yolox_post_process.cpp b/hailort/libhailort/src/net_flow/ops/yolox_post_process.cpp new file mode 100644 index 0000000..23229e8 --- /dev/null +++ b/hailort/libhailort/src/net_flow/ops/yolox_post_process.cpp @@ -0,0 +1,132 @@ +/** + * Copyright (c) 2022 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) +**/ +/** + * @file yolox_post_process.cpp + * @brief YOLOX post process + * + **/ + +#include "net_flow/ops/yolox_post_process.hpp" + +namespace hailort +{ +namespace net_flow +{ + +Expected> YOLOXPostProcessOp::create(const std::map &inputs_metadata, + const std::map &outputs_metadata, + const NmsPostProcessConfig &nms_post_process_config, + const YoloxPostProcessConfig &yolox_post_process_config) +{ + auto op = std::shared_ptr(new (std::nothrow) YOLOXPostProcessOp(inputs_metadata, outputs_metadata, nms_post_process_config, + yolox_post_process_config)); + CHECK_AS_EXPECTED(op != nullptr, HAILO_OUT_OF_HOST_MEMORY); + + return std::shared_ptr(std::move(op)); +} + +hailo_status YOLOXPostProcessOp::validate_metadata() +{ + auto status = NmsPostProcessOp::validate_metadata(); + if (HAILO_SUCCESS != status) { + return status; + } + + // Validate regs, clss and objs matching layers have same shape + for (const auto &layer_names 
: m_yolox_config.input_names) { + CHECK(contains(m_inputs_metadata, layer_names.reg), HAILO_INVALID_ARGUMENT, + "YOLOXPostProcessOp: inputs_metadata does not contain reg layer {}", layer_names.reg); + CHECK(contains(m_inputs_metadata, layer_names.cls), HAILO_INVALID_ARGUMENT, + "YOLOXPostProcessOp: inputs_metadata does not contain cls layer {}", layer_names.cls); + CHECK(contains(m_inputs_metadata, layer_names.obj), HAILO_INVALID_ARGUMENT, + "YOLOXPostProcessOp: inputs_metadata does not contain obj layer {}", layer_names.obj); + + const auto ®_input_metadata = m_inputs_metadata.at(layer_names.reg); + const auto &cls_input_metadata = m_inputs_metadata.at(layer_names.cls); + const auto &obj_input_metadata = m_inputs_metadata.at(layer_names.obj); + + // NOTE: padded shape might be different because features might be different, + // and padding is added when width*features % 8 != 0 + CHECK((reg_input_metadata.shape.height == cls_input_metadata.shape.height) + && (reg_input_metadata.shape.width == cls_input_metadata.shape.width), + HAILO_INVALID_ARGUMENT, "YOLOXPostProcess: reg input {} has different shape than cls input {}", + layer_names.reg, layer_names.cls); + CHECK((obj_input_metadata.shape.height == reg_input_metadata.shape.height) + && (obj_input_metadata.shape.width == reg_input_metadata.shape.width), + HAILO_INVALID_ARGUMENT, "YOLOXPostProcess: reg input {} has different shape than obj input {}", + layer_names.reg, layer_names.obj); + + CHECK((cls_input_metadata.format.type == reg_input_metadata.format.type) + && (cls_input_metadata.format.flags == reg_input_metadata.format.flags) + && (cls_input_metadata.format.order == reg_input_metadata.format.order), + HAILO_INVALID_ARGUMENT, "YOLOXPostProcess: reg input {} has different format than cls input {}", + layer_names.reg, layer_names.cls); + CHECK((obj_input_metadata.format.type == reg_input_metadata.format.type) + && (obj_input_metadata.format.flags == reg_input_metadata.format.flags) + && 
(obj_input_metadata.format.order == reg_input_metadata.format.order), + HAILO_INVALID_ARGUMENT, "YOLOXPostProcess: reg input {} has different format than obj input {}", + layer_names.reg, layer_names.obj); + + } + + return HAILO_SUCCESS; +} + +hailo_status YOLOXPostProcessOp::execute(const std::map &inputs, std::map &outputs) +{ + std::vector detections; + std::vector classes_detections_count(m_nms_config.number_of_classes, 0); + detections.reserve(m_nms_config.max_proposals_per_class * m_nms_config.number_of_classes); + for (const auto &layers_names_triplet : m_yolox_config.input_names) { + hailo_status status; + assert(contains(inputs, layers_names_triplet.cls)); + assert(contains(inputs, layers_names_triplet.obj)); + assert(contains(inputs, layers_names_triplet.reg)); + + auto &input_metadata = m_inputs_metadata[layers_names_triplet.reg]; + if (input_metadata.format.type == HAILO_FORMAT_TYPE_UINT8) { + status = extract_detections(layers_names_triplet, inputs.at(layers_names_triplet.reg), inputs.at(layers_names_triplet.cls), + inputs.at(layers_names_triplet.obj), detections, classes_detections_count); + } else if (input_metadata.format.type == HAILO_FORMAT_TYPE_UINT16) { + status = extract_detections(layers_names_triplet, inputs.at(layers_names_triplet.reg), inputs.at(layers_names_triplet.cls), + inputs.at(layers_names_triplet.obj), detections, classes_detections_count); + } else { + CHECK_SUCCESS(HAILO_INVALID_ARGUMENT, "YOLO post-process received invalid input type {}", input_metadata.format.type); + } + + CHECK_SUCCESS(status); + } + + return hailo_nms_format(std::move(detections), outputs.begin()->second, classes_detections_count); +} + +hailo_bbox_float32_t YOLOXPostProcessOp::decode(float32_t tx, float32_t ty, float32_t tw, float32_t th, + uint32_t col, uint32_t row, float32_t reg_shape_width, float32_t reg_shape_height) const +{ + /** + * Note that the calculations are bit different from the source (In order to save some run time) + * Each "/ 
reg_shape_width" is equivalent to "* w_stride / m_yolox_config.image_width". + * Each "/ reg_shape_height" is equivalent to "* h_stride / m_yolox_config.image_height". + **/ + auto w = exp(tw) / reg_shape_width; + auto h = exp(th) / reg_shape_height; + auto x_center = (tx + static_cast(col)) / reg_shape_width; + auto y_center = (ty + static_cast(row)) / reg_shape_height; + auto x_min = (x_center - (w / 2.0f)); + auto y_min = (y_center - (h / 2.0f)); + + return hailo_bbox_float32_t{y_min, x_min, (y_min+h), (x_min+w), 0}; +} + +std::string YOLOXPostProcessOp::get_op_description() +{ + auto nms_config_info = get_nms_config_description(); + auto config_info = fmt::format("Name: {}, {}, Image height: {:.2f}, Image width: {:.2f}", + m_name, nms_config_info, m_yolox_config.image_height, m_yolox_config.image_width); + return config_info; +} + +} +} diff --git a/hailort/libhailort/src/net_flow/ops/yolox_post_process.hpp b/hailort/libhailort/src/net_flow/ops/yolox_post_process.hpp new file mode 100644 index 0000000..de5d268 --- /dev/null +++ b/hailort/libhailort/src/net_flow/ops/yolox_post_process.hpp @@ -0,0 +1,175 @@ +/** + * Copyright (c) 2022 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) +**/ +/** + * @file yolox_post_process.hpp + * @brief YOLOX post process + * + **/ + +#ifndef _HAILO_YOLOX_POST_PROCESS_HPP_ +#define _HAILO_YOLOX_POST_PROCESS_HPP_ + +#include "net_flow/ops/nms_post_process.hpp" + +namespace hailort +{ +namespace net_flow +{ + +struct MatchingLayersNames +{ + // Regression layer + std::string reg; + + // Objectness layer + std::string obj; + + // Classifications layer + std::string cls; +}; + +struct YoloxPostProcessConfig +{ + // The image height. + float32_t image_height = 0; + + // The image width. + float32_t image_width = 0; + + // A vector off three strings that represents the relations between the outputs names. 
+ std::vector input_names; +}; + +class YOLOXPostProcessOp : public NmsPostProcessOp +{ +public: + static Expected> create(const std::map &inputs_metadata, + const std::map &outputs_metadata, + const NmsPostProcessConfig &nms_post_process_config, + const YoloxPostProcessConfig &yolo_post_process_config); + + hailo_status execute(const std::map &inputs, std::map &outputs) override; + std::string get_op_description() override; + hailo_status validate_metadata() override;// TODO: HRT-10676 + +private: + template + hailo_status extract_detections(const MatchingLayersNames &layers_names, const MemoryView ®_buffer, const MemoryView &cls_buffer, + const MemoryView &obj_buffer, std::vector &detections, std::vector &classes_detections_count) + { + const auto ®_shape = m_inputs_metadata[layers_names.reg].shape; + const auto ®_padded_shape = m_inputs_metadata[layers_names.reg].padded_shape; + const auto &cls_padded_shape = m_inputs_metadata[layers_names.cls].padded_shape; + const auto &obj_padded_shape = m_inputs_metadata[layers_names.obj].padded_shape; + const auto ®_quant_info = m_inputs_metadata[layers_names.reg].quant_info; + const auto &cls_quant_info = m_inputs_metadata[layers_names.cls].quant_info; + const auto &obj_quant_info = m_inputs_metadata[layers_names.obj].quant_info; + + static const uint32_t X_INDEX = 0; + static const uint32_t Y_INDEX = 1; + static const uint32_t W_INDEX = 2; + static const uint32_t H_INDEX = 3; + + const uint32_t X_OFFSET = X_INDEX * reg_padded_shape.width; + const uint32_t Y_OFFSET = Y_INDEX * reg_padded_shape.width; + const uint32_t W_OFFSET = W_INDEX * reg_padded_shape.width; + const uint32_t H_OFFSET = H_INDEX * reg_padded_shape.width; + + static const uint32_t CLASSES_START_INDEX = 0; + + // Validate regression buffer size + static const uint32_t reg_entry_size = 4; + auto number_of_entries = reg_padded_shape.height * reg_padded_shape.width; + auto buffer_size = number_of_entries * reg_entry_size * sizeof(DeviceType); + 
CHECK(buffer_size == reg_buffer.size(), HAILO_INVALID_ARGUMENT, + "Failed to extract_detections, reg {} buffer_size should be {}, but is {}", layers_names.reg, buffer_size, reg_buffer.size()); + + // Validate classes buffer size + const uint32_t cls_entry_size = m_nms_config.number_of_classes; + number_of_entries = cls_padded_shape.height * cls_padded_shape.width; + buffer_size = number_of_entries * cls_entry_size * sizeof(DeviceType); + CHECK(buffer_size == cls_buffer.size(), HAILO_INVALID_ARGUMENT, + "Failed to extract_detections, cls {} buffer_size should be {}, but is {}", layers_names.cls, buffer_size, cls_buffer.size()); + + // Validate objectness buffer size + static const uint32_t obj_entry_size = 1; + number_of_entries = obj_padded_shape.height * obj_padded_shape.width; + buffer_size = number_of_entries * obj_entry_size * sizeof(DeviceType); + CHECK(buffer_size == obj_buffer.size(), HAILO_INVALID_ARGUMENT, + "Failed to extract_detections, obj {} buffer_size should be {}, but is {}", layers_names.obj, buffer_size, obj_buffer.size()); + + auto reg_row_size = reg_padded_shape.width * reg_padded_shape.features; + auto cls_row_size = cls_padded_shape.width * cls_padded_shape.features; + auto obj_row_size = obj_padded_shape.width * obj_padded_shape.features; + + DeviceType *reg_data = (DeviceType*)reg_buffer.data(); + DeviceType *obj_data = (DeviceType*)obj_buffer.data(); + DeviceType *cls_data = (DeviceType*)cls_buffer.data(); + + for (uint32_t row = 0; row < reg_shape.height; row++) { + for (uint32_t col = 0; col < reg_shape.width; col++) { + auto obj_idx = (obj_row_size * row) + col; + auto objectness = Quantization::dequantize_output(obj_data[obj_idx], obj_quant_info); + + if (objectness < m_nms_config.nms_score_th) { + continue; + } + + auto reg_idx = (reg_row_size * row) + col; + auto cls_idx = (cls_row_size * row) + col; + + auto tx = Quantization::dequantize_output(reg_data[reg_idx + X_OFFSET], reg_quant_info); + auto ty = 
Quantization::dequantize_output(reg_data[reg_idx + Y_OFFSET], reg_quant_info); + auto tw = Quantization::dequantize_output(reg_data[reg_idx + W_OFFSET], reg_quant_info); + auto th = Quantization::dequantize_output(reg_data[reg_idx + H_OFFSET], reg_quant_info); + auto bbox = decode(tx, ty, tw, th, col, row, static_cast(reg_shape.width), static_cast(reg_shape.height)); + + if (m_nms_config.cross_classes) { + // Pre-NMS optimization. If NMS checks IOU over different classes, only the maximum class is relevant + auto max_id_score_pair = get_max_class(cls_data, cls_idx, CLASSES_START_INDEX, objectness, cls_quant_info, cls_padded_shape.width); + bbox.score = max_id_score_pair.second; + if (max_id_score_pair.second >= m_nms_config.nms_score_th) { + detections.emplace_back(DetectionBbox(bbox, max_id_score_pair.first)); + classes_detections_count[max_id_score_pair.first]++; + } + } + else { + for (uint32_t curr_class_idx = 0; curr_class_idx < m_nms_config.number_of_classes; curr_class_idx++) { + auto class_entry_idx = cls_idx + (curr_class_idx * cls_padded_shape.width); + auto class_confidence = Quantization::dequantize_output( + cls_data[class_entry_idx], cls_quant_info); + auto class_score = class_confidence * objectness; + if (class_score >= m_nms_config.nms_score_th) { + bbox.score = class_score; + detections.emplace_back(DetectionBbox(bbox, curr_class_idx)); + classes_detections_count[curr_class_idx]++; + } + } + } + } + } + + return HAILO_SUCCESS; + } + + virtual hailo_bbox_float32_t decode(float32_t tx, float32_t ty, float32_t tw, float32_t th, + uint32_t col, uint32_t row, float32_t w_stride, float32_t h_stride) const; + + YoloxPostProcessConfig m_yolox_config; + + YOLOXPostProcessOp(const std::map &inputs_metadata, + const std::map &outputs_metadata, + const NmsPostProcessConfig &nms_post_process_config, + const YoloxPostProcessConfig &yolo_post_process_config) + : NmsPostProcessOp(inputs_metadata, outputs_metadata, nms_post_process_config, "YOLOX-Post-Process") + 
, m_yolox_config(yolo_post_process_config) + {} + +}; + +} // namespace net_flow +} // namespace hailort + +#endif // _HAILO_YOLOX_POST_PROCESS_HPP_ diff --git a/hailort/libhailort/src/net_flow/pipeline/pipeline.cpp b/hailort/libhailort/src/net_flow/pipeline/pipeline.cpp index 03abf9b..521cc86 100644 --- a/hailort/libhailort/src/net_flow/pipeline/pipeline.cpp +++ b/hailort/libhailort/src/net_flow/pipeline/pipeline.cpp @@ -381,9 +381,9 @@ hailo_status PipelinePad::deactivate() return m_element.deactivate(); } -hailo_status PipelinePad::post_deactivate() +hailo_status PipelinePad::post_deactivate(bool should_clear_abort) { - return m_element.post_deactivate(); + return m_element.post_deactivate(should_clear_abort); } hailo_status PipelinePad::clear() @@ -406,9 +406,9 @@ hailo_status PipelinePad::wait_for_finish() return m_element.wait_for_finish(); } -hailo_status PipelinePad::resume() +hailo_status PipelinePad::clear_abort() { - return m_element.resume(); + return m_element.clear_abort(); } hailo_status PipelinePad::run_push(PipelineBuffer &&buffer) @@ -582,9 +582,9 @@ hailo_status PipelineElement::deactivate() return execute_deactivate(); } -hailo_status PipelineElement::post_deactivate() +hailo_status PipelineElement::post_deactivate(bool should_clear_abort) { - return execute_post_deactivate(); + return execute_post_deactivate(should_clear_abort); } hailo_status PipelineElement::clear() @@ -602,9 +602,9 @@ hailo_status PipelineElement::abort() return execute_abort(); } -hailo_status PipelineElement::resume() +hailo_status PipelineElement::clear_abort() { - return execute_resume(); + return execute_clear_abort(); } hailo_status PipelineElement::wait_for_finish() @@ -622,9 +622,9 @@ hailo_status PipelineElement::execute_deactivate() return execute([&](auto *pad){ return pad->deactivate(); }); } -hailo_status PipelineElement::execute_post_deactivate() +hailo_status PipelineElement::execute_post_deactivate(bool should_clear_abort) { - return execute([&](auto *pad){ 
return pad->post_deactivate(); }); + return execute([&](auto *pad){ return pad->post_deactivate(should_clear_abort); }); } hailo_status PipelineElement::execute_clear() @@ -642,9 +642,9 @@ hailo_status PipelineElement::execute_abort() return execute([&](auto *pad){ return pad->abort(); }); } -hailo_status PipelineElement::execute_resume() +hailo_status PipelineElement::execute_clear_abort() { - return execute([&](auto *pad){ return pad->resume(); }); + return execute([&](auto *pad){ return pad->clear_abort(); }); } hailo_status PipelineElement::execute_wait_for_finish() @@ -830,7 +830,7 @@ hailo_status BaseQueueElement::execute_activate() return HAILO_SUCCESS; } -hailo_status BaseQueueElement::execute_post_deactivate() +hailo_status BaseQueueElement::execute_post_deactivate(bool should_clear_abort) { hailo_status status = m_deactivation_event.wait(INIFINITE_TIMEOUT()); if (HAILO_SUCCESS != status) { @@ -842,7 +842,7 @@ hailo_status BaseQueueElement::execute_post_deactivate() LOGGER__ERROR("Failed to reset of deactivation event in {} with status {}", name(), status); } - return PipelineElement::execute_post_deactivate(); + return PipelineElement::execute_post_deactivate(should_clear_abort); } hailo_status BaseQueueElement::execute_clear() @@ -877,14 +877,12 @@ hailo_status PushQueueElement::execute_abort() return m_activation_event.signal(); } -hailo_status BaseQueueElement::execute_resume() +hailo_status BaseQueueElement::execute_clear_abort() { auto status = m_shutdown_event->reset(); CHECK_SUCCESS(status); m_pipeline_status->store(HAILO_SUCCESS); - status = PipelineElement::execute_resume(); - CHECK_SUCCESS(status); - return m_activation_event.signal(); + return PipelineElement::execute_clear_abort(); } hailo_status BaseQueueElement::set_timeout(std::chrono::milliseconds timeout) @@ -1258,7 +1256,8 @@ Expected UserBufferQueueElement::run_pull(PipelineBuffer &&optio LOGGER__INFO("Shutdown event was signaled in dequeue of queue element {}!", name()); return 
make_unexpected(HAILO_SHUTDOWN_EVENT_SIGNALED); } - CHECK_AS_EXPECTED(HAILO_TIMEOUT != output.status(), HAILO_TIMEOUT, "{} (D2H) failed with status={} (timeout={}ms)", name(), HAILO_TIMEOUT, m_timeout.count()); + CHECK_AS_EXPECTED(HAILO_TIMEOUT != output.status(), HAILO_TIMEOUT, "{} (D2H) failed with status={} (timeout={}ms)", + name(), HAILO_TIMEOUT, m_timeout.count()); CHECK_EXPECTED(output); CHECK_AS_EXPECTED(output->data() == optional.data(), HAILO_INTERNAL_FAILURE, "The buffer received in {} was not the same as the user buffer!", name()); @@ -1461,6 +1460,7 @@ hailo_status BaseDemuxElement::execute_activate() } m_is_activated = true;// TODO Should this always be true, no matter the status of source().activate()? m_was_stream_aborted = false; + return PipelineElement::execute_activate(); } @@ -1488,12 +1488,12 @@ hailo_status BaseDemuxElement::execute_deactivate() return HAILO_SUCCESS; } -hailo_status BaseDemuxElement::execute_post_deactivate() +hailo_status BaseDemuxElement::execute_post_deactivate(bool should_clear_abort) { for (uint32_t i = 0; i < m_was_source_called.size(); i++) { m_was_source_called[i] = false; } - return PipelineElement::execute_post_deactivate(); + return PipelineElement::execute_post_deactivate(should_clear_abort); } hailo_status BaseDemuxElement::execute_abort() diff --git a/hailort/libhailort/src/net_flow/pipeline/pipeline.hpp b/hailort/libhailort/src/net_flow/pipeline/pipeline.hpp index 702f8a7..77d56dc 100644 --- a/hailort/libhailort/src/net_flow/pipeline/pipeline.hpp +++ b/hailort/libhailort/src/net_flow/pipeline/pipeline.hpp @@ -204,12 +204,12 @@ public: hailo_status activate(); hailo_status deactivate(); - hailo_status post_deactivate(); + hailo_status post_deactivate(bool should_clear_abort); hailo_status clear(); hailo_status flush(); hailo_status abort(); hailo_status wait_for_finish(); - hailo_status resume(); + hailo_status clear_abort(); virtual hailo_status run_push(PipelineBuffer &&buffer); virtual Expected 
run_pull(PipelineBuffer &&optional = PipelineBuffer()); void set_push_complete_callback(PushCompleteCallback push_complete_callback); @@ -252,11 +252,11 @@ public: hailo_status activate(); hailo_status deactivate(); - hailo_status post_deactivate(); + hailo_status post_deactivate(bool should_clear_abort); hailo_status clear(); hailo_status flush(); hailo_status abort(); - hailo_status resume(); + hailo_status clear_abort(); hailo_status wait_for_finish(); virtual hailo_status run_push(PipelineBuffer &&buffer) = 0; virtual Expected run_pull(PipelineBuffer &&optional, const PipelinePad &source) = 0; @@ -291,11 +291,11 @@ protected: virtual std::vector execution_pads() = 0; virtual hailo_status execute_activate(); virtual hailo_status execute_deactivate(); - virtual hailo_status execute_post_deactivate(); + virtual hailo_status execute_post_deactivate(bool should_clear_abort); virtual hailo_status execute_clear(); virtual hailo_status execute_flush(); virtual hailo_status execute_abort(); - virtual hailo_status execute_resume(); + virtual hailo_status execute_clear_abort(); virtual hailo_status execute_wait_for_finish(); virtual hailo_status execute(std::function); @@ -372,9 +372,9 @@ protected: hailo_status pipeline_status(); virtual hailo_status execute_activate() override; - virtual hailo_status execute_post_deactivate() override; + virtual hailo_status execute_post_deactivate(bool should_clear_abort) override; virtual hailo_status execute_clear() override; - virtual hailo_status execute_resume() override; + virtual hailo_status execute_clear_abort() override; virtual hailo_status execute_wait_for_finish() override; /// Starts/stops the queue thread. 
This functions needs to be called on subclasses ctor and dtor @@ -527,7 +527,7 @@ public: protected: virtual hailo_status execute_activate() override; virtual hailo_status execute_deactivate() override; - virtual hailo_status execute_post_deactivate() override; + virtual hailo_status execute_post_deactivate(bool should_clear_abort) override; virtual hailo_status execute_abort() override; virtual Expected> action(PipelineBuffer &&input) = 0; virtual std::vector execution_pads() override; diff --git a/hailort/libhailort/src/net_flow/pipeline/vstream.cpp b/hailort/libhailort/src/net_flow/pipeline/vstream.cpp index adfc6fa..426bd4b 100644 --- a/hailort/libhailort/src/net_flow/pipeline/vstream.cpp +++ b/hailort/libhailort/src/net_flow/pipeline/vstream.cpp @@ -7,8 +7,10 @@ * @brief Implementation of the virtual stream **/ +#include "common/utils.hpp" #include "hailo/vstream.hpp" #include "hailo/hailort_defaults.hpp" +#include "hailo/hailort_common.hpp" #include "common/runtime_statistics_internal.hpp" @@ -130,8 +132,14 @@ Expected PreInferElement::action(PipelineBuffer &&input, Pipelin Expected> PostInferElement::create(const hailo_3d_image_shape_t &src_image_shape, const hailo_format_t &src_format, const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, const hailo_quant_info_t &dst_quant_info, const hailo_nms_info_t &nms_info, const std::string &name, - hailo_pipeline_elem_stats_flags_t elem_flags, std::shared_ptr> pipeline_status) + hailo_pipeline_elem_stats_flags_t elem_flags, std::shared_ptr> pipeline_status, + std::chrono::milliseconds timeout, hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event, + size_t buffer_pool_size) { + auto frame_size = (dst_format.order == HAILO_FORMAT_ORDER_HAILO_NMS) ? 
HailoRTCommon::get_nms_host_frame_size(nms_info, dst_format) : HailoRTCommon::get_frame_size(dst_image_shape, dst_format); + auto buffer_pool_expected = BufferPool::create(frame_size, buffer_pool_size, shutdown_event, elem_flags, vstream_flags); + CHECK_EXPECTED(buffer_pool_expected, "Failed creating BufferPool for {}", name); + auto transform_context = OutputTransformContext::create(src_image_shape, src_format, dst_image_shape, dst_format, dst_quant_info, nms_info); CHECK_EXPECTED(transform_context, "Failed Creating OutputTransformContext"); @@ -140,7 +148,7 @@ Expected> PostInferElement::create(const hailo CHECK_EXPECTED(duration_collector); auto post_infer_elem_ptr = make_shared_nothrow(transform_context.release(), - name, duration_collector.release(), std::move(pipeline_status)); + name, duration_collector.release(), std::move(pipeline_status), buffer_pool_expected.release(), timeout); CHECK_AS_EXPECTED(nullptr != post_infer_elem_ptr, HAILO_OUT_OF_HOST_MEMORY); LOGGER__INFO("Created {}", post_infer_elem_ptr->name()); @@ -150,17 +158,22 @@ Expected> PostInferElement::create(const hailo Expected> PostInferElement::create(const hailo_3d_image_shape_t &src_image_shape, const hailo_format_t &src_format, const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, const hailo_quant_info_t &dst_quant_info, const hailo_nms_info_t &nms_info, - const std::string &name, const hailo_vstream_params_t &vstream_params, std::shared_ptr> pipeline_status) + const std::string &name, const hailo_vstream_params_t &vstream_params, std::shared_ptr> pipeline_status, + EventPtr shutdown_event) { return PostInferElement::create(src_image_shape, src_format, dst_image_shape, dst_format, dst_quant_info, nms_info, - name, vstream_params.pipeline_elements_stats_flags, pipeline_status); + name, vstream_params.pipeline_elements_stats_flags, pipeline_status, std::chrono::milliseconds(vstream_params.timeout_ms), + vstream_params.vstream_stats_flags, shutdown_event, 
vstream_params.queue_size); } PostInferElement::PostInferElement(std::unique_ptr &&transform_context, const std::string &name, DurationCollector &&duration_collector, - std::shared_ptr> &&pipeline_status) : + std::shared_ptr> &&pipeline_status, + BufferPoolPtr buffer_pool, std::chrono::milliseconds timeout) : FilterElement(name, std::move(duration_collector), std::move(pipeline_status)), - m_transform_context(std::move(transform_context)) + m_transform_context(std::move(transform_context)), + m_pool(buffer_pool), + m_timeout(timeout) {} hailo_status PostInferElement::run_push(PipelineBuffer &&/*buffer*/) @@ -184,18 +197,30 @@ std::string PostInferElement::description() const Expected PostInferElement::action(PipelineBuffer &&input, PipelineBuffer &&optional) { - CHECK_AS_EXPECTED(optional, HAILO_INVALID_ARGUMENT, "Optional buffer must be valid in {}!", name()); + auto buffer = m_pool->get_available_buffer(std::move(optional), m_timeout); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == buffer.status()) { + return make_unexpected(buffer.status()); + } + CHECK_EXPECTED(buffer, "{} (D2H) failed with status={}", name(), buffer.status()); // Note: The latency to be measured starts as the buffer is read from the HW (it's 'input' in this case) - optional.set_metadata(input.get_metadata()); + buffer->set_metadata(input.get_metadata()); - auto dst = optional.as_view(); + auto dst = buffer->as_view(); m_duration_collector.start_measurement(); const auto status = m_transform_context->transform(input.as_view(), dst); m_duration_collector.complete_measurement(); CHECK_SUCCESS_AS_EXPECTED(status); - return std::move(optional); + return buffer.release(); +} + +std::vector PostInferElement::get_queue_size_accumulators() +{ + if (nullptr == m_pool->get_queue_size_accumulator()) { + return std::vector(); + } + return {m_pool->get_queue_size_accumulator()}; } static hailo_nms_info_t fuse_nms_info(const std::vector &nms_infos) @@ -426,6 +451,116 @@ Expected> 
TransformDemuxElement::action(PipelineBuff return outputs; } +Expected> ArgmaxPostProcessElement::create(std::shared_ptr argmax_op, + const std::string &name, hailo_pipeline_elem_stats_flags_t elem_flags, + std::shared_ptr> pipeline_status) +{ + auto duration_collector = DurationCollector::create(elem_flags); + CHECK_EXPECTED(duration_collector); + auto argmax_elem_ptr = make_shared_nothrow(argmax_op, + name, duration_collector.release(), std::move(pipeline_status)); + CHECK_AS_EXPECTED(nullptr != argmax_elem_ptr, HAILO_OUT_OF_HOST_MEMORY); + LOGGER__INFO("Created {}", argmax_elem_ptr->name()); + return argmax_elem_ptr; +} + +ArgmaxPostProcessElement::ArgmaxPostProcessElement(std::shared_ptr argmax_op, const std::string &name, + DurationCollector &&duration_collector, + std::shared_ptr> &&pipeline_status) : + FilterElement(name, std::move(duration_collector), std::move(pipeline_status)), + m_argmax_op(argmax_op) +{} + +hailo_status ArgmaxPostProcessElement::run_push(PipelineBuffer &&/*buffer*/) +{ + LOGGER__ERROR("ArgmaxPostProcessElement does not support run_push operation"); + return HAILO_INVALID_OPERATION; +} + +PipelinePad &ArgmaxPostProcessElement::next_pad() +{ + // Note: The next elem to be run is upstream from this elem (i.e. 
buffers are pulled) + return *m_sinks[0].prev(); +} + +std::string ArgmaxPostProcessElement::description() const +{ + std::stringstream element_description; + element_description << "(" << this->name() << " | " << m_argmax_op->get_op_description() << ")"; + return element_description.str(); +} + +Expected ArgmaxPostProcessElement::action(PipelineBuffer &&input, PipelineBuffer &&optional) +{ + std::map inputs; + std::map outputs; + auto &input_name = m_argmax_op->inputs_metadata().begin()->first; + auto &output_name = m_argmax_op->outputs_metadata().begin()->first; + inputs.insert({input_name, input.as_view()}); + outputs.insert({output_name, optional.as_view()}); + m_duration_collector.start_measurement(); + auto post_process_result = m_argmax_op->execute(inputs, outputs); + CHECK_SUCCESS_AS_EXPECTED(post_process_result); + m_duration_collector.complete_measurement(); + + return std::move(optional); +} + +Expected> SoftmaxPostProcessElement::create(std::shared_ptr softmax_op, + const std::string &name, hailo_pipeline_elem_stats_flags_t elem_flags, + std::shared_ptr> pipeline_status) +{ + auto duration_collector = DurationCollector::create(elem_flags); + CHECK_EXPECTED(duration_collector); + auto softmax_elem_ptr = make_shared_nothrow(softmax_op, + name, duration_collector.release(), std::move(pipeline_status)); + CHECK_AS_EXPECTED(nullptr != softmax_elem_ptr, HAILO_OUT_OF_HOST_MEMORY); + LOGGER__INFO("Created {}", softmax_elem_ptr->name()); + return softmax_elem_ptr; +} + +SoftmaxPostProcessElement::SoftmaxPostProcessElement(std::shared_ptr softmax_op, const std::string &name, + DurationCollector &&duration_collector, + std::shared_ptr> &&pipeline_status) : + FilterElement(name, std::move(duration_collector), std::move(pipeline_status)), + m_softmax_op(softmax_op) +{} + +hailo_status SoftmaxPostProcessElement::run_push(PipelineBuffer &&/*buffer*/) +{ + LOGGER__ERROR("SoftmaxPostProcessElement does not support run_push operation"); + return HAILO_INVALID_OPERATION; 
+} + +PipelinePad &SoftmaxPostProcessElement::next_pad() +{ + // Note: The next elem to be run is upstream from this elem (i.e. buffers are pulled) + return *m_sinks[0].prev(); +} + +std::string SoftmaxPostProcessElement::description() const +{ + std::stringstream element_description; + element_description << "(" << this->name() << " | " << m_softmax_op->get_op_description() << ")"; + return element_description.str(); +} + +Expected SoftmaxPostProcessElement::action(PipelineBuffer &&input, PipelineBuffer &&optional) +{ + std::map inputs; + std::map outputs; + auto &input_name = m_softmax_op->inputs_metadata().begin()->first; + auto &output_name = m_softmax_op->outputs_metadata().begin()->first; + inputs.insert({input_name, input.as_view()}); + outputs.insert({output_name, optional.as_view()}); + m_duration_collector.start_measurement(); + auto post_process_result = m_softmax_op->execute(inputs, outputs); + CHECK_SUCCESS_AS_EXPECTED(post_process_result); + m_duration_collector.complete_measurement(); + + return std::move(optional); +} + BaseVStream::BaseVStream(const hailo_vstream_info_t &vstream_info, const hailo_vstream_params_t &vstream_params, std::shared_ptr pipeline_entry, std::vector> &&pipeline, std::shared_ptr> &&pipeline_status, @@ -494,28 +629,38 @@ hailo_status BaseVStream::start_vstream() auto status = m_shutdown_event->reset(); CHECK_SUCCESS(status); - LOGGER__DEBUG("Activating {}...", name()); - status = m_entry_element->activate(); - CHECK_SUCCESS(status); - status = resume(); CHECK(((status == HAILO_SUCCESS) || (status == HAILO_STREAM_NOT_ACTIVATED)), status, "Failed to resume stream in {}", name()); + LOGGER__DEBUG("Activating {}...", name()); + status = m_entry_element->activate(); + CHECK_SUCCESS(status); + m_is_activated = true; return HAILO_SUCCESS; } hailo_status BaseVStream::abort() { + auto status = m_entry_element->abort(); + CHECK_SUCCESS(status); m_is_aborted = true; - return m_entry_element->abort(); + + return HAILO_SUCCESS; } 
hailo_status BaseVStream::resume() { + auto status = m_entry_element->clear_abort(); + CHECK_SUCCESS(status); m_is_aborted = false; - return m_entry_element->resume(); + + if (m_is_activated) { + status = m_entry_element->activate(); + CHECK_SUCCESS(status); + } + return HAILO_SUCCESS; } hailo_status BaseVStream::stop_vstream() @@ -528,7 +673,10 @@ hailo_status BaseVStream::stop_vstream() LOGGER__WARNING("Failed deactivate of vstream {} status {}", name(), status); } - status = m_entry_element->post_deactivate(); + // If VStream was aborted, do not clear low-level stream abortion, + // otherwise flush would be called on low-level stream d-tor when there is no receiver. + auto should_clear_abort = (!m_is_aborted); + status = m_entry_element->post_deactivate(should_clear_abort); if (HAILO_SUCCESS != status) { LOGGER__WARNING("Failed post deactivate of vstream {} status {}", name(), status); } @@ -538,9 +686,12 @@ hailo_status BaseVStream::stop_vstream() hailo_status BaseVStream::stop_and_clear() { - auto status = m_core_op_activated_event->wait(std::chrono::milliseconds(0)); - CHECK(HAILO_TIMEOUT == status, HAILO_INVALID_OPERATION, - "Trying to clear {} vstream before its network group is deactivated", name()); + auto status = HAILO_SUCCESS; + if (nullptr != m_core_op_activated_event) { + status = m_core_op_activated_event->wait(std::chrono::milliseconds(0)); + CHECK(HAILO_TIMEOUT == status, HAILO_INVALID_OPERATION, + "Trying to clear {} vstream before its network group is deactivated", name()); + } status = stop_vstream(); CHECK_SUCCESS(status); @@ -553,8 +704,8 @@ hailo_status BaseVStream::stop_and_clear() LOGGER__TRACE("Overwritting current pipeline status {}", curr_pipeline_status); m_pipeline_status->store(HAILO_SUCCESS); } - - return HAILO_SUCCESS; + + return status; } size_t BaseVStream::get_frame_size() const @@ -758,6 +909,11 @@ hailo_status InputVStream::after_fork_in_child() return m_vstream->after_fork_in_child(); } +bool InputVStream::is_aborted() +{ + 
return m_vstream->is_aborted(); +} + InputVStream::InputVStream(std::shared_ptr vstream) : m_vstream(std::move(vstream)) {} Expected OutputVStream::create( @@ -902,6 +1058,11 @@ hailo_status OutputVStream::after_fork_in_child() return m_vstream->after_fork_in_child(); } +bool OutputVStream::is_aborted() +{ + return m_vstream->is_aborted(); +} + OutputVStream::OutputVStream(std::shared_ptr vstream) : m_vstream(std::move(vstream)) {} std::map get_pipeline_accumulators_by_type( @@ -1008,11 +1169,6 @@ InputVStreamImpl::InputVStreamImpl(const hailo_vstream_info_t &vstream_info, con InputVStreamImpl::~InputVStreamImpl() { (void)stop_vstream(); - if (m_is_aborted) { - // If VStream was aborted, do not clear low-level stream abortion, - // otherwise flush would be called on low-level stream d-tor when there is no receiver. - (void)abort(); - } } hailo_status InputVStreamImpl::write(const MemoryView &buffer) @@ -1074,7 +1230,7 @@ InputVStreamClient::InputVStreamClient(std::unique_ptr client, InputVStreamClient::~InputVStreamClient() { - auto reply = m_client->InputVStream_release(m_handle); + auto reply = m_client->InputVStream_release(m_handle, OsUtils::get_curr_pid()); if (reply != HAILO_SUCCESS) { LOGGER__CRITICAL("InputVStream_release failed!"); } @@ -1103,6 +1259,24 @@ hailo_status InputVStreamClient::resume() return m_client->InputVStream_resume(m_handle); } +hailo_status InputVStreamClient::stop_and_clear() +{ + auto expected_client = HailoRtRpcClientUtils::create_client(); + CHECK_EXPECTED_AS_STATUS(expected_client); + auto stop_and_clear_client = expected_client.release(); + + return stop_and_clear_client->InputVStream_stop_and_clear(m_handle); +} + +hailo_status InputVStreamClient::start_vstream() +{ + auto expected_client = HailoRtRpcClientUtils::create_client(); + CHECK_EXPECTED_AS_STATUS(expected_client); + auto start_vstream_client = expected_client.release(); + + return start_vstream_client->InputVStream_start_vstream(m_handle); +} + size_t 
InputVStreamClient::get_frame_size() const { auto frame_size = m_client->InputVStream_get_frame_size(m_handle); @@ -1198,6 +1372,17 @@ hailo_status InputVStreamClient::after_fork_in_child() m_handle = expected_dup_handle.value(); return HAILO_SUCCESS; } + +bool InputVStreamClient::is_aborted() +{ + auto is_aborted_exp = m_client->InputVStream_is_aborted(m_handle); + if (!is_aborted_exp) { + LOGGER__CRITICAL("InputVStream_is_aborted failed with status={}", is_aborted_exp.status()); + return true; + } + return is_aborted_exp.release(); +} + #endif // HAILO_SUPPORT_MULTI_PROCESS std::string InputVStreamInternal::get_pipeline_description() const @@ -1300,11 +1485,6 @@ OutputVStreamImpl::OutputVStreamImpl(const hailo_vstream_info_t &vstream_info, c OutputVStreamImpl::~OutputVStreamImpl() { (void)stop_vstream(); - if (m_is_aborted) { - // If VStream was aborted, do not clear low-level stream abortion, - // otherwise flush would be called on low-level stream d-tor when there is no receiver. - (void)abort(); - } } hailo_status OutputVStreamImpl::read(MemoryView buffer) @@ -1361,7 +1541,7 @@ OutputVStreamClient::OutputVStreamClient(std::unique_ptr clien OutputVStreamClient::~OutputVStreamClient() { - auto reply = m_client->OutputVStream_release(m_handle); + auto reply = m_client->OutputVStream_release(m_handle, OsUtils::get_curr_pid()); if (reply != HAILO_SUCCESS) { LOGGER__CRITICAL("OutputVStream_release failed!"); } @@ -1385,6 +1565,24 @@ hailo_status OutputVStreamClient::resume() return m_client->OutputVStream_resume(m_handle); } +hailo_status OutputVStreamClient::stop_and_clear() +{ + auto expected_client = HailoRtRpcClientUtils::create_client(); + CHECK_EXPECTED_AS_STATUS(expected_client); + auto stop_and_clear_client = expected_client.release(); + + return stop_and_clear_client->OutputVStream_stop_and_clear(m_handle); +} + +hailo_status OutputVStreamClient::start_vstream() +{ + auto expected_client = HailoRtRpcClientUtils::create_client(); + 
CHECK_EXPECTED_AS_STATUS(expected_client); + auto start_vstream_client = expected_client.release(); + + return start_vstream_client->OutputVStream_start_vstream(m_handle); +} + size_t OutputVStreamClient::get_frame_size() const { auto frame_size = m_client->OutputVStream_get_frame_size(m_handle); @@ -1409,7 +1607,7 @@ std::string OutputVStreamClient::name() const { auto expected_name = m_client->OutputVStream_name(m_handle); if (!expected_name) { - LOGGER__CRITICAL("InputVStream_name failed with status={}", expected_name.status()); + LOGGER__CRITICAL("OutputVStream_name failed with status={}", expected_name.status()); return ""; } return expected_name.release(); @@ -1419,7 +1617,7 @@ std::string OutputVStreamClient::network_name() const { auto expected_name = m_client->OutputVStream_network_name(m_handle); if (!expected_name) { - LOGGER__CRITICAL("InputVStream_name failed with status={}", expected_name.status()); + LOGGER__CRITICAL("OutputVStream_name failed with status={}", expected_name.status()); return ""; } return expected_name.release(); @@ -1480,6 +1678,16 @@ hailo_status OutputVStreamClient::after_fork_in_child() m_handle = expected_dup_handle.value(); return HAILO_SUCCESS; } + +bool OutputVStreamClient::is_aborted() +{ + auto is_aborted_exp = m_client->OutputVStream_is_aborted(m_handle); + if (!is_aborted_exp) { + LOGGER__CRITICAL("OutputVStream_is_aborted failed with status={}", is_aborted_exp.status()); + return true; + } + return is_aborted_exp.release(); +} #endif // HAILO_SUPPORT_MULTI_PROCESS Expected> HwReadElement::create(std::shared_ptr stream, const std::string &name, std::chrono::milliseconds timeout, @@ -1535,11 +1743,13 @@ std::string HwReadElement::description() const return element_description.str(); } -hailo_status HwReadElement::execute_post_deactivate() +hailo_status HwReadElement::execute_post_deactivate(bool should_clear_abort) { - auto status = m_stream->clear_abort(); - CHECK(((HAILO_SUCCESS == status) || (HAILO_STREAM_NOT_ACTIVATED 
== status)), status, - "Failed to clear abort stream in {}", name()); + if (should_clear_abort) { + auto status = m_stream->clear_abort(); + CHECK(((HAILO_SUCCESS == status) || (HAILO_STREAM_NOT_ACTIVATED == status)), status, + "Failed to clear abort stream in {}", name()); + } return HAILO_SUCCESS; } @@ -1561,11 +1771,11 @@ hailo_status HwReadElement::execute_abort() return HAILO_SUCCESS; } -hailo_status HwReadElement::execute_resume() +hailo_status HwReadElement::execute_clear_abort() { auto status = m_stream->clear_abort(); CHECK(((status == HAILO_SUCCESS) || (status == HAILO_STREAM_NOT_ACTIVATED)), status, - "Failed to execute resume stream in {}", name()); + "Failed to execute clear_abort stream in {}", name()); return HAILO_SUCCESS; } @@ -1734,7 +1944,9 @@ hailo_status HwWriteElement::execute_deactivate() hailo_status flush_status = m_stream->flush(); if (HAILO_STREAM_ABORTED_BY_USER == flush_status) { LOGGER__INFO("Failed flushing input stream {} because stream was aborted", m_stream->to_string()); - // TODO: HRT-3621 + return HAILO_SUCCESS; + } else if (HAILO_STREAM_NOT_ACTIVATED == flush_status) { + LOGGER__INFO("Failed flushing input stream {} because stream is not activated", m_stream->to_string()); return HAILO_SUCCESS; } else if (HAILO_SUCCESS != flush_status) { LOGGER__ERROR("flush has failed in {} with status {}", name(), flush_status); @@ -1746,11 +1958,13 @@ hailo_status HwWriteElement::execute_deactivate() return HAILO_SUCCESS; } -hailo_status HwWriteElement::execute_post_deactivate() +hailo_status HwWriteElement::execute_post_deactivate(bool should_clear_abort) { - auto status = m_stream->clear_abort(); - CHECK(((status == HAILO_SUCCESS) || (status == HAILO_STREAM_NOT_ACTIVATED)), status, - "Failed to clear abort stream in {}", name()); + if (should_clear_abort) { + auto status = m_stream->clear_abort(); + CHECK(((status == HAILO_SUCCESS) || (status == HAILO_STREAM_NOT_ACTIVATED)), status, + "Failed to clear abort stream in {}", name()); + } 
return HAILO_SUCCESS; } @@ -1778,11 +1992,11 @@ hailo_status HwWriteElement::execute_abort() return HAILO_SUCCESS; } -hailo_status HwWriteElement::execute_resume() +hailo_status HwWriteElement::execute_clear_abort() { auto status = m_stream->clear_abort(); CHECK(((status == HAILO_SUCCESS) || (status == HAILO_STREAM_NOT_ACTIVATED)), status, - "Failed to execute resume stream in {}", name()); + "Failed to execute clear_abort stream in {}", name()); return HAILO_SUCCESS; } @@ -1876,12 +2090,51 @@ Expected, std::vector>> VStre static hailo_vstream_params_t expand_vstream_params_autos(const hailo_stream_info_t &stream_info, const hailo_vstream_params_t &vstream_params) { + if (HAILO_FORMAT_ORDER_HAILO_NMS == stream_info.format.order) { + // TODO (HRT-11082): On NMS, return error if UINT16 + if (HAILO_FORMAT_TYPE_UINT16 == vstream_params.user_buffer_format.type) { + LOGGER__WARNING("Passing 'HAILO_FORMAT_TYPE_UINT16' for NMS output is deprecated and will soon be unsupported. "\ + "One should use HAILO_FORMAT_TYPE_FLOAT32"); + } + } auto local_vstream_params = vstream_params; local_vstream_params.user_buffer_format = HailoRTDefaults::expand_auto_format(vstream_params.user_buffer_format, stream_info.format); return local_vstream_params; } +static hailo_vstream_params_t expand_vstream_params_autos_argmax(const hailo_vstream_params_t &vstream_params, + hailo_format_t &op_input_format) +{ + auto local_vstream_params = vstream_params; + if (local_vstream_params.user_buffer_format.type == HAILO_FORMAT_TYPE_AUTO) { + local_vstream_params.user_buffer_format.type = op_input_format.type; + } + if (local_vstream_params.user_buffer_format.order == HAILO_FORMAT_ORDER_AUTO) { + if (op_input_format.order == HAILO_FORMAT_ORDER_NHCW || op_input_format.order == HAILO_FORMAT_ORDER_NHWC) { + local_vstream_params.user_buffer_format.order = HAILO_FORMAT_ORDER_NHW; + } + if (op_input_format.order == HAILO_FORMAT_ORDER_NC) { + local_vstream_params.user_buffer_format.order = 
HAILO_FORMAT_ORDER_NC; + } + } + return local_vstream_params; +} + +static hailo_vstream_params_t expand_vstream_params_autos_softmax(const hailo_vstream_params_t &vstream_params, + hailo_format_t &op_input_format) +{ + auto local_vstream_params = vstream_params; + // Type should be float32, after de-quantization, and order NHWC or NC in softmax + if (local_vstream_params.user_buffer_format.type == HAILO_FORMAT_TYPE_AUTO) { + local_vstream_params.user_buffer_format.type = HAILO_FORMAT_TYPE_FLOAT32; + } + if (local_vstream_params.user_buffer_format.order == HAILO_FORMAT_ORDER_AUTO) { + local_vstream_params.user_buffer_format.order = op_input_format.order; + } + return local_vstream_params; +} + Expected> VStreamsBuilder::create_input_vstreams(ConfiguredNetworkGroup &net_group, const std::map &inputs_params) { @@ -2001,21 +2254,18 @@ Expected> VStreamsBuilderUtils::create_outputs(std::s CHECK_AS_EXPECTED(!(hw_read_stream_stats_flags & HAILO_VSTREAM_STATS_MEASURE_FPS), HAILO_NOT_IMPLEMENTED, "Pipeline FPS statistics measurement is not implemented"); - auto hw_read_elem = HwReadElement::create(output_stream, - PipelineObject::create_element_name("HwReadElement", output_stream->name(), output_stream->get_info().index), - HAILO_INFINITE_TIMEOUT, buffer_pool_size, hw_read_element_stats_flags, hw_read_stream_stats_flags, shutdown_event, pipeline_status); - CHECK_EXPECTED(hw_read_elem); - elements.push_back(hw_read_elem.value()); + auto hw_read_element = add_hw_read_element(output_stream, pipeline_status, elements, "HwReadElement", shutdown_event, + buffer_pool_size, hw_read_element_stats_flags, hw_read_stream_stats_flags); + CHECK_EXPECTED(hw_read_element); if (output_stream->get_info().is_mux) { - hailo_status status = add_demux(output_stream, vstreams_params_map, std::move(elements), vstreams, hw_read_elem.value(), + hailo_status status = add_demux(output_stream, vstreams_params_map, std::move(elements), vstreams, hw_read_element.value(), shutdown_event, pipeline_status, 
output_vstream_infos); CHECK_SUCCESS_AS_EXPECTED(status); } else { auto vstream_info = output_vstream_infos.find(output_stream->name()); CHECK_AS_EXPECTED(vstream_info != output_vstream_infos.end(), HAILO_NOT_FOUND, "Failed to find vstream info of {}", output_stream->name()); - assert(1 == vstreams_params_map.size()); auto vstream_params = expand_vstream_params_autos(output_stream->get_info(), vstreams_params_map.begin()->second); @@ -2027,37 +2277,27 @@ Expected> VStreamsBuilderUtils::create_outputs(std::s vstream_params.user_buffer_format, output_stream->get_info().quant_info); if (should_transform) { - auto hw_read_queue_elem = PullQueueElement::create( - PipelineObject::create_element_name("PullQueueElement_hw_read", output_stream->name(), output_stream->get_info().index), - vstream_params, shutdown_event, pipeline_status); - CHECK_EXPECTED(hw_read_queue_elem); - elements.push_back(hw_read_queue_elem.value()); - CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(hw_read_elem.value(), hw_read_queue_elem.value())); - - auto post_infer_elem = PostInferElement::create(output_stream->get_info().hw_shape, output_stream->get_info().format, - output_stream->get_info().shape, vstream_params.user_buffer_format, output_stream->get_info().quant_info, output_stream->get_info().nms_info, - PipelineObject::create_element_name("PostInferElement", output_stream->name(), output_stream->get_info().index), - vstream_params, pipeline_status); - CHECK_EXPECTED(post_infer_elem); - elements.push_back(post_infer_elem.value()); - CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(hw_read_queue_elem.value(), post_infer_elem.value())); - - auto post_infer_queue_elem = UserBufferQueueElement::create( - PipelineObject::create_element_name("UserBufferQueueElement_post_infer", output_stream->name(), output_stream->get_info().index), - vstream_params, shutdown_event, pipeline_status); - CHECK_EXPECTED(post_infer_queue_elem); - elements.push_back(post_infer_queue_elem.value()); - 
CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(post_infer_elem.value(), post_infer_queue_elem.value())); - + auto hw_read_queue_element = add_pull_queue_element(output_stream, pipeline_status, elements, "PullQueueElement_hw_read", + shutdown_event, vstream_params); + CHECK_EXPECTED(hw_read_queue_element); + CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(hw_read_element.value(), hw_read_queue_element.value())); + + auto post_infer_element = add_post_infer_element(output_stream, pipeline_status, elements, + "PostInferElement", vstream_params, shutdown_event); + CHECK_EXPECTED(post_infer_element); + CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(hw_read_queue_element.value(), post_infer_element.value())); + auto user_buffer_queue_element = add_user_buffer_queue_element(output_stream, pipeline_status, elements, + "UserBufferQueueElement", shutdown_event, vstream_params); + CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(post_infer_element.value(), user_buffer_queue_element.value())); output_stream->set_timeout(std::chrono::milliseconds(HAILO_INFINITE)); - hw_read_queue_elem->get()->set_timeout(std::chrono::milliseconds(HAILO_INFINITE)); - auto vstream = OutputVStream::create(vstream_info->second, vstream_params, post_infer_queue_elem.release(), std::move(elements), + hw_read_queue_element->get()->set_timeout(std::chrono::milliseconds(HAILO_INFINITE)); + auto vstream = OutputVStream::create(vstream_info->second, vstream_params, user_buffer_queue_element.release(), std::move(elements), std::move(pipeline_status), shutdown_event, core_op_activated_event, pipeline_latency_accumulator.release()); CHECK_EXPECTED(vstream); vstreams.emplace_back(vstream.release()); } else { output_stream->set_timeout(std::chrono::milliseconds(vstream_params.timeout_ms)); - auto vstream = OutputVStream::create(vstream_info->second, vstream_params, hw_read_elem.release(), std::move(elements), + auto vstream = OutputVStream::create(vstream_info->second, vstream_params, 
hw_read_element.release(), std::move(elements), std::move(pipeline_status), shutdown_event, core_op_activated_event, pipeline_latency_accumulator.release()); CHECK_EXPECTED(vstream); vstreams.emplace_back(vstream.release()); @@ -2071,6 +2311,91 @@ Expected> VStreamsBuilderUtils::create_outputs(std::s return vstreams; } +Expected> VStreamsBuilderUtils::create_output_post_process_softmax(std::shared_ptr output_stream, + const NameToVStreamParamsMap &vstreams_params_map, const hailo_vstream_info_t &output_vstream_info, const NetFlowElement &softmax_op) +{ + std::vector> elements; + std::vector vstreams; + + EventPtr core_op_activated_event = nullptr; + if (!output_stream->is_scheduled()) { + core_op_activated_event = output_stream->get_core_op_activated_event(); + } + + auto shutdown_event = Event::create_shared(Event::State::not_signalled); + CHECK_AS_EXPECTED(nullptr != shutdown_event, HAILO_OUT_OF_HOST_MEMORY); + + auto pipeline_status = make_shared_nothrow>(HAILO_SUCCESS); + CHECK_AS_EXPECTED(nullptr != pipeline_status, HAILO_OUT_OF_HOST_MEMORY); + + assert(!vstreams_params_map.empty()); + + // Note: In case of multiple values in vstreams_params_map (e.g. in the case of demux), we'll set the + // pipeline_elements_stats_flags for the hw_read_element as bitwise or of all the flags. 
+ hailo_pipeline_elem_stats_flags_t hw_read_element_stats_flags = HAILO_PIPELINE_ELEM_STATS_NONE; + hailo_vstream_stats_flags_t hw_read_stream_stats_flags = HAILO_VSTREAM_STATS_NONE; + size_t buffer_pool_size = 0; + for (const auto &elem_name_params : vstreams_params_map) { + hw_read_element_stats_flags |= elem_name_params.second.pipeline_elements_stats_flags; + hw_read_stream_stats_flags |= elem_name_params.second.vstream_stats_flags; + buffer_pool_size += elem_name_params.second.queue_size; + } + + // TODO (HRT-4522): Support this measurement + CHECK_AS_EXPECTED(!(hw_read_stream_stats_flags & HAILO_VSTREAM_STATS_MEASURE_FPS), HAILO_NOT_IMPLEMENTED, + "Pipeline FPS statistics measurement is not implemented"); + + assert(1 == vstreams_params_map.size()); + auto op_input_format = softmax_op.op->inputs_metadata().begin()->second.format; + auto vstream_params = expand_vstream_params_autos_softmax(vstreams_params_map.begin()->second, op_input_format); + if (HAILO_FORMAT_FLAGS_QUANTIZED & vstream_params.user_buffer_format.flags) { + vstream_params.user_buffer_format.flags &= ~HAILO_FORMAT_FLAGS_QUANTIZED; + LOGGER__WARNING("Note: The output_vstream {} format flag is marked as quantized, which is not supported with {}. 
" + "flag has been automatically set to False.", softmax_op.output_vstream_info.name, softmax_op.op->get_name()); + } + + auto pipeline_latency_accumulator = create_pipeline_latency_accumulator(vstream_params); + CHECK_EXPECTED(pipeline_latency_accumulator); + + auto hw_read_element = add_hw_read_element(output_stream, pipeline_status, elements, "HwReadElement", shutdown_event, + buffer_pool_size, hw_read_element_stats_flags, hw_read_stream_stats_flags); + CHECK_EXPECTED(hw_read_element); + + auto hw_read_queue_element = add_pull_queue_element(output_stream, pipeline_status, elements, "PullQueueElement_hw_read", + shutdown_event, vstream_params); + CHECK_EXPECTED(hw_read_queue_element); + CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(hw_read_element.value(), hw_read_queue_element.value())); + + auto post_infer_element = add_post_infer_element(output_stream, pipeline_status, elements, + "PostInferElement", vstream_params, shutdown_event); + CHECK_EXPECTED(post_infer_element); + CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(hw_read_queue_element.value(), post_infer_element.value())); + + auto pre_softmax_queue_element = add_pull_queue_element(output_stream, pipeline_status, elements, "PullQueueElement_pre_softmax", + shutdown_event, vstream_params); + CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(post_infer_element.value(), pre_softmax_queue_element.value())); + + auto softmax_element = add_softmax_element(output_stream, pipeline_status, elements, "SoftmaxPostProcessElement", + vstream_params, softmax_op); + CHECK_EXPECTED(softmax_element); + CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(pre_softmax_queue_element.value(), softmax_element.value())); + auto user_buffer_queue_element = add_user_buffer_queue_element(output_stream, pipeline_status, elements, + "UserBufferQueueElement", shutdown_event, vstream_params); + CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(softmax_element.value(), user_buffer_queue_element.value())); + 
output_stream->set_timeout(std::chrono::milliseconds(HAILO_INFINITE)); + hw_read_queue_element->get()->set_timeout(std::chrono::milliseconds(HAILO_INFINITE)); + auto vstream = OutputVStream::create(output_vstream_info, vstream_params, user_buffer_queue_element.release(), std::move(elements), + std::move(pipeline_status), shutdown_event, core_op_activated_event, pipeline_latency_accumulator.release()); + CHECK_EXPECTED(vstream); + vstreams.emplace_back(vstream.release()); + + for (const auto &curr_vstream : vstreams) { + LOGGER__INFO("{}", curr_vstream.get_pipeline_description()); + } + + return vstreams; +} + InputVStream VStreamsBuilderUtils::create_input(std::shared_ptr input_vstream) { return InputVStream(std::move(input_vstream)); @@ -2085,6 +2410,92 @@ static bool are_formats_equal(const hailo_format_t &format1, const hailo_format_ return ((format1.order == format2.order) && (format1.flags == format2.flags) && (format1.type == format2.type)); } +Expected> VStreamsBuilderUtils::create_output_vstreams_from_streams(const OutputStreamWithParamsVector &all_output_streams, + OutputStreamPtrVector &output_streams, const hailo_vstream_params_t &vstream_params, + const std::unordered_map> &post_process_ops, + const std::unordered_map &op_inputs_to_op_name, const std::map &output_vstream_infos_map) +{ + auto first_stream_info = output_streams[0]->get_info(); + if ((HAILO_FORMAT_ORDER_HAILO_NMS == first_stream_info.format.order) && + (first_stream_info.nms_info.is_defused)) { + // Case defuse NMS + return create_output_nms(output_streams, vstream_params, output_vstream_infos_map); + } else if (contains(op_inputs_to_op_name, static_cast(first_stream_info.name))) { + // Case post-process on host + auto &op_name = op_inputs_to_op_name.at(first_stream_info.name); + auto &op = post_process_ops.at(op_name); + switch (op.get()->op_type) { + case HAILO_NET_FLOW_OP_TYPE_NMS: + { + assert(1 <= op->op->outputs_metadata().size()); + auto updated_outputs_metadata = 
op->op->outputs_metadata(); + updated_outputs_metadata.begin()->second.format = vstream_params.user_buffer_format; + if (HAILO_FORMAT_ORDER_AUTO == updated_outputs_metadata.begin()->second.format.order) { + updated_outputs_metadata.begin()->second.format.order = HAILO_FORMAT_ORDER_HAILO_NMS; + } + if (HAILO_FORMAT_TYPE_AUTO == updated_outputs_metadata.begin()->second.format.type) { + updated_outputs_metadata.begin()->second.format.type = HAILO_FORMAT_TYPE_FLOAT32; + } + if (HAILO_FORMAT_FLAGS_QUANTIZED & updated_outputs_metadata.begin()->second.format.flags) { + updated_outputs_metadata.begin()->second.format.flags &= ~HAILO_FORMAT_FLAGS_QUANTIZED; + LOGGER__WARNING("Note: The output_vstream {} format flag is marked as quantized, which is not supported with {}. " + "flag has been automatically set to False.", op->output_vstream_info.name, op->op->get_name()); + } + + op->op->set_outputs_metadata(updated_outputs_metadata); + CHECK_SUCCESS_AS_EXPECTED(op->op->validate_metadata()); + return create_output_post_process_nms(output_streams, vstream_params, output_vstream_infos_map, *op); + } + + case HAILO_NET_FLOW_OP_TYPE_ARGMAX: + { + assert(output_streams.size() == 1); + NameToVStreamParamsMap name_to_vstream_params_map; + for (auto &output_stream : all_output_streams) { + if (output_stream.first->get_info().name == output_streams[0]->get_info().name) { + for (auto &vstream : output_stream.second) { + name_to_vstream_params_map.insert(vstream); + } + } + } + auto output_vstream_info = output_vstream_infos_map.at(op.get()->name); + return create_output_post_process_argmax(output_streams[0], name_to_vstream_params_map, output_vstream_info, *op); + } + + case HAILO_NET_FLOW_OP_TYPE_SOFTMAX: + { + assert(output_streams.size() == 1); + NameToVStreamParamsMap name_to_vstream_params_map; + for (auto &output_stream : all_output_streams) { + if (output_stream.first->get_info().name == output_streams[0]->get_info().name) { + for (auto &vstream : output_stream.second) { + 
name_to_vstream_params_map.insert(vstream); + } + } + } + auto output_vstream_info = output_vstream_infos_map.at(op.get()->name); + return create_output_post_process_softmax(output_streams[0], name_to_vstream_params_map, output_vstream_info, *op); + } + + default: + LOGGER__ERROR("op type {} of op {} is not in any of the supported post process OP types", op.get()->op_type, op_name); + return make_unexpected(HAILO_INVALID_OPERATION); + } + } else { + // All other cases + assert(output_streams.size() == 1); + NameToVStreamParamsMap name_to_vstream_params_map; + for (auto &output_stream : all_output_streams) { + if (output_stream.first->get_info().name == output_streams[0]->get_info().name) { + for (auto &vstream : output_stream.second) { + name_to_vstream_params_map.insert(vstream); + } + } + } + return create_outputs(output_streams[0], name_to_vstream_params_map, output_vstream_infos_map); + } +} + Expected> VStreamsBuilderUtils::create_output_nms(OutputStreamPtrVector &output_streams, hailo_vstream_params_t vstreams_params, const std::map &output_vstream_infos) @@ -2139,6 +2550,174 @@ Expected> VStreamsBuilderUtils::create_output_post_pr return vstreams; } +Expected> VStreamsBuilderUtils::add_hw_read_element(std::shared_ptr &output_stream, + std::shared_ptr> &pipeline_status, std::vector> &elements, + const std::string &element_name, EventPtr &shutdown_event, size_t buffer_pool_size, + const hailo_pipeline_elem_stats_flags_t &hw_read_element_stats_flags, const hailo_vstream_stats_flags_t &hw_read_stream_stats_flags) +{ + auto hw_read_elem = HwReadElement::create(output_stream, + PipelineObject::create_element_name(element_name, output_stream->name(), output_stream->get_info().index), + HAILO_INFINITE_TIMEOUT, buffer_pool_size, hw_read_element_stats_flags, hw_read_stream_stats_flags, shutdown_event, pipeline_status); + CHECK_EXPECTED(hw_read_elem); + elements.push_back(hw_read_elem.value()); + return hw_read_elem; +} + +Expected> 
VStreamsBuilderUtils::add_pull_queue_element(std::shared_ptr &output_stream, + std::shared_ptr> &pipeline_status, std::vector> &elements, + const std::string &element_name, EventPtr &shutdown_event, const hailo_vstream_params_t &vstream_params) +{ + auto pull_queue_elem = PullQueueElement::create( + PipelineObject::create_element_name(element_name, output_stream->name(), output_stream->get_info().index), + vstream_params, shutdown_event, pipeline_status); + CHECK_EXPECTED(pull_queue_elem); + elements.push_back(pull_queue_elem.value()); + return pull_queue_elem; +} + +Expected> VStreamsBuilderUtils::add_argmax_element(std::shared_ptr &output_stream, + std::shared_ptr> &pipeline_status, std::vector> &elements, + const std::string &element_name, hailo_vstream_params_t &vstream_params, const NetFlowElement &argmax_op) +{ + // Updating metadata according to user request. TODO: HRT-9737 + auto updated_outputs_metadata = argmax_op.op.get()->outputs_metadata(); + updated_outputs_metadata.begin()->second.format = vstream_params.user_buffer_format; + argmax_op.op.get()->set_outputs_metadata(updated_outputs_metadata); + CHECK_SUCCESS_AS_EXPECTED(argmax_op.op.get()->validate_metadata()); + // Updating metadata according to use request. TODO: HRT-9737 - End + auto argmax_element = ArgmaxPostProcessElement::create(argmax_op.op, + PipelineObject::create_element_name(element_name, output_stream->name(), output_stream->get_info().index), + vstream_params.pipeline_elements_stats_flags, pipeline_status); + CHECK_EXPECTED(argmax_element); + elements.push_back(argmax_element.value()); + return argmax_element; +} + +Expected> VStreamsBuilderUtils::add_softmax_element(std::shared_ptr &output_stream, + std::shared_ptr> &pipeline_status, std::vector> &elements, + const std::string &element_name, hailo_vstream_params_t &vstream_params, const NetFlowElement &softmax_op) +{ + // Updating metadata according to user request. 
TODO: HRT-9737 + // Currently softmax only supports inputs to be float32 and order NHWC or NC + auto updated_inputs_metadata = softmax_op.op.get()->inputs_metadata(); + updated_inputs_metadata.begin()->second.format = vstream_params.user_buffer_format; + softmax_op.op.get()->set_inputs_metadata(updated_inputs_metadata); + + auto updated_outputs_metadata = softmax_op.op.get()->outputs_metadata(); + updated_outputs_metadata.begin()->second.format = vstream_params.user_buffer_format; + softmax_op.op.get()->set_outputs_metadata(updated_outputs_metadata); + CHECK_SUCCESS_AS_EXPECTED(softmax_op.op.get()->validate_metadata()); + // Updating metadata according to use request. TODO: HRT-9737 - End + auto softmax_element = SoftmaxPostProcessElement::create(softmax_op.op, + PipelineObject::create_element_name(element_name, output_stream->name(), output_stream->get_info().index), + vstream_params.pipeline_elements_stats_flags, pipeline_status); + CHECK_EXPECTED(softmax_element); + elements.push_back(softmax_element.value()); + return softmax_element; +} + +Expected> VStreamsBuilderUtils::add_user_buffer_queue_element(std::shared_ptr &output_stream, + std::shared_ptr> &pipeline_status, std::vector> &elements, + const std::string &element_name, EventPtr &shutdown_event, const hailo_vstream_params_t &vstream_params) +{ + auto post_argmax_queue_element = UserBufferQueueElement::create( + PipelineObject::create_element_name(element_name, output_stream->name(), output_stream->get_info().index), + vstream_params, shutdown_event, pipeline_status); + CHECK_EXPECTED(post_argmax_queue_element); + elements.push_back(post_argmax_queue_element.value()); + return post_argmax_queue_element; +} + +Expected> VStreamsBuilderUtils::add_post_infer_element(std::shared_ptr &output_stream, + std::shared_ptr> &pipeline_status, std::vector> &elements, + const std::string &element_name, const hailo_vstream_params_t &vstream_params, EventPtr shutdown_event) +{ + auto post_infer_element = 
PostInferElement::create(output_stream->get_info().hw_shape, output_stream->get_info().format, + output_stream->get_info().shape, vstream_params.user_buffer_format, output_stream->get_info().quant_info, output_stream->get_info().nms_info, + PipelineObject::create_element_name(element_name, output_stream->name(), output_stream->get_info().index), + vstream_params, pipeline_status, shutdown_event); + CHECK_EXPECTED(post_infer_element); + elements.push_back(post_infer_element.value()); + return post_infer_element; +} + +Expected> VStreamsBuilderUtils::create_output_post_process_argmax(std::shared_ptr output_stream, + const NameToVStreamParamsMap &vstreams_params_map, const hailo_vstream_info_t &output_vstream_info, const NetFlowElement &argmax_op) +{ + std::vector> elements; + std::vector vstreams; + + EventPtr core_op_activated_event = nullptr; + if (!output_stream->is_scheduled()) { + core_op_activated_event = output_stream->get_core_op_activated_event(); + } + + auto shutdown_event = Event::create_shared(Event::State::not_signalled); + CHECK_AS_EXPECTED(nullptr != shutdown_event, HAILO_OUT_OF_HOST_MEMORY); + + auto pipeline_status = make_shared_nothrow>(HAILO_SUCCESS); + CHECK_AS_EXPECTED(nullptr != pipeline_status, HAILO_OUT_OF_HOST_MEMORY); + + assert(!vstreams_params_map.empty()); + + // Note: In case of multiple values in vstreams_params_map (e.g. in the case of demux), we'll set the + // pipeline_elements_stats_flags for the hw_read_element as bitwise or of all the flags. 
+ hailo_pipeline_elem_stats_flags_t hw_read_element_stats_flags = HAILO_PIPELINE_ELEM_STATS_NONE; + hailo_vstream_stats_flags_t hw_read_stream_stats_flags = HAILO_VSTREAM_STATS_NONE; + size_t buffer_pool_size = 0; + for (const auto &elem_name_params : vstreams_params_map) { + hw_read_element_stats_flags |= elem_name_params.second.pipeline_elements_stats_flags; + hw_read_stream_stats_flags |= elem_name_params.second.vstream_stats_flags; + buffer_pool_size += elem_name_params.second.queue_size; + } + + // TODO (HRT-4522): Support this measurement + CHECK_AS_EXPECTED(!(hw_read_stream_stats_flags & HAILO_VSTREAM_STATS_MEASURE_FPS), HAILO_NOT_IMPLEMENTED, + "Pipeline FPS statistics measurement is not implemented"); + + auto hw_read_element = add_hw_read_element(output_stream, pipeline_status, elements, "HwReadElement", shutdown_event, + buffer_pool_size, hw_read_element_stats_flags, hw_read_stream_stats_flags); + CHECK_EXPECTED(hw_read_element); + + assert(1 == vstreams_params_map.size()); + auto op_input_format = argmax_op.op->inputs_metadata().begin()->second.format; + auto vstream_params = expand_vstream_params_autos_argmax(vstreams_params_map.begin()->second, op_input_format); + + auto hw_read_queue_element = add_pull_queue_element(output_stream, pipeline_status, elements, "PullQueueElement_hw_read", + shutdown_event, vstream_params); + CHECK_EXPECTED(hw_read_queue_element); + + CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(hw_read_element.value(), hw_read_queue_element.value())); + + auto argmax_element = add_argmax_element(output_stream, pipeline_status, elements, "ArgmaxPostProcessElement", + vstream_params, argmax_op); + CHECK_EXPECTED(argmax_element); + + CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(hw_read_queue_element.value(), argmax_element.value())); + + auto post_argmax_queue_element = add_user_buffer_queue_element(output_stream, pipeline_status, elements, + "UserBufferQueueElement_post_argmax", shutdown_event, vstream_params); + 
CHECK_EXPECTED(post_argmax_queue_element); + + CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(argmax_element.value(), post_argmax_queue_element.value())); + + auto pipeline_latency_accumulator = create_pipeline_latency_accumulator(vstream_params); + CHECK_EXPECTED(pipeline_latency_accumulator); + + output_stream->set_timeout(std::chrono::milliseconds(HAILO_INFINITE)); + hw_read_queue_element->get()->set_timeout(std::chrono::milliseconds(HAILO_INFINITE)); + auto vstream = OutputVStream::create(output_vstream_info, vstream_params, post_argmax_queue_element.release(), std::move(elements), + std::move(pipeline_status), shutdown_event, core_op_activated_event, pipeline_latency_accumulator.release()); + CHECK_EXPECTED(vstream); + vstreams.emplace_back(vstream.release()); + + for (const auto ¤t_vstream : vstreams) { + LOGGER__INFO("{}", current_vstream.get_pipeline_description()); + } + + return vstreams; +} + hailo_status VStreamsBuilderUtils::add_demux(std::shared_ptr output_stream, NameToVStreamParamsMap &vstreams_params_map, std::vector> &&base_elements, std::vector &vstreams, std::shared_ptr hw_read_elem, EventPtr shutdown_event, std::shared_ptr> pipeline_status, @@ -2212,7 +2791,7 @@ hailo_status VStreamsBuilderUtils::add_demux(std::shared_ptr outpu auto post_infer_elem = PostInferElement::create(edge_info.hw_shape, edge_info.format, edge_info.shape, vstream_params.user_buffer_format, edge_info.quant_info, edge_info.nms_info, PipelineObject::create_element_name("PostInferElement", edge_info.name, edge_info.index), - vstream_params, pipeline_status); + vstream_params, pipeline_status, shutdown_event); CHECK_EXPECTED_AS_STATUS(post_infer_elem); current_vstream_elements.push_back(post_infer_elem.value()); CHECK_SUCCESS(PipelinePad::link_pads(demux_queue_elem.value(), post_infer_elem.value())); @@ -2267,7 +2846,7 @@ hailo_status VStreamsBuilderUtils::add_nms_fuse(OutputStreamPtrVector &output_st auto vstream_info = output_vstream_infos.find(fused_layer_name); 
CHECK(vstream_info != output_vstream_infos.end(), HAILO_NOT_FOUND, - "Failed to find vstream info of {}", fused_layer_name); + "Failed to find vstream info of {}. Could be due to use of old HEF. Try to re-compile network with newer Dataflow Compiler version", fused_layer_name); vstreams_params = expand_vstream_params_autos(first_defused_stream_info, vstreams_params); auto nms_elem = NmsMuxElement::create(nms_infos, @@ -2320,7 +2899,8 @@ hailo_status VStreamsBuilderUtils::add_nms_fuse(OutputStreamPtrVector &output_st auto post_infer_elem = PostInferElement::create({}, src_stream_format, {}, vstreams_params.user_buffer_format, vstream_info->second.quant_info, fused_layer_nms_info, - PipelineObject::create_element_name("PostInferElement", fused_layer_name, 0), vstreams_params, pipeline_status); + PipelineObject::create_element_name("PostInferElement", fused_layer_name, 0), vstreams_params, pipeline_status, + shutdown_event); CHECK_EXPECTED_AS_STATUS(post_infer_elem); elements.push_back(post_infer_elem.value()); diff --git a/hailort/libhailort/src/net_flow/pipeline/vstream_internal.hpp b/hailort/libhailort/src/net_flow/pipeline/vstream_internal.hpp index ef1126f..587d360 100644 --- a/hailort/libhailort/src/net_flow/pipeline/vstream_internal.hpp +++ b/hailort/libhailort/src/net_flow/pipeline/vstream_internal.hpp @@ -70,6 +70,7 @@ public: virtual hailo_status before_fork() { return HAILO_SUCCESS; }; virtual hailo_status after_fork_in_parent() { return HAILO_SUCCESS; }; virtual hailo_status after_fork_in_child() { return HAILO_SUCCESS; }; + virtual bool is_aborted() { return m_is_aborted; }; protected: BaseVStream(const hailo_vstream_info_t &vstream_info, const hailo_vstream_params_t &vstream_params, @@ -235,6 +236,9 @@ public: virtual hailo_status before_fork() override; virtual hailo_status after_fork_in_parent() override; virtual hailo_status after_fork_in_child() override; + virtual hailo_status stop_and_clear() override; + virtual hailo_status start_vstream() 
override; + virtual bool is_aborted() override; private: InputVStreamClient(std::unique_ptr client, uint32_t input_vstream_handle, hailo_format_t &&user_buffer_format, @@ -274,6 +278,9 @@ public: virtual hailo_status before_fork() override; virtual hailo_status after_fork_in_parent() override; virtual hailo_status after_fork_in_child() override; + virtual hailo_status stop_and_clear() override; + virtual hailo_status start_vstream() override; + virtual bool is_aborted() override; private: OutputVStreamClient(std::unique_ptr client, uint32_t outputs_vstream_handle, hailo_format_t &&user_buffer_format, @@ -323,22 +330,68 @@ public: static Expected> create(const hailo_3d_image_shape_t &src_image_shape, const hailo_format_t &src_format, const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, const hailo_quant_info_t &dst_quant_info, const hailo_nms_info_t &nms_info, const std::string &name, - hailo_pipeline_elem_stats_flags_t elem_flags, std::shared_ptr> pipeline_status); + hailo_pipeline_elem_stats_flags_t elem_flags, std::shared_ptr> pipeline_status, + std::chrono::milliseconds timeout, hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event, + size_t buffer_pool_size); static Expected> create(const hailo_3d_image_shape_t &src_image_shape, const hailo_format_t &src_format, const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, const hailo_quant_info_t &dst_quant_info, const hailo_nms_info_t &nms_info, - const std::string &name, const hailo_vstream_params_t &vstream_params, std::shared_ptr> pipeline_status); + const std::string &name, const hailo_vstream_params_t &vstream_params, std::shared_ptr> pipeline_status, EventPtr shutdown_event); PostInferElement(std::unique_ptr &&transform_context, const std::string &name, - DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status); + DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, BufferPoolPtr buffer_pool, + 
std::chrono::milliseconds timeout); virtual ~PostInferElement() = default; virtual hailo_status run_push(PipelineBuffer &&buffer) override; virtual PipelinePad &next_pad() override; virtual std::string description() const override; + virtual std::vector get_queue_size_accumulators() override; protected: virtual Expected action(PipelineBuffer &&input, PipelineBuffer &&optional) override; private: std::unique_ptr m_transform_context; + BufferPoolPtr m_pool; + std::chrono::milliseconds m_timeout; +}; + +class ArgmaxPostProcessElement : public FilterElement +{ +public: + static Expected> create(std::shared_ptr argmax_op, + const std::string &name, hailo_pipeline_elem_stats_flags_t elem_flags, + std::shared_ptr> pipeline_status); + ArgmaxPostProcessElement(std::shared_ptr argmax_op, const std::string &name, + DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status); + virtual ~ArgmaxPostProcessElement() = default; + virtual hailo_status run_push(PipelineBuffer &&buffer) override; + virtual PipelinePad &next_pad() override; + virtual std::string description() const override; + +protected: + virtual Expected action(PipelineBuffer &&input, PipelineBuffer &&optional) override; + +private: + std::shared_ptr m_argmax_op; +}; + +class SoftmaxPostProcessElement : public FilterElement +{ +public: + static Expected> create(std::shared_ptr softmax_op, + const std::string &name, hailo_pipeline_elem_stats_flags_t elem_flags, + std::shared_ptr> pipeline_status); + SoftmaxPostProcessElement(std::shared_ptr softmax_op, const std::string &name, + DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status); + virtual ~SoftmaxPostProcessElement() = default; + virtual hailo_status run_push(PipelineBuffer &&buffer) override; + virtual PipelinePad &next_pad() override; + virtual std::string description() const override; + +protected: + virtual Expected action(PipelineBuffer &&input, PipelineBuffer &&optional) override; + +private: + std::shared_ptr 
m_softmax_op; }; class NmsPostProcessMuxElement : public BaseMuxElement @@ -429,11 +482,11 @@ public: virtual Expected run_pull(PipelineBuffer &&optional, const PipelinePad &source) override; virtual hailo_status execute_activate() override; virtual hailo_status execute_deactivate() override; - virtual hailo_status execute_post_deactivate() override; + virtual hailo_status execute_post_deactivate(bool should_clear_abort) override; virtual hailo_status execute_clear() override; virtual hailo_status execute_flush() override; virtual hailo_status execute_abort() override; - virtual hailo_status execute_resume() override; + virtual hailo_status execute_clear_abort() override; virtual hailo_status execute_wait_for_finish() override; uint32_t get_invalid_frames_count(); virtual std::string description() const override; @@ -461,11 +514,11 @@ public: virtual Expected run_pull(PipelineBuffer &&optional, const PipelinePad &source) override; virtual hailo_status execute_activate() override; virtual hailo_status execute_deactivate() override; - virtual hailo_status execute_post_deactivate() override; + virtual hailo_status execute_post_deactivate(bool should_clear_abort) override; virtual hailo_status execute_clear() override; virtual hailo_status execute_flush() override; virtual hailo_status execute_abort() override; - virtual hailo_status execute_resume() override; + virtual hailo_status execute_clear_abort() override; virtual hailo_status execute_wait_for_finish() override; virtual std::string description() const override; @@ -498,10 +551,33 @@ public: static Expected> create_output_nms(OutputStreamPtrVector &output_streams, hailo_vstream_params_t vstreams_params, const std::map &output_vstream_infos); + static Expected> create_output_vstreams_from_streams(const OutputStreamWithParamsVector &all_output_streams, + OutputStreamPtrVector &output_streams, const hailo_vstream_params_t &vstream_params, + const std::unordered_map> &post_process_ops, + const std::unordered_map 
&op_inputs_to_op_name, const std::map &output_vstream_infos_map); static Expected> create_output_post_process_nms(OutputStreamPtrVector &output_streams, hailo_vstream_params_t vstreams_params, const std::map &output_vstream_infos, const NetFlowElement &nms_op); + static Expected> add_hw_read_element(std::shared_ptr &output_stream, + std::shared_ptr> &pipeline_status, std::vector> &elements, + const std::string &element_name, EventPtr &shutdown_event, size_t buffer_pool_size, + const hailo_pipeline_elem_stats_flags_t &hw_read_element_stats_flags, const hailo_vstream_stats_flags_t &hw_read_stream_stats_flags); + static Expected> add_pull_queue_element(std::shared_ptr &output_stream, + std::shared_ptr> &pipeline_status, std::vector> &elements, + const std::string &element_name, EventPtr &shutdown_event, const hailo_vstream_params_t &vstream_params); + static Expected> add_argmax_element(std::shared_ptr &output_stream, + std::shared_ptr> &pipeline_status, std::vector> &elements, + const std::string &element_name, hailo_vstream_params_t &vstream_params, const NetFlowElement &argmax_op); + static Expected> add_softmax_element(std::shared_ptr &output_stream, + std::shared_ptr> &pipeline_status, std::vector> &elements, + const std::string &element_name, hailo_vstream_params_t &vstream_params, const NetFlowElement &softmax_op); + static Expected> add_user_buffer_queue_element(std::shared_ptr &output_stream, + std::shared_ptr> &pipeline_status, std::vector> &elements, + const std::string &element_name, EventPtr &shutdown_event, const hailo_vstream_params_t &vstream_params); + static Expected> add_post_infer_element(std::shared_ptr &output_stream, + std::shared_ptr> &pipeline_status, std::vector> &elements, + const std::string &element_name, const hailo_vstream_params_t &vstream_params, EventPtr shutdown_event); static hailo_status add_demux(std::shared_ptr output_stream, NameToVStreamParamsMap &vstreams_params_map, std::vector> &&elements, std::vector &vstreams, 
std::shared_ptr hw_read_elem, EventPtr shutdown_event, std::shared_ptr> pipeline_status, @@ -516,6 +592,12 @@ public: const std::map &output_vstream_infos, const NetFlowElement &nms_op); static Expected create_pipeline_latency_accumulator(const hailo_vstream_params_t &vstreams_params); + +private: + static Expected> create_output_post_process_argmax(std::shared_ptr output_stream, + const NameToVStreamParamsMap &vstreams_params_map, const hailo_vstream_info_t &output_vstream_info, const NetFlowElement &argmax_op); + static Expected> create_output_post_process_softmax(std::shared_ptr output_stream, + const NameToVStreamParamsMap &vstreams_params_map, const hailo_vstream_info_t &output_vstream_info, const NetFlowElement &softmax_op); }; } /* namespace hailort */ diff --git a/hailort/libhailort/src/network_group/network_group.cpp b/hailort/libhailort/src/network_group/network_group.cpp index bc09df0..2fa911f 100644 --- a/hailort/libhailort/src/network_group/network_group.cpp +++ b/hailort/libhailort/src/network_group/network_group.cpp @@ -27,6 +27,27 @@ namespace hailort { +Expected> ConfiguredNetworkGroup::duplicate_network_group_client(uint32_t handle, const std::string &network_group_name) +{ +#ifdef HAILO_SUPPORT_MULTI_PROCESS + auto net_group_client = ConfiguredNetworkGroupClient::duplicate_network_group_client(handle, network_group_name); + CHECK_EXPECTED(net_group_client); + + return std::shared_ptr(net_group_client.release()); +#else + (void)handle; + (void)network_group_name; + LOGGER__ERROR("`duplicate_network_group_client()` requires service compilation with HAILO_BUILD_SERVICE"); + return make_unexpected(HAILO_INVALID_OPERATION); +#endif // HAILO_SUPPORT_MULTI_PROCESS +} + +Expected ConfiguredNetworkGroup::get_client_handle() const +{ + LOGGER__ERROR("`get_client_handle()` is valid only when working with HailoRT Service!"); + return make_unexpected(HAILO_INVALID_OPERATION); +} + Expected> ConfiguredNetworkGroup::activate() { const auto network_group_params 
= HailoRTDefaults::get_active_network_group_params(); @@ -40,6 +61,11 @@ Expected> ConfiguredNetworkGroupBase::act } /* Network group base functions */ +Expected ConfiguredNetworkGroupBase::run_hw_infer_estimator() +{ + return get_core_op()->run_hw_infer_estimator(); +} + Expected ConfiguredNetworkGroupBase::get_latency_measurement(const std::string &network_name) { return get_core_op()->get_latency_measurement(network_name); @@ -48,12 +74,81 @@ Expected ConfiguredNetworkGroupBase::get_latency_measu Expected ConfiguredNetworkGroupBase::get_output_streams_from_vstream_names( const std::map &outputs_params) { - return get_core_op()->get_output_streams_from_vstream_names(outputs_params); + OutputStreamWithParamsVector results; + std::unordered_map outputs_edges_params; + for (auto &name_params_pair : outputs_params) { + auto stream_names = m_network_group_metadata.get_stream_names_from_vstream_name(name_params_pair.first); + CHECK_EXPECTED(stream_names); + + for (auto &stream_name : stream_names.value()) { + auto stream = get_shared_output_stream_by_name(stream_name); + CHECK_EXPECTED(stream); + if (stream.value()->get_info().is_mux) { + outputs_edges_params.emplace(name_params_pair); + } + else { + NameToVStreamParamsMap name_to_params = {name_params_pair}; + results.emplace_back(stream.value(), name_to_params); + } + } + } + // Add non mux streams to result + hailo_status status = add_mux_streams_by_edges_names(results, outputs_edges_params); + CHECK_SUCCESS_AS_EXPECTED(status); + + return results; +} + +// This function adds to results the OutputStreams that correspond to the edges in outputs_edges_params. +// If an edge name appears in outputs_edges_params then all of its predecessors must appear in outputs_edges_params as well, Otherwise, an error is returned. +// We use the set seen_edges in order to mark the edges already evaluated by one of its' predecessor. 
+hailo_status ConfiguredNetworkGroupBase::add_mux_streams_by_edges_names(OutputStreamWithParamsVector &results, + const std::unordered_map &outputs_edges_params) +{ + std::unordered_set seen_edges; + for (auto &name_params_pair : outputs_edges_params) { + if (seen_edges.end() != seen_edges.find(name_params_pair.first)) { + // Edge has already been seen by one of its predecessors + continue; + } + auto output_streams = get_output_streams_by_vstream_name(name_params_pair.first); + CHECK_EXPECTED_AS_STATUS(output_streams); + CHECK(output_streams->size() == 1, HAILO_INVALID_ARGUMENT, + "mux streams cannot be separated into multiple streams"); + auto output_stream = output_streams.release()[0]; + + // TODO: Find a better way to get the mux edges without creating OutputDemuxer + auto expected_demuxer = OutputDemuxer::create(*output_stream); + CHECK_EXPECTED_AS_STATUS(expected_demuxer); + + NameToVStreamParamsMap name_to_params; + for (auto &edge : expected_demuxer.value()->get_edges_stream_info()) { + auto edge_name_params_pair = outputs_edges_params.find(edge.name); + CHECK(edge_name_params_pair != outputs_edges_params.end(), HAILO_INVALID_ARGUMENT, + "All edges of stream {} must be in output vstream params. 
edge {} is missing.", + name_params_pair.first, edge.name); + seen_edges.insert(edge.name); + name_to_params.insert(*edge_name_params_pair); + } + results.emplace_back(output_stream, name_to_params); + } + return HAILO_SUCCESS; } Expected ConfiguredNetworkGroupBase::get_output_streams_by_vstream_name(const std::string &name) { - return get_core_op()->get_output_streams_by_vstream_name(name); + auto stream_names = m_network_group_metadata.get_stream_names_from_vstream_name(name); + CHECK_EXPECTED(stream_names); + + OutputStreamPtrVector output_streams; + output_streams.reserve(stream_names->size()); + for (const auto &stream_name : stream_names.value()) { + auto stream = get_shared_output_stream_by_name(stream_name); + CHECK_EXPECTED(stream); + output_streams.emplace_back(stream.value()); + } + + return output_streams; } Expected ConfiguredNetworkGroupBase::get_layer_info(const std::string &stream_name) @@ -63,10 +158,10 @@ Expected ConfiguredNetworkGroupBase::get_layer_info(const std::string ConfiguredNetworkGroupBase::ConfiguredNetworkGroupBase( const ConfigureNetworkParams &config_params, std::vector> &&core_ops, - std::vector> &&net_flow_ops) : + NetworkGroupMetadata &&metadata) : m_config_params(config_params), m_core_ops(std::move(core_ops)), - m_net_flow_ops(std::move(net_flow_ops)) + m_network_group_metadata(std::move(metadata)) {} // static func @@ -101,12 +196,12 @@ Expected> ConfiguredNetworkGroupBase::act const std::string &ConfiguredNetworkGroupBase::get_network_group_name() const { - return get_core_op_metadata()->core_op_name(); + return m_network_group_metadata.name(); } const std::string &ConfiguredNetworkGroupBase::name() const { - return get_core_op_metadata()->core_op_name(); + return m_network_group_metadata.name(); } hailo_status ConfiguredNetworkGroupBase::activate_low_level_streams(uint16_t dynamic_batch_size, bool resume_pending_stream_transfers) @@ -136,19 +231,32 @@ Expected ConfiguredNetworkGroupBase::get_stream_batch_size(const std:: 
return get_core_op()->get_stream_batch_size(stream_name); } -bool ConfiguredNetworkGroupBase::is_multi_context() const +Expected> ConfiguredNetworkGroupBase::get_sorted_output_names() { - return get_core_op()->is_multi_context(); + auto res = m_network_group_metadata.get_sorted_output_names(); + return res; } -const ConfigureNetworkParams ConfiguredNetworkGroupBase::get_config_params() const +Expected> ConfiguredNetworkGroupBase::get_stream_names_from_vstream_name(const std::string &vstream_name) { - return get_core_op()->get_config_params(); + auto res = m_network_group_metadata.get_stream_names_from_vstream_name(vstream_name); + return res; } Expected> ConfiguredNetworkGroupBase::get_vstream_names_from_stream_name(const std::string &stream_name) { - return get_core_op()->get_vstream_names_from_stream_name(stream_name); + auto res = m_network_group_metadata.get_vstream_names_from_stream_name(stream_name); + return res; +} + +bool ConfiguredNetworkGroupBase::is_multi_context() const +{ + return get_core_op()->is_multi_context(); +} + +const ConfigureNetworkParams ConfiguredNetworkGroupBase::get_config_params() const +{ + return get_core_op()->get_config_params(); } const SupportedFeatures &ConfiguredNetworkGroupBase::get_supported_features() @@ -234,56 +342,95 @@ hailo_status ConfiguredNetworkGroupBase::wait_for_activation(const std::chrono:: Expected>> ConfiguredNetworkGroupBase::get_output_vstream_groups() { - return get_core_op()->get_output_vstream_groups(); + std::vector> results; + + for (auto output_stream : get_output_streams()) { + auto vstreams_group = m_network_group_metadata.get_vstream_names_from_stream_name(output_stream.get().name()); + CHECK_EXPECTED(vstreams_group); + results.push_back(vstreams_group.release()); + } + + return results; } Expected>> ConfiguredNetworkGroupBase::make_output_vstream_params_groups( bool quantized, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size) { - return 
get_core_op()->make_output_vstream_params_groups(quantized, format_type, timeout_ms, queue_size); + auto params = make_output_vstream_params(quantized, format_type, timeout_ms, queue_size); + CHECK_EXPECTED(params); + + auto groups = get_output_vstream_groups(); + CHECK_EXPECTED(groups); + + std::vector> results(groups->size(), std::map()); + + size_t pipeline_group_index = 0; + for (const auto &group : groups.release()) { + for (const auto &name_pair : params.value()) { + if (contains(group, name_pair.first)) { + results[pipeline_group_index].insert(name_pair); + } + } + pipeline_group_index++; + } + + return results; } Expected> ConfiguredNetworkGroupBase::make_input_vstream_params( bool quantized, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size, const std::string &network_name) { - return get_core_op()->make_input_vstream_params(quantized, format_type, timeout_ms, queue_size, network_name); + auto input_vstream_infos = m_network_group_metadata.get_input_vstream_infos(network_name); + CHECK_EXPECTED(input_vstream_infos); + + std::map res; + auto status = Hef::Impl::fill_missing_vstream_params_with_default(res, input_vstream_infos.value(), quantized, + format_type, timeout_ms, queue_size); + CHECK_SUCCESS_AS_EXPECTED(status); + return res; } Expected> ConfiguredNetworkGroupBase::make_output_vstream_params( bool quantized, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size, const std::string &network_name) { - return get_core_op()->make_output_vstream_params(quantized, format_type, timeout_ms, queue_size, network_name); + auto output_vstream_infos = m_network_group_metadata.get_output_vstream_infos(network_name); + CHECK_EXPECTED(output_vstream_infos); + std::map res; + auto status = Hef::Impl::fill_missing_vstream_params_with_default(res, output_vstream_infos.value(), quantized, + format_type, timeout_ms, queue_size); + CHECK_SUCCESS_AS_EXPECTED(status); + return res; } Expected> 
ConfiguredNetworkGroupBase::get_network_infos() const { - return get_core_op()->get_network_infos(); + return m_network_group_metadata.get_network_infos(); } Expected> ConfiguredNetworkGroupBase::get_all_stream_infos( const std::string &network_name) const { - return get_core_op()->get_all_stream_infos(network_name); + return get_core_op_metadata()->get_all_stream_infos(network_name); } Expected> ConfiguredNetworkGroupBase::get_input_vstream_infos( const std::string &network_name) const { - return get_core_op()->get_input_vstream_infos(network_name); + return m_network_group_metadata.get_input_vstream_infos(network_name); } Expected> ConfiguredNetworkGroupBase::get_output_vstream_infos( const std::string &network_name) const { - return get_core_op()->get_output_vstream_infos(network_name); + return m_network_group_metadata.get_output_vstream_infos(network_name); } Expected> ConfiguredNetworkGroupBase::get_all_vstream_infos( const std::string &network_name) const { - return get_core_op()->get_all_vstream_infos(network_name); + return m_network_group_metadata.get_all_vstream_infos(network_name); } AccumulatorPtr ConfiguredNetworkGroupBase::get_activation_time_accumulator() const @@ -341,67 +488,49 @@ Expected> ConfiguredNetworkGroupBase::create_input_vst return vstreams; } -Expected> ConfiguredNetworkGroupBase::create_output_vstreams(const std::map &outputs_params) +Expected> ConfiguredNetworkGroupBase::create_output_vstreams(const std::map &vstreams_params) { std::vector vstreams; - vstreams.reserve(outputs_params.size()); - auto output_streams = get_output_streams_from_vstream_names(outputs_params); - CHECK_EXPECTED(output_streams); + vstreams.reserve(vstreams_params.size()); + auto all_output_streams_expected = get_output_streams_from_vstream_names(vstreams_params); + CHECK_EXPECTED(all_output_streams_expected); + auto all_output_streams = all_output_streams_expected.release(); auto output_vstream_infos = get_output_vstream_infos(); 
CHECK_EXPECTED(output_vstream_infos); auto output_vstream_infos_map = vstream_infos_vector_to_map(output_vstream_infos.release()); - // We iterate through all output streams, and if they are nms, we collect them together by their original stream name. - // We need this step because all nms output streams of the same original stream need to be fused together - - std::unordered_map> post_process_nms_ops; - std::set post_process_stream_inputs; - for (auto &op : m_net_flow_ops) { - post_process_nms_ops.insert({op->name, op}); - post_process_stream_inputs.insert(op->input_streams.begin(), op->input_streams.end()); - } - std::map> nms_op_output_streams; - std::map> nms_output_streams; - for (auto &stream_params_pair : output_streams.value()) { - if ((HAILO_FORMAT_ORDER_HAILO_NMS == stream_params_pair.first->get_info().format.order && stream_params_pair.first->get_info().nms_info.is_defused) && - (outputs_params.end() != outputs_params.find(stream_params_pair.first->get_info().nms_info.defuse_info.original_name))) { - auto original_name = stream_params_pair.first->get_info().nms_info.defuse_info.original_name; - nms_output_streams.emplace(original_name, std::pair( - OutputStreamPtrVector(), outputs_params.at(original_name))); - nms_output_streams[original_name].first.push_back(stream_params_pair.first); - } else if (post_process_stream_inputs.count(stream_params_pair.first->get_info().name)) { - for (auto &op : m_net_flow_ops) { - if (op->input_streams.count(stream_params_pair.first->get_info().name)) { - assert(op->op->outputs_metadata().size() == 1); - nms_op_output_streams.emplace(op->name, std::pair( - OutputStreamPtrVector(), outputs_params.at(op->op->outputs_metadata().begin()->first))); - nms_op_output_streams[op->name].first.push_back(stream_params_pair.first); - } - } - } else { - auto outputs = VStreamsBuilderUtils::create_outputs(stream_params_pair.first, stream_params_pair.second, output_vstream_infos_map); - CHECK_EXPECTED(outputs); - 
vstreams.insert(vstreams.end(), std::make_move_iterator(outputs->begin()), std::make_move_iterator(outputs->end())); + // Building DBs that connect output_vstreams, output_streams and ops. + // Note: Assuming each post process op has a unique output streams. + // In other words, not possible for an output stream to be connected to more than one op + std::unordered_map> post_process_ops; + std::unordered_map op_inputs_to_op_name; + for (auto &op : m_network_group_metadata.m_net_flow_ops) { + post_process_ops.insert({op->name, op}); + for (auto &input_stream : op->input_streams) { + op_inputs_to_op_name.insert({input_stream, op->name}); } } - for (auto &nms_output_stream_pair : nms_output_streams) { - auto outputs = VStreamsBuilderUtils::create_output_nms(nms_output_stream_pair.second.first, nms_output_stream_pair.second.second, - output_vstream_infos_map); - CHECK_EXPECTED(outputs); - vstreams.insert(vstreams.end(), std::make_move_iterator(outputs->begin()), std::make_move_iterator(outputs->end())); - } - for (auto &nms_output_stream_pair : nms_op_output_streams) { - auto op = post_process_nms_ops.at(nms_output_stream_pair.first); - auto outputs = VStreamsBuilderUtils::create_output_post_process_nms(nms_output_stream_pair.second.first, - nms_output_stream_pair.second.second, output_vstream_infos_map, - *op); + + // streams_added is a vector which holds all stream names which vstreams connected to them were already added (for demux cases) + std::vector streams_added; + for (auto &vstream_params : vstreams_params) { + auto output_streams = get_output_streams_by_vstream_name(vstream_params.first); + CHECK_EXPECTED(output_streams); + if (contains(streams_added, static_cast(output_streams.value()[0]->get_info().name))) { + continue; + } + for (auto &output_stream : output_streams.value()) { + streams_added.push_back(output_stream->get_info().name); + } + + auto outputs = VStreamsBuilderUtils::create_output_vstreams_from_streams(all_output_streams, output_streams.value(), 
vstream_params.second, + post_process_ops, op_inputs_to_op_name, output_vstream_infos_map); CHECK_EXPECTED(outputs); vstreams.insert(vstreams.end(), std::make_move_iterator(outputs->begin()), std::make_move_iterator(outputs->end())); } get_core_op()->set_vstreams_multiplexer_callbacks(vstreams); - return vstreams; } diff --git a/hailort/libhailort/src/network_group/network_group_internal.hpp b/hailort/libhailort/src/network_group/network_group_internal.hpp index 11c5513..31cb962 100644 --- a/hailort/libhailort/src/network_group/network_group_internal.hpp +++ b/hailort/libhailort/src/network_group/network_group_internal.hpp @@ -51,16 +51,17 @@ namespace hailort { +using stream_name_t = std::string; +using op_name_t = std::string; class ConfiguredNetworkGroupBase : public ConfiguredNetworkGroup { public: static Expected> create(const ConfigureNetworkParams &config_params, - std::vector> &&core_ops, std::vector> &&net_flow_ops) + std::vector> &&core_ops, NetworkGroupMetadata &&metadata) { auto net_group_ptr = std::shared_ptr(new (std::nothrow) - ConfiguredNetworkGroupBase(config_params, std::move(core_ops), std::move(net_flow_ops))); - // auto net_group_ptr = make_shared_nothrow(config_params, std::move(core_ops), std::move(net_flow_ops)); + ConfiguredNetworkGroupBase(config_params, std::move(core_ops), std::move(metadata))); CHECK_NOT_NULL_AS_EXPECTED(net_group_ptr, HAILO_OUT_OF_HOST_MEMORY); return net_group_ptr; @@ -118,16 +119,21 @@ public: virtual bool is_multi_context() const override; virtual const ConfigureNetworkParams get_config_params() const override; + virtual Expected run_hw_infer_estimator() override; + // TODO: HRT-9551 - Change to get_core_op_by_name() when multiple core_ops supported std::shared_ptr get_core_op() const; // TODO: HRT-9546 Remove const std::shared_ptr get_core_op_metadata() const; - Expected> get_vstream_names_from_stream_name(const std::string &stream_name); const SupportedFeatures &get_supported_features(); Expected 
get_stream_batch_size(const std::string &stream_name); + virtual Expected> get_sorted_output_names() override; + virtual Expected> get_stream_names_from_vstream_name(const std::string &vstream_name) override; + virtual Expected> get_vstream_names_from_stream_name(const std::string &stream_name) override; + virtual Expected> create_input_vstreams(const std::map &inputs_params) override; virtual Expected> create_output_vstreams(const std::map &outputs_params) override; @@ -204,7 +210,7 @@ public: private: ConfiguredNetworkGroupBase(const ConfigureNetworkParams &config_params, - std::vector> &&core_ops, std::vector> &&net_flow_ops); + std::vector> &&core_ops, NetworkGroupMetadata &&metadata); static uint16_t get_smallest_configured_batch_size(const ConfigureNetworkParams &config_params); hailo_status create_vdma_input_stream(Device &device, const std::string &stream_name, @@ -225,7 +231,7 @@ private: const ConfigureNetworkParams m_config_params; std::vector> m_core_ops; - std::vector> m_net_flow_ops; + NetworkGroupMetadata m_network_group_metadata; friend class VDeviceCoreOp; friend class VDeviceActivatedCoreOp; @@ -289,6 +295,12 @@ public: virtual bool is_multi_context() const override; virtual const ConfigureNetworkParams get_config_params() const override; + virtual Expected> get_sorted_output_names() override; + virtual Expected> get_stream_names_from_vstream_name(const std::string &vstream_name) override; + virtual Expected> get_vstream_names_from_stream_name(const std::string &stream_name) override; + + virtual Expected run_hw_infer_estimator() override; + virtual Expected> create_input_vstreams(const std::map &inputs_params); virtual Expected> create_output_vstreams(const std::map &outputs_params); @@ -296,7 +308,16 @@ public: virtual hailo_status after_fork_in_parent() override; virtual hailo_status after_fork_in_child() override; + virtual Expected get_client_handle() const override + { + auto val = m_handle; + return val; + }; + + static Expected> 
duplicate_network_group_client(uint32_t handle, const std::string &network_group_name); + private: + ConfiguredNetworkGroupClient(uint32_t handle, const std::string &network_group_name); hailo_status create_client(); std::unique_ptr m_client; diff --git a/hailort/libhailort/src/os/CMakeLists.txt b/hailort/libhailort/src/os/CMakeLists.txt index 4e52af3..8e8273c 100644 --- a/hailort/libhailort/src/os/CMakeLists.txt +++ b/hailort/libhailort/src/os/CMakeLists.txt @@ -8,7 +8,7 @@ elseif(UNIX) if (CMAKE_SYSTEM_NAME STREQUAL QNX) set(HAILO_FULL_OS_DIR ${HAILO_OS_DIR}/qnx) else() - set(HAILO_FULL_OS_DIR ${HAILO_OS_DIR}/unix) + set(HAILO_FULL_OS_DIR ${HAILO_OS_DIR}/linux) endif() else() message(FATAL_ERROR "Unexpeced platform target, stopping build") @@ -19,13 +19,12 @@ set(HAILO_OS_DIR ${HAILO_OS_DIR} PARENT_SCOPE) set(HAILO_FULL_OS_DIR ${HAILO_FULL_OS_DIR} PARENT_SCOPE) -set(files - ${HAILO_OS_DIR}/microsec_timer.cpp - ${HAILO_OS_DIR}/file_descriptor.cpp - ${HAILO_OS_DIR}/mmap_buffer.cpp - ${HAILO_OS_DIR}/hailort_driver.cpp - ${HAILO_FULL_OS_DIR}/event.cpp - ${HAILO_FULL_OS_DIR}/driver_scan.cpp -) +if(WIN32) + add_subdirectory(windows) +elseif(UNIX) + add_subdirectory(posix) +else() + message(FATAL_ERROR "Unexpeced platform target, stopping build") +endif() -set(HAILORT_CPP_OS_SOURCES ${files} PARENT_SCOPE) +set(HAILORT_CPP_SOURCES ${HAILORT_CPP_SOURCES} ${SRC_FILES} PARENT_SCOPE) \ No newline at end of file diff --git a/hailort/libhailort/src/os/hailort_driver.hpp b/hailort/libhailort/src/os/hailort_driver.hpp index ff2b3ab..7205d59 100755 --- a/hailort/libhailort/src/os/hailort_driver.hpp +++ b/hailort/libhailort/src/os/hailort_driver.hpp @@ -56,6 +56,7 @@ constexpr uint8_t MAX_H2D_CHANNEL_INDEX = 15; constexpr uint8_t MIN_D2H_CHANNEL_INDEX = MAX_H2D_CHANNEL_INDEX + 1; constexpr uint8_t MAX_D2H_CHANNEL_INDEX = 31; +constexpr size_t SIZE_OF_SINGLE_DESCRIPTOR = 0x10; // NOTE: don't change members from this struct without updating all code using it (platform specific) 
struct ChannelInterruptTimestamp { @@ -85,16 +86,22 @@ struct IrqData { using ChannelsBitmap = std::array; #if defined(__linux__) || defined(_MSC_VER) +// Unique handle returned from the driver. using vdma_mapped_buffer_driver_identifier = uintptr_t; #elif defined(__QNX__) -struct vdma_mapped_buffer_driver_identifier { - shm_handle_t shm_handle; - int shm_fd; -}; +// Identifier is the shared memory file descriptor. +using vdma_mapped_buffer_driver_identifier = int; #else #error "unsupported platform!" #endif // defined(__linux__) || defined(_MSC_VER) +struct DescriptorsListInfo { + uintptr_t handle; // Unique identifier for the driver. + uint64_t dma_address; + size_t desc_count; + void *user_address; +}; + class HailoRTDriver final { public: @@ -110,6 +117,11 @@ public: BOTH }; + enum class DmaSyncDirection { + TO_HOST = 0, + TO_DEVICE + }; + enum class DmaType { PCIE, DRAM @@ -136,7 +148,7 @@ public: using VdmaBufferHandle = size_t; - static Expected create(const std::string &dev_path); + static Expected create(const DeviceInfo &device_info); // TODO: HRT-7309 add implementation for Windows #if defined(__linux__) || defined(__QNX__) @@ -153,7 +165,7 @@ public: hailo_status write_vdma_channel_register(vdma::ChannelId channel_id, DmaDirection data_direction, size_t offset, size_t reg_size, uint32_t data); - hailo_status vdma_buffer_sync(VdmaBufferHandle buffer, DmaDirection sync_direction, size_t offset, size_t count); + hailo_status vdma_buffer_sync(VdmaBufferHandle buffer, DmaSyncDirection sync_direction, size_t offset, size_t count); hailo_status vdma_interrupts_enable(const ChannelsBitmap &channels_bitmap, bool enable_timestamps_measure); hailo_status vdma_interrupts_disable(const ChannelsBitmap &channel_id); @@ -197,18 +209,18 @@ public: hailo_status vdma_buffer_unmap(VdmaBufferHandle handle); /** - * Allocate vdma descriptors buffer that is accessable via kernel mode, user mode and the given board (using DMA). 
- * + * Allocate vdma descriptors list object that can bind to some buffer. Used for scatter gather vdma. + * * @param[in] desc_count - number of descriptors to allocate. The descriptor max size is DESC_MAX_SIZE. - * @return Upon success, returns Expected of a pair . - * Otherwise, returns Unexpected of ::hailo_status error. + * @param[in] is_circular - if true, the descriptors list can be used in a circular (and desc_count must be power + * of 2) */ - Expected> descriptors_list_create(size_t desc_count); - + Expected descriptors_list_create(size_t desc_count, bool is_circular); + /** - * Frees a vdma descriptors buffer allocated by 'create_descriptors_buffer'. + * Frees a vdma descriptors buffer allocated by 'descriptors_list_create'. */ - hailo_status descriptors_list_release(uintptr_t desc_handle); + hailo_status descriptors_list_release(const DescriptorsListInfo &descriptors_list_info); /** * Configure vdma channel descriptors to point to the given user address. @@ -233,15 +245,14 @@ public: hailo_status vdma_continuous_buffer_free(uintptr_t buffer_handle); /** - * The actual desc page size might be smaller than the once requested, depends on the host capabilities. + * Marks the device as used for vDMA operations. Only one open FD can be marked at once. + * The device is "unmarked" only on FD close. 
*/ - uint16_t calc_desc_page_size(uint16_t requested_size) const + hailo_status mark_as_used(); + + const std::string &device_id() const { - if (m_desc_max_page_size < requested_size) { - LOGGER__WARNING("Requested desc page size ({}) is bigger than max on this host ({}).", - requested_size, m_desc_max_page_size); - } - return static_cast(std::min(static_cast(requested_size), static_cast(m_desc_max_page_size))); + return m_device_info.device_id; } inline DmaType dma_type() const @@ -251,21 +262,8 @@ public: FileDescriptor& fd() {return m_fd;} - const std::string &dev_path() const + inline bool allocate_driver_buffer() const { - return m_dev_path; - } - - hailo_status mark_as_used(); - -#ifdef __QNX__ - inline pid_t resource_manager_pid() const - { - return m_resource_manager_pid; - } -#endif // __QNX__ - - inline bool allocate_driver_buffer() const { return m_allocate_driver_buffer; } @@ -297,7 +295,12 @@ private: hailo_status read_memory_ioctl(MemoryType memory_type, uint64_t address, void *buf, size_t size); hailo_status write_memory_ioctl(MemoryType memory_type, uint64_t address, const void *buf, size_t size); - HailoRTDriver(const std::string &dev_path, FileDescriptor &&fd, hailo_status &status); + Expected> descriptors_list_create_ioctl(size_t desc_count, bool is_circular); + hailo_status descriptors_list_release_ioctl(uintptr_t desc_handle); + Expected descriptors_list_create_mmap(uintptr_t desc_handle, size_t desc_count); + hailo_status descriptors_list_create_munmap(void *address, size_t desc_count); + + HailoRTDriver(const DeviceInfo &device_info, FileDescriptor &&fd, hailo_status &status); bool is_valid_channel_id(const vdma::ChannelId &channel_id); bool is_valid_channels_bitmap(const ChannelsBitmap &bitmap) @@ -313,7 +316,7 @@ private: } FileDescriptor m_fd; - std::string m_dev_path; + DeviceInfo m_device_info; uint16_t m_desc_max_page_size; DmaType m_dma_type; bool m_allocate_driver_buffer; diff --git a/hailort/libhailort/src/os/mmap_buffer.hpp 
b/hailort/libhailort/src/os/mmap_buffer.hpp index e66cdfd..90c1572 100644 --- a/hailort/libhailort/src/os/mmap_buffer.hpp +++ b/hailort/libhailort/src/os/mmap_buffer.hpp @@ -26,6 +26,10 @@ public: static Expected create_shared_memory(size_t length); static Expected create_file_map(size_t length, FileDescriptor &file, uintptr_t offset); +#if defined(__QNX__) + static Expected create_file_map_nocache(size_t length, FileDescriptor &file, uintptr_t offset); +#endif /* defined(__QNX__) */ + MmapBufferImpl() : m_address(INVALID_ADDR), m_length(0), m_unmappable(false) {} ~MmapBufferImpl() @@ -51,6 +55,8 @@ public: return m_address; } + size_t size() const { return m_length; } + bool is_mapped() const { return (INVALID_ADDR != m_address); @@ -89,6 +95,15 @@ public: return MmapBuffer(std::move(mmap.release())); } +#if defined(__QNX__) + static Expected> create_file_map_nocache(size_t length, FileDescriptor &file, uintptr_t offset) + { + auto mmap = MmapBufferImpl::create_file_map_nocache(length, file, offset); + CHECK_EXPECTED(mmap); + return MmapBuffer(mmap.release()); + } +#endif /* defined(__QNX__) */ + MmapBuffer() = default; ~MmapBuffer() = default; @@ -106,6 +121,8 @@ public: return reinterpret_cast(m_mmap.address()); } + size_t size() const { return m_mmap.size(); } + template std::enable_if_t::value, U&> operator*() { diff --git a/hailort/libhailort/src/os/posix/CMakeLists.txt b/hailort/libhailort/src/os/posix/CMakeLists.txt new file mode 100644 index 0000000..2aa2e8a --- /dev/null +++ b/hailort/libhailort/src/os/posix/CMakeLists.txt @@ -0,0 +1,16 @@ +cmake_minimum_required(VERSION 3.0.0) + +if (CMAKE_SYSTEM_NAME STREQUAL QNX) + add_subdirectory(qnx) +else() + add_subdirectory(linux) +endif() + +set(files + ${CMAKE_CURRENT_SOURCE_DIR}/microsec_timer.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/file_descriptor.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/mmap_buffer.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/hailort_driver.cpp +) + +set(HAILORT_CPP_SOURCES ${HAILORT_CPP_SOURCES} ${files} 
PARENT_SCOPE) diff --git a/hailort/libhailort/src/os/posix/hailort_driver.cpp b/hailort/libhailort/src/os/posix/hailort_driver.cpp index 47b3a1e..4615f4d 100755 --- a/hailort/libhailort/src/os/posix/hailort_driver.cpp +++ b/hailort/libhailort/src/os/posix/hailort_driver.cpp @@ -107,20 +107,16 @@ const uintptr_t HailoRTDriver::INVALID_DRIVER_BUFFER_HANDLE_VALUE = INVALID_DRIV const size_t HailoRTDriver::INVALID_DRIVER_VDMA_MAPPING_HANDLE_VALUE = INVALID_DRIVER_HANDLE_VALUE; const uint8_t HailoRTDriver::INVALID_VDMA_CHANNEL_INDEX = INVALID_VDMA_CHANNEL; -Expected HailoRTDriver::create(const std::string &dev_path) +Expected HailoRTDriver::create(const DeviceInfo &device_info) { - hailo_status status = HAILO_UNINITIALIZED; + auto fd = FileDescriptor(open(device_info.dev_path.c_str(), O_RDWR)); + CHECK_AS_EXPECTED(fd >= 0, HAILO_DRIVER_FAIL, + "Failed to open device file {} with error {}", device_info.dev_path, errno); - auto fd = FileDescriptor(open(dev_path.c_str(), O_RDWR)); - if (0 > fd) { - LOGGER__ERROR("Failed to open board {}", dev_path); - return make_unexpected(HAILO_OPEN_FILE_FAILURE); - } + hailo_status status = HAILO_UNINITIALIZED; + HailoRTDriver object(device_info, std::move(fd), status); + CHECK_SUCCESS_AS_EXPECTED(status); - HailoRTDriver object(dev_path, std::move(fd), status); - if (HAILO_SUCCESS != status) { - return make_unexpected(status); - } return object; } @@ -155,9 +151,9 @@ static hailo_status validate_driver_version(const hailo_driver_info &driver_info return HAILO_SUCCESS; } -HailoRTDriver::HailoRTDriver(const std::string &dev_path, FileDescriptor &&fd, hailo_status &status) : +HailoRTDriver::HailoRTDriver(const DeviceInfo &device_info, FileDescriptor &&fd, hailo_status &status) : m_fd(std::move(fd)), - m_dev_path(dev_path), + m_device_info(device_info), m_allocate_driver_buffer(false) { hailo_driver_info driver_info = {}; @@ -429,13 +425,13 @@ hailo_status HailoRTDriver::write_memory_ioctl(MemoryType memory_type, uint64_t return 
HAILO_SUCCESS; } -hailo_status HailoRTDriver::vdma_buffer_sync(VdmaBufferHandle handle, DmaDirection sync_direction, size_t offset, size_t count) +hailo_status HailoRTDriver::vdma_buffer_sync(VdmaBufferHandle handle, DmaSyncDirection sync_direction, + size_t offset, size_t count) { #if defined(__linux__) - CHECK(sync_direction != DmaDirection::BOTH, HAILO_INVALID_ARGUMENT, "Can't sync vdma data both host and device"); hailo_vdma_buffer_sync_params sync_info{ .handle = handle, - .sync_type = (sync_direction == DmaDirection::H2D) ? HAILO_SYNC_FOR_DEVICE : HAILO_SYNC_FOR_HOST, + .sync_type = (sync_direction == DmaSyncDirection::TO_HOST) ? HAILO_SYNC_FOR_CPU : HAILO_SYNC_FOR_DEVICE, .offset = offset, .count = count }; @@ -623,12 +619,11 @@ hailo_status HailoRTDriver::reset_nn_core() return HAILO_SUCCESS; } - + +#if defined(__linux__) Expected HailoRTDriver::vdma_buffer_map(void *user_address, size_t required_size, DmaDirection data_direction, const vdma_mapped_buffer_driver_identifier &driver_buff_handle) { - -#if defined(__linux__) hailo_vdma_buffer_map_params map_user_buffer_info { .user_address = user_address, .size = required_size, @@ -636,29 +631,55 @@ Expected HailoRTDriver::vdma_buffer_map(void *u .allocated_buffer_handle = driver_buff_handle, .mapped_handle = 0 }; + + int err = 0; + auto status = hailo_ioctl(this->m_fd, HAILO_VDMA_BUFFER_MAP, &map_user_buffer_info, err); + if (HAILO_SUCCESS != status) { + LOGGER__ERROR("Failed to map user buffer with errno:{}", err); + return make_unexpected(HAILO_DRIVER_FAIL); + } + + return VdmaBufferHandle(map_user_buffer_info.mapped_handle); +} #elif defined( __QNX__) +Expected HailoRTDriver::vdma_buffer_map(void *user_address, size_t required_size, + DmaDirection data_direction, const vdma_mapped_buffer_driver_identifier &driver_buff_handle) +{ + // Mapping is done by the driver_buff_handle (shm file descriptor), and not by address. 
+ (void)user_address; + + // Create shared memory handle to send to driver + shm_handle_t shm_handle; + int err = shm_create_handle(driver_buff_handle, m_resource_manager_pid, O_RDWR, + &shm_handle, 0); + if (0 != err) { + LOGGER__ERROR("Error creating shm object handle, errno is: {}", errno); + return make_unexpected(HAILO_INTERNAL_FAILURE); + } + hailo_vdma_buffer_map_params map_user_buffer_info { - .shared_memory_handle = driver_buff_handle.shm_handle, + .shared_memory_handle = shm_handle, .size = required_size, .data_direction = direction_to_dma_data_direction(data_direction), .allocated_buffer_handle = INVALID_DRIVER_HANDLE_VALUE, .mapped_handle = 0 }; - (void)user_address; -#else -#error "unsupported platform!" -#endif // __linux__ - - int err = 0; + // Note: The driver will accept the shm_handle, and will mmap it to its own address space. After the driver maps the + // the shm, calling shm_delete_handle is not needed (but can't harm on the otherhand). + // If the ioctl fails, we can't tell if the shm was mapped or not, so we delete it ourself. auto status = hailo_ioctl(this->m_fd, HAILO_VDMA_BUFFER_MAP, &map_user_buffer_info, err); if (HAILO_SUCCESS != status) { LOGGER__ERROR("Failed to map user buffer with errno:{}", err); + shm_delete_handle(shm_handle); return make_unexpected(HAILO_DRIVER_FAIL); } return VdmaBufferHandle(map_user_buffer_info.mapped_handle); } +#else +#error "unsupported platform!" 
+#endif // __linux__ hailo_status HailoRTDriver::vdma_buffer_unmap(VdmaBufferHandle handle) { @@ -676,9 +697,53 @@ hailo_status HailoRTDriver::vdma_buffer_unmap(VdmaBufferHandle handle) return HAILO_SUCCESS; } -Expected> HailoRTDriver::descriptors_list_create(size_t desc_count) +Expected HailoRTDriver::descriptors_list_create(size_t desc_count, bool is_circular) +{ + auto handle_to_dma_address_pair = descriptors_list_create_ioctl(desc_count, is_circular); + CHECK_EXPECTED(handle_to_dma_address_pair); + + const auto desc_handle = handle_to_dma_address_pair->first; + const auto dma_address = handle_to_dma_address_pair->second; + + auto user_address = descriptors_list_create_mmap(desc_handle, desc_count); + if (!user_address) { + auto status = descriptors_list_release_ioctl(desc_handle); + if (HAILO_SUCCESS != status) { + LOGGER__ERROR("Failed releasing descriptors list, status {}", status); + // continue + } + return make_unexpected(user_address.status()); + } + + return DescriptorsListInfo{desc_handle, dma_address, desc_count, user_address.release()}; +} + +hailo_status HailoRTDriver::descriptors_list_release(const DescriptorsListInfo &descriptors_list_info) { - hailo_desc_list_create_params create_desc_info {.desc_count = desc_count, .desc_handle = 0, .dma_address = 0 }; + hailo_status status = HAILO_SUCCESS; + + auto unmap_status = descriptors_list_create_munmap(descriptors_list_info.user_address, descriptors_list_info.desc_count); + if (HAILO_SUCCESS != unmap_status) { + LOGGER__ERROR("Descriptors list unmap failed with {}", unmap_status); + status = unmap_status; + // continue + } + + auto release_status = descriptors_list_release_ioctl(descriptors_list_info.handle); + if (HAILO_SUCCESS != release_status) { + LOGGER__ERROR("Descriptors list release status failed with {}", release_status); + status = release_status; + // continue + } + + return status; +} + +Expected> HailoRTDriver::descriptors_list_create_ioctl(size_t desc_count, bool is_circular) +{ + 
hailo_desc_list_create_params create_desc_info{}; + create_desc_info.desc_count = desc_count; + create_desc_info.is_circular = is_circular; int err = 0; auto status = hailo_ioctl(this->m_fd, HAILO_DESC_LIST_CREATE, &create_desc_info, err); @@ -690,7 +755,7 @@ Expected> HailoRTDriver::descriptors_list_create( return std::make_pair(create_desc_info.desc_handle, create_desc_info.dma_address); } -hailo_status HailoRTDriver::descriptors_list_release(uintptr_t desc_handle) +hailo_status HailoRTDriver::descriptors_list_release_ioctl(uintptr_t desc_handle) { int err = 0; auto status = hailo_ioctl(this->m_fd, HAILO_DESC_LIST_RELEASE, &desc_handle, err); @@ -699,9 +764,70 @@ hailo_status HailoRTDriver::descriptors_list_release(uintptr_t desc_handle) return HAILO_DRIVER_FAIL; } - return HAILO_SUCCESS; + return HAILO_SUCCESS; +} + +#if defined(__linux__) +Expected HailoRTDriver::descriptors_list_create_mmap(uintptr_t desc_handle, size_t desc_count) +{ + const size_t buffer_size = desc_count * SIZE_OF_SINGLE_DESCRIPTOR; + void *address = mmap(nullptr, buffer_size, PROT_WRITE | PROT_READ, MAP_SHARED, m_fd, (off_t)desc_handle); + if (MAP_FAILED == address) { + LOGGER__ERROR("Failed to map descriptors list buffer with errno: {}", errno); + return make_unexpected(HAILO_DRIVER_FAIL); + } + return address; +} + +hailo_status HailoRTDriver::descriptors_list_create_munmap(void *address, size_t desc_count) +{ + const size_t buffer_size = desc_count * SIZE_OF_SINGLE_DESCRIPTOR; + if (0 != munmap(address, buffer_size)) { + LOGGER__ERROR("munmap of address {}, length: {} failed with errno: {}", address, buffer_size, errno); + return HAILO_DRIVER_FAIL; + } + return HAILO_SUCCESS; } +#elif defined(__QNX__) + +Expected HailoRTDriver::descriptors_list_create_mmap(uintptr_t desc_handle, size_t desc_count) +{ + const size_t buffer_size = desc_count * SIZE_OF_SINGLE_DESCRIPTOR; + struct hailo_non_linux_desc_list_mmap_params map_vdma_list_params { + .desc_handle = desc_handle, + .size = 
buffer_size, + .user_address = nullptr, + }; + + int err = 0; + auto status = HailoRTDriver::hailo_ioctl(m_fd, HAILO_NON_LINUX_DESC_LIST_MMAP, &map_vdma_list_params, err); + if (HAILO_SUCCESS != status) { + LOGGER__ERROR("Mmap descriptors list ioctl failed with errno:{}", err); + return make_unexpected(HAILO_DRIVER_FAIL); + } + + void *address = mmap(nullptr, buffer_size, PROT_WRITE | PROT_READ | PROT_NOCACHE, MAP_SHARED | MAP_PHYS, NOFD, + (off_t)map_vdma_list_params.user_address); + CHECK_AS_EXPECTED(MAP_FAILED != address, HAILO_INTERNAL_FAILURE, "Failed to mmap buffer fd with errno:{}", errno); + + return address; +} + +hailo_status HailoRTDriver::descriptors_list_create_munmap(void *address, size_t desc_count) +{ + const size_t buffer_size = desc_count * SIZE_OF_SINGLE_DESCRIPTOR; + if (0 != munmap(address, buffer_size)) { + LOGGER__ERROR("munmap of address {}, length: {} failed with errno: {}", address, buffer_size, errno); + return HAILO_DRIVER_FAIL; + } + return HAILO_SUCCESS; +} + +#else +#error "unsupported platform!" +#endif + hailo_status HailoRTDriver::descriptors_list_bind_vdma_buffer(uintptr_t desc_handle, VdmaBufferHandle buffer_handle, uint16_t desc_page_size, uint8_t channel_index, uint32_t starting_desc) { @@ -764,6 +890,11 @@ Expected> HailoRTDriver::vdma_continuous_buffer_a int err = 0; auto status = hailo_ioctl(this->m_fd, HAILO_VDMA_CONTINUOUS_BUFFER_ALLOC, ¶ms, err); if (HAILO_SUCCESS != status) { + if (ENOMEM == err) { + LOGGER__WARN("Failed to allocate continuous buffer, size 0x{:x}. 
This failure means there is not a sufficient amount of CMA memory", + size); + return make_unexpected(HAILO_OUT_OF_HOST_CMA_MEMORY); + } LOGGER__ERROR("Failed allocate continuous buffer with errno:{}", err); return make_unexpected(HAILO_DRIVER_FAIL); } diff --git a/hailort/libhailort/src/os/posix/linux/CMakeLists.txt b/hailort/libhailort/src/os/posix/linux/CMakeLists.txt new file mode 100644 index 0000000..cffd810 --- /dev/null +++ b/hailort/libhailort/src/os/posix/linux/CMakeLists.txt @@ -0,0 +1,8 @@ +cmake_minimum_required(VERSION 3.0.0) + +set(files + ${CMAKE_CURRENT_SOURCE_DIR}/driver_scan.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/event.cpp +) + +set(HAILORT_CPP_SOURCES ${HAILORT_CPP_SOURCES} ${files} PARENT_SCOPE) diff --git a/hailort/libhailort/src/os/posix/linux/driver_scan.cpp b/hailort/libhailort/src/os/posix/linux/driver_scan.cpp new file mode 100644 index 0000000..6ba7dae --- /dev/null +++ b/hailort/libhailort/src/os/posix/linux/driver_scan.cpp @@ -0,0 +1,69 @@ +/** + * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. 
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file driver_scan.cpp + * @brief Parse pcie driver sysfs + **/ + +#include "os/driver_scan.hpp" +#include +#include +#include + +namespace hailort +{ + +#define HAILO_CLASS_PATH ("/sys/class/hailo_chardev") +#define HAILO_BOARD_LOCATION_FILENAME ("board_location") + + +Expected> list_devices() +{ + DIR *dir_iter = opendir(HAILO_CLASS_PATH); + if (!dir_iter) { + if (ENOENT == errno) { + LOGGER__ERROR("Can't find hailo pcie class, this may happen if the driver is not installed (this may happen" + " if the kernel was updated), or if there is no connected Hailo board"); + return make_unexpected(HAILO_PCIE_DRIVER_NOT_INSTALLED); + } + else { + LOGGER__ERROR("Failed to open hailo pcie class ({}), errno {}", HAILO_CLASS_PATH, errno); + return make_unexpected(HAILO_DRIVER_FAIL); + } + } + + std::vector devices; + struct dirent *dir = nullptr; + while ((dir = readdir(dir_iter)) != nullptr) { + std::string device_name(dir->d_name); + if (device_name == "." 
|| device_name == "..") { + continue; + } + devices.push_back(device_name); + } + + closedir(dir_iter); + return devices; +} + +Expected query_device_info(const std::string &device_name) +{ + const std::string device_id_path = std::string(HAILO_CLASS_PATH) + "/" + + device_name + "/" + HAILO_BOARD_LOCATION_FILENAME; + std::ifstream device_id_file(device_id_path); + CHECK_AS_EXPECTED(device_id_file.good(), HAILO_DRIVER_FAIL, "Failed open {}", device_id_path); + + std::string device_id; + std::getline(device_id_file, device_id); + CHECK_AS_EXPECTED(device_id_file.eof(), HAILO_DRIVER_FAIL, "Failed read {}", device_id_path); + + HailoRTDriver::DeviceInfo device_info = {}; + device_info.dev_path = std::string("/dev/") + device_name; + device_info.device_id = device_id; + + return device_info; +} + +} /* namespace hailort */ diff --git a/hailort/libhailort/src/os/posix/linux/event.cpp b/hailort/libhailort/src/os/posix/linux/event.cpp new file mode 100644 index 0000000..c9e753b --- /dev/null +++ b/hailort/libhailort/src/os/posix/linux/event.cpp @@ -0,0 +1,243 @@ +/** + * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. 
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file event.cpp + * @brief Event & Semaphore wrapper for Unix + **/ + +#include "hailo/hailort.h" +#include "hailo/event.hpp" + +#include "common/utils.hpp" +#include "common/event_internal.hpp" + +#include +#include +#include + + +namespace hailort +{ + +Waitable::~Waitable() +{ + if (-1 != m_handle) { + (void) close(m_handle); + } +} + +Waitable::Waitable(Waitable&& other) : + m_handle(std::exchange(other.m_handle, -1)) +{} + +hailo_status Waitable::wait_for_single_object(underlying_waitable_handle_t handle, std::chrono::milliseconds timeout) +{ + return eventfd_poll(handle, timeout); +} + +hailo_status Waitable::eventfd_poll(underlying_waitable_handle_t fd, std::chrono::milliseconds timeout) +{ + hailo_status status = HAILO_UNINITIALIZED; + struct pollfd pfd{}; + int poll_ret = -1; + + assert(-1 != fd); + + if (UINT32_MAX < timeout.count()) { + status = HAILO_INVALID_ARGUMENT; + LOGGER__ERROR("Invalid timeout value: {}", timeout.count()); + goto l_exit; + } + if (INT_MAX < timeout.count()) { + timeout = std::chrono::milliseconds(INT_MAX); + } + + pfd.fd = fd; + pfd.events = POLLIN; + do { + poll_ret = poll(&pfd, 1, static_cast(timeout.count())); + } while ((0 > poll_ret) && (EINTR == poll_ret)); + + if (0 == poll_ret) { + LOGGER__TRACE("Timeout"); + status = HAILO_TIMEOUT; + goto l_exit; + } + if (0 > poll_ret) { + LOGGER__ERROR("poll failed with errno={}", errno); + status = HAILO_INTERNAL_FAILURE; + goto l_exit; + } + if (0 == (pfd.revents & POLLIN)) { + LOGGER__ERROR("pfd not in read state. 
revents={}", pfd.revents); + status = HAILO_INTERNAL_FAILURE; + goto l_exit; + } + + status = HAILO_SUCCESS; +l_exit: + return status; +} + +hailo_status Waitable::eventfd_read(underlying_waitable_handle_t fd) +{ + hailo_status status = HAILO_UNINITIALIZED; + ssize_t read_ret = -1; + uint64_t dummy; + + assert(-1 != fd); + + read_ret = read(fd, &dummy, sizeof(dummy)); + if (sizeof(dummy) != read_ret) { + LOGGER__ERROR("read failed. bytes_read={}, expected={}, errno={}", read_ret, sizeof(dummy), errno); + status = HAILO_INTERNAL_FAILURE; + goto l_exit; + } + + status = HAILO_SUCCESS; +l_exit: + return status; +} + +hailo_status Waitable::eventfd_write(underlying_waitable_handle_t fd) +{ + hailo_status status = HAILO_UNINITIALIZED; + ssize_t write_ret = -1; + uint64_t buffer = 1; + + assert(-1 != fd); + + write_ret = write(fd, &buffer, sizeof(buffer)); + if (sizeof(buffer) != write_ret) { + LOGGER__ERROR("write failed. bytes_written={}, expected={}, errno={}", write_ret, sizeof(buffer), errno); + status = HAILO_INTERNAL_FAILURE; + goto l_exit; + } + + status = HAILO_SUCCESS; +l_exit: + return status; +} + +Expected Event::create(const State& initial_state) +{ + const auto handle = open_event_handle(initial_state); + if (-1 == handle) { + return make_unexpected(HAILO_INTERNAL_FAILURE); + } + return Event(handle); +} + +EventPtr Event::create_shared(const State& initial_state) +{ + const auto handle = open_event_handle(initial_state); + if (-1 == handle) { + return nullptr; + } + + return make_shared_nothrow(handle); +} + +hailo_status Event::signal() +{ + return eventfd_write(m_handle); +} + +bool Event::is_auto_reset() +{ + return false; +} + +hailo_status Event::reset() +{ + if (HAILO_TIMEOUT == wait(std::chrono::seconds(0))) { + // Event is not set nothing to do, otherwise `eventfd_read` would block forever + return HAILO_SUCCESS; + } + return eventfd_read(m_handle); +} + +underlying_waitable_handle_t Event::open_event_handle(const State& initial_state) +{ + static 
const int NO_FLAGS = 0; + const int state = initial_state == State::signalled ? 1 : 0; + const auto handle = eventfd(state, NO_FLAGS); + if (-1 == handle) { + LOGGER__ERROR("Call to eventfd failed with errno={}", errno); + } + return handle; +} + +Expected Semaphore::create(uint32_t initial_count) +{ + const auto handle = open_semaphore_handle(initial_count); + if (-1 == handle) { + return make_unexpected(HAILO_INTERNAL_FAILURE); + } + return Semaphore(handle); +} + +SemaphorePtr Semaphore::create_shared(uint32_t initial_count) +{ + const auto handle = open_semaphore_handle(initial_count); + if (-1 == handle) { + return nullptr; + } + + return make_shared_nothrow(handle); +} + +hailo_status Semaphore::signal() +{ + return eventfd_write(m_handle); +} + +bool Semaphore::is_auto_reset() +{ + return true; +} + +hailo_status Semaphore::post_wait() +{ + return eventfd_read(m_handle); +} + +underlying_waitable_handle_t Semaphore::open_semaphore_handle(uint32_t initial_count) +{ + static const int SEMAPHORE = EFD_SEMAPHORE; + const auto handle = eventfd(initial_count, SEMAPHORE); + if (-1 == handle) { + LOGGER__ERROR("Call to eventfd failed with errno={}", errno); + } + return handle; +} + +Expected WaitableGroup::wait_any(std::chrono::milliseconds timeout) +{ + int poll_ret = -1; + do { + poll_ret = poll(m_waitable_handles.data(), m_waitable_handles.size(), static_cast(timeout.count())); + } while ((0 > poll_ret) && (EINTR == poll_ret)); + + if (0 == poll_ret) { + LOGGER__TRACE("Timeout"); + return make_unexpected(HAILO_TIMEOUT); + } + CHECK_AS_EXPECTED(poll_ret > 0, HAILO_INTERNAL_FAILURE, "poll failed with errno={}", errno); + + for (size_t index = 0; index < m_waitable_handles.size(); index++) { + if (m_waitable_handles[index].revents & POLLIN) { + auto status = m_waitables[index].get().post_wait(); + CHECK_SUCCESS_AS_EXPECTED(status); + + return index; + } + } + + LOGGER__ERROR("None of the pollfd are in read state"); + return make_unexpected(HAILO_INTERNAL_FAILURE); 
+} + +} /* namespace hailort */ diff --git a/hailort/libhailort/src/os/posix/mmap_buffer.cpp b/hailort/libhailort/src/os/posix/mmap_buffer.cpp index 0d2ff57..0939118 100644 --- a/hailort/libhailort/src/os/posix/mmap_buffer.cpp +++ b/hailort/libhailort/src/os/posix/mmap_buffer.cpp @@ -44,33 +44,19 @@ Expected MmapBufferImpl::create_shared_memory(size_t length) Expected MmapBufferImpl::create_file_map(size_t length, FileDescriptor &file, uintptr_t offset) { -#ifdef __linux__ void *address = mmap(nullptr, length, PROT_WRITE | PROT_READ, MAP_SHARED, file, (off_t)offset); CHECK_AS_EXPECTED(INVALID_ADDR != address, HAILO_INTERNAL_FAILURE, "Failed to mmap buffer fd with errno:{}", errno); -#elif defined(__QNX__) - - // TODO change name of struct - using this sturct because itis exact fields we need ro qnx mmap too (where user address is physical addr) - struct hailo_non_linux_desc_list_mmap_params map_vdma_list_params { - .desc_handle = offset, - .size = length, - .user_address = nullptr, - }; - - int err = 0; - auto status = HailoRTDriver::hailo_ioctl(file, HAILO_NON_LINUX_DESC_LIST_MMAP, &map_vdma_list_params, err); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("HAILO_NON_LINUX_DESC_LIST_MMAP failed with errno:{}", err); - return make_unexpected(HAILO_DRIVER_FAIL); - } + return MmapBufferImpl(address, length); +} - void *address = mmap(nullptr, length, PROT_WRITE | PROT_READ | PROT_NOCACHE, MAP_SHARED | MAP_PHYS, NOFD, (off_t)map_vdma_list_params.user_address); +#if defined(__QNX__) +Expected MmapBufferImpl::create_file_map_nocache(size_t length, FileDescriptor &file, uintptr_t offset) +{ + void *address = mmap(nullptr, length, PROT_WRITE | PROT_READ | PROT_NOCACHE, MAP_SHARED, file, (off_t)offset); CHECK_AS_EXPECTED(INVALID_ADDR != address, HAILO_INTERNAL_FAILURE, "Failed to mmap buffer fd with errno:{}", errno); -#else -#error "unsupported platform!" 
-#endif // __linux__ - return MmapBufferImpl(address, length); } +#endif /* defined(__QNX__) */ hailo_status MmapBufferImpl::unmap() { diff --git a/hailort/libhailort/src/os/posix/qnx/CMakeLists.txt b/hailort/libhailort/src/os/posix/qnx/CMakeLists.txt new file mode 100644 index 0000000..cffd810 --- /dev/null +++ b/hailort/libhailort/src/os/posix/qnx/CMakeLists.txt @@ -0,0 +1,8 @@ +cmake_minimum_required(VERSION 3.0.0) + +set(files + ${CMAKE_CURRENT_SOURCE_DIR}/driver_scan.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/event.cpp +) + +set(HAILORT_CPP_SOURCES ${HAILORT_CPP_SOURCES} ${files} PARENT_SCOPE) diff --git a/hailort/libhailort/src/os/posix/qnx/event.cpp b/hailort/libhailort/src/os/posix/qnx/event.cpp index 9303b93..94f1617 100644 --- a/hailort/libhailort/src/os/posix/qnx/event.cpp +++ b/hailort/libhailort/src/os/posix/qnx/event.cpp @@ -14,8 +14,7 @@ #include "hailo/event.hpp" #include "common/utils.hpp" - -#include "utils/event_internal.hpp" +#include "common/event_internal.hpp" #include #include @@ -31,10 +30,6 @@ namespace hailort { -Waitable::Waitable(underlying_waitable_handle_t handle) : - m_handle(handle) -{} - Waitable::~Waitable() { if (INVALID_EVENT_HANDLE != m_handle) { @@ -49,11 +44,6 @@ Waitable::Waitable(Waitable&& other) : m_handle(std::exchange(other.m_handle, INVALID_EVENT_HANDLE)) {} -underlying_waitable_handle_t Waitable::get_underlying_handle() -{ - return m_handle; -} - hailo_status Waitable::wait_for_single_object(underlying_waitable_handle_t handle, std::chrono::milliseconds timeout) { const size_t timeout_ms = (timeout.count() > INT_MAX) ? 
INT_MAX : static_cast(timeout.count()); @@ -88,11 +78,6 @@ EventPtr Event::create_shared(const State& initial_state) return make_shared_nothrow(handle); } -hailo_status Event::wait(std::chrono::milliseconds timeout) -{ - return wait_for_single_object(m_handle, timeout); -} - hailo_status Event::signal() { const auto result = neosmart::SetEvent(m_handle); @@ -144,27 +129,6 @@ SemaphorePtr Semaphore::create_shared(uint32_t initial_count) return make_shared_nothrow(handle, initial_count); } -hailo_status Semaphore::wait(std::chrono::milliseconds timeout) -{ - auto wait_result = wait_for_single_object(m_handle, timeout); - if (HAILO_SUCCESS == wait_result) { - m_sem_mutex.lock(); - if (0 == m_count.load()) { - LOGGER__ERROR("Waiting on semaphore with 0 value"); - } - if (m_count > 0) { - m_count--; - } - // After decrementing the value of the semaphore - check if the new value is bigger than 0 and if it is signal the event - if (m_count > 0) { - neosmart::SetEvent(m_handle); - } - m_sem_mutex.unlock(); - } - - return wait_result; -} - hailo_status Semaphore::signal() { m_sem_mutex.lock(); @@ -208,71 +172,41 @@ Semaphore::Semaphore(Semaphore&& other) : other.m_sem_mutex.unlock(); } -WaitOrShutdown::WaitOrShutdown(WaitablePtr waitable, EventPtr shutdown_event) : - m_waitable(waitable), - m_shutdown_event(shutdown_event), - m_wait_handle_array(create_wait_handle_array(waitable, shutdown_event)) -{} +hailo_status Semaphore::post_wait() +{ + std::unique_lock lock(m_sem_mutex); + CHECK(m_count.load() > 0, HAILO_INTERNAL_FAILURE, "Wait returned on semaphore with 0 value"); -void Event::post_wait() -{} + m_count--; -void Semaphore::post_wait(){ - m_sem_mutex.lock(); - if (0 == m_count.load()) { - LOGGER__ERROR("Wait Returned on semaphore with 0 value"); - } - if (m_count > 0) { - m_count--; - } // After decrementing the value of the semaphore - check if the new value is bigger than 0 and if it is signal the event if (m_count > 0) { neosmart::SetEvent(m_handle); } - 
m_sem_mutex.unlock(); + + return HAILO_SUCCESS; } -hailo_status WaitOrShutdown::wait(std::chrono::milliseconds timeout) +Expected WaitableGroup::wait_any(std::chrono::milliseconds timeout) { int wait_index = -1; const uint64_t timeout_ms = (timeout.count() > INT_MAX) ? INT_MAX : static_cast(timeout.count()); - const auto wait_result = neosmart::WaitForMultipleEvents(m_wait_handle_array.data(), static_cast(m_wait_handle_array.size()), - false, timeout_ms, wait_index); - // If semaphore need to subtract from counter + const bool WAIT_FOR_ANY = false; + const auto wait_result = neosmart::WaitForMultipleEvents(m_waitable_handles.data(), + static_cast(m_waitable_handles.size()), WAIT_FOR_ANY, timeout_ms, wait_index); if (0 != wait_result) { if (ETIMEDOUT == wait_result) { - return HAILO_TIMEOUT; + return make_unexpected(HAILO_TIMEOUT); } else { LOGGER__ERROR("WaitForMultipleEvents Failed, error: {}", wait_result); - return HAILO_INTERNAL_FAILURE; + return make_unexpected(HAILO_INTERNAL_FAILURE); } } - - if (WAITABLE_INDEX == wait_index) { - // Meaning it can be a semaphore object - m_waitable->post_wait(); - return HAILO_SUCCESS; - } else if (SHUTDOWN_INDEX == wait_index) { - return HAILO_SHUTDOWN_EVENT_SIGNALED; - } else { - LOGGER__ERROR("Invalid event index signalled in WaitForMultipleEventsFailed, index: {}", wait_index); - return HAILO_INTERNAL_FAILURE; - } -} -hailo_status WaitOrShutdown::signal() -{ - return m_waitable->signal(); -} + auto status = m_waitables[wait_index].get().post_wait(); + CHECK_SUCCESS_AS_EXPECTED(status); -WaitOrShutdown::WaitHandleArray WaitOrShutdown::create_wait_handle_array(WaitablePtr waitable, EventPtr shutdown_event) -{ - // Note the order! 
- WaitHandleArray handles{ - shutdown_event->get_underlying_handle(), - waitable->get_underlying_handle() - }; - return handles; + return wait_index; } } /* namespace hailort */ diff --git a/hailort/libhailort/src/os/posix/unix/driver_scan.cpp b/hailort/libhailort/src/os/posix/unix/driver_scan.cpp deleted file mode 100644 index 6ba7dae..0000000 --- a/hailort/libhailort/src/os/posix/unix/driver_scan.cpp +++ /dev/null @@ -1,69 +0,0 @@ -/** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. - * Distributed under the MIT license (https://opensource.org/licenses/MIT) - **/ -/** - * @file driver_scan.cpp - * @brief Parse pcie driver sysfs - **/ - -#include "os/driver_scan.hpp" -#include -#include -#include - -namespace hailort -{ - -#define HAILO_CLASS_PATH ("/sys/class/hailo_chardev") -#define HAILO_BOARD_LOCATION_FILENAME ("board_location") - - -Expected> list_devices() -{ - DIR *dir_iter = opendir(HAILO_CLASS_PATH); - if (!dir_iter) { - if (ENOENT == errno) { - LOGGER__ERROR("Can't find hailo pcie class, this may happen if the driver is not installed (this may happen" - " if the kernel was updated), or if there is no connected Hailo board"); - return make_unexpected(HAILO_PCIE_DRIVER_NOT_INSTALLED); - } - else { - LOGGER__ERROR("Failed to open hailo pcie class ({}), errno {}", HAILO_CLASS_PATH, errno); - return make_unexpected(HAILO_DRIVER_FAIL); - } - } - - std::vector devices; - struct dirent *dir = nullptr; - while ((dir = readdir(dir_iter)) != nullptr) { - std::string device_name(dir->d_name); - if (device_name == "." 
|| device_name == "..") { - continue; - } - devices.push_back(device_name); - } - - closedir(dir_iter); - return devices; -} - -Expected query_device_info(const std::string &device_name) -{ - const std::string device_id_path = std::string(HAILO_CLASS_PATH) + "/" + - device_name + "/" + HAILO_BOARD_LOCATION_FILENAME; - std::ifstream device_id_file(device_id_path); - CHECK_AS_EXPECTED(device_id_file.good(), HAILO_DRIVER_FAIL, "Failed open {}", device_id_path); - - std::string device_id; - std::getline(device_id_file, device_id); - CHECK_AS_EXPECTED(device_id_file.eof(), HAILO_DRIVER_FAIL, "Failed read {}", device_id_path); - - HailoRTDriver::DeviceInfo device_info = {}; - device_info.dev_path = std::string("/dev/") + device_name; - device_info.device_id = device_id; - - return device_info; -} - -} /* namespace hailort */ diff --git a/hailort/libhailort/src/os/posix/unix/event.cpp b/hailort/libhailort/src/os/posix/unix/event.cpp deleted file mode 100644 index 4c4525e..0000000 --- a/hailort/libhailort/src/os/posix/unix/event.cpp +++ /dev/null @@ -1,299 +0,0 @@ -/** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. 
- * Distributed under the MIT license (https://opensource.org/licenses/MIT) - **/ -/** - * @file event.cpp - * @brief Event & Semaphore wrapper for Unix - * - * TODO: doc - **/ - -#include "hailo/hailort.h" -#include "hailo/event.hpp" - -#include "common/utils.hpp" - -#include "utils/event_internal.hpp" - -#include -#include -#include - - -namespace hailort -{ - -Waitable::Waitable(underlying_waitable_handle_t handle) : - m_handle(handle) -{} - -Waitable::~Waitable() -{ - if (-1 != m_handle) { - (void) close(m_handle); - } -} - -Waitable::Waitable(Waitable&& other) : - m_handle(std::exchange(other.m_handle, -1)) -{} - -underlying_waitable_handle_t Waitable::get_underlying_handle() -{ - return m_handle; -} - -hailo_status Waitable::eventfd_poll(underlying_waitable_handle_t fd, std::chrono::milliseconds timeout) -{ - hailo_status status = HAILO_UNINITIALIZED; - struct pollfd pfd{}; - int poll_ret = -1; - - assert(-1 != fd); - - if (UINT32_MAX < timeout.count()) { - status = HAILO_INVALID_ARGUMENT; - LOGGER__ERROR("Invalid timeout value: {}", timeout.count()); - goto l_exit; - } - if (INT_MAX < timeout.count()) { - timeout = std::chrono::milliseconds(INT_MAX); - } - - pfd.fd = fd; - pfd.events = POLLIN; - do { - poll_ret = poll(&pfd, 1, static_cast(timeout.count())); - } while ((0 > poll_ret) && (EINTR == poll_ret)); - - if (0 == poll_ret) { - LOGGER__TRACE("Timeout"); - status = HAILO_TIMEOUT; - goto l_exit; - } - if (0 > poll_ret) { - LOGGER__ERROR("poll failed with errno={}", errno); - status = HAILO_INTERNAL_FAILURE; - goto l_exit; - } - if (0 == (pfd.revents & POLLIN)) { - LOGGER__ERROR("pfd not in read state. 
revents={}", pfd.revents); - status = HAILO_INTERNAL_FAILURE; - goto l_exit; - } - - status = HAILO_SUCCESS; -l_exit: - return status; -} - -hailo_status Waitable::eventfd_read(underlying_waitable_handle_t fd) -{ - hailo_status status = HAILO_UNINITIALIZED; - ssize_t read_ret = -1; - uint64_t dummy; - - assert(-1 != fd); - - read_ret = read(fd, &dummy, sizeof(dummy)); - if (sizeof(dummy) != read_ret) { - LOGGER__ERROR("read failed. bytes_read={}, expected={}, errno={}", read_ret, sizeof(dummy), errno); - status = HAILO_INTERNAL_FAILURE; - goto l_exit; - } - - status = HAILO_SUCCESS; -l_exit: - return status; -} - -hailo_status Waitable::eventfd_write(underlying_waitable_handle_t fd) -{ - hailo_status status = HAILO_UNINITIALIZED; - ssize_t write_ret = -1; - uint64_t buffer = 1; - - assert(-1 != fd); - - write_ret = write(fd, &buffer, sizeof(buffer)); - if (sizeof(buffer) != write_ret) { - LOGGER__ERROR("write failed. bytes_written={}, expected={}, errno={}", write_ret, sizeof(buffer), errno); - status = HAILO_INTERNAL_FAILURE; - goto l_exit; - } - - status = HAILO_SUCCESS; -l_exit: - return status; -} - -Expected Event::create(const State& initial_state) -{ - const auto handle = open_event_handle(initial_state); - if (-1 == handle) { - return make_unexpected(HAILO_INTERNAL_FAILURE); - } - return Event(handle); -} - -EventPtr Event::create_shared(const State& initial_state) -{ - const auto handle = open_event_handle(initial_state); - if (-1 == handle) { - return nullptr; - } - - return make_shared_nothrow(handle); -} - -hailo_status Event::wait(std::chrono::milliseconds timeout) -{ - return eventfd_poll(m_handle, timeout); -} - -hailo_status Event::signal() -{ - return eventfd_write(m_handle); -} - -bool Event::is_auto_reset() -{ - return false; -} - -hailo_status Event::reset() -{ - if (HAILO_TIMEOUT == wait(std::chrono::seconds(0))) { - // Event is not set nothing to do, otherwise `eventfd_read` would block forever - return HAILO_SUCCESS; - } - return 
eventfd_read(m_handle); -} - -underlying_waitable_handle_t Event::open_event_handle(const State& initial_state) -{ - static const int NO_FLAGS = 0; - const int state = initial_state == State::signalled ? 1 : 0; - const auto handle = eventfd(state, NO_FLAGS); - if (-1 == handle) { - LOGGER__ERROR("Call to eventfd failed with errno={}", errno); - } - return handle; -} - -Expected Semaphore::create(uint32_t initial_count) -{ - const auto handle = open_semaphore_handle(initial_count); - if (-1 == handle) { - return make_unexpected(HAILO_INTERNAL_FAILURE); - } - return Semaphore(handle); -} - -SemaphorePtr Semaphore::create_shared(uint32_t initial_count) -{ - const auto handle = open_semaphore_handle(initial_count); - if (-1 == handle) { - return nullptr; - } - - return make_shared_nothrow(handle); -} - -hailo_status Semaphore::wait(std::chrono::milliseconds timeout) -{ - // TODO: See SDK-16568 (might be necessary in the future) - hailo_status status = eventfd_poll(m_handle, timeout); - if (HAILO_TIMEOUT == status) { - LOGGER__INFO("eventfd_poll failed, status = {}", status); - return status; - } - CHECK_SUCCESS(status); - - status = eventfd_read(m_handle); - CHECK_SUCCESS(status); - - return HAILO_SUCCESS; -} - -hailo_status Semaphore::signal() -{ - return eventfd_write(m_handle); -} - -bool Semaphore::is_auto_reset() -{ - return true; -} - -underlying_waitable_handle_t Semaphore::open_semaphore_handle(uint32_t initial_count) -{ - static const int SEMAPHORE = EFD_SEMAPHORE; - const auto handle = eventfd(initial_count, SEMAPHORE); - if (-1 == handle) { - LOGGER__ERROR("Call to eventfd failed with errno={}", errno); - } - return handle; -} - -WaitOrShutdown::WaitOrShutdown(WaitablePtr waitable, EventPtr shutdown_event) : - m_waitable(waitable), - m_shutdown_event(shutdown_event), - m_wait_handle_array(create_wait_handle_array(waitable, shutdown_event)) -{} - -hailo_status WaitOrShutdown::wait(std::chrono::milliseconds timeout) -{ - int poll_ret = -1; - do { - poll_ret = 
poll(m_wait_handle_array.data(), m_wait_handle_array.size(), static_cast(timeout.count())); - } while ((0 > poll_ret) && (EINTR == poll_ret)); - - if (0 == poll_ret) { - LOGGER__TRACE("Timeout"); - return HAILO_TIMEOUT; - } - if (0 > poll_ret) { - LOGGER__ERROR("poll failed with errno={}", errno); - return HAILO_INTERNAL_FAILURE; - } - if ((0 == (m_wait_handle_array[WAITABLE_INDEX].revents & POLLIN)) && - (0 == (m_wait_handle_array[SHUTDOWN_INDEX].revents & POLLIN))) { - LOGGER__ERROR("Both pfds not in read state: waitable.revents={}, shutdown.revents={}", - m_wait_handle_array[WAITABLE_INDEX].revents, m_wait_handle_array[SHUTDOWN_INDEX].revents); - return HAILO_INTERNAL_FAILURE; - } - - if (m_wait_handle_array[SHUTDOWN_INDEX].revents & POLLIN) { - return HAILO_SHUTDOWN_EVENT_SIGNALED; - } - - if (m_waitable->is_auto_reset() && (m_wait_handle_array[WAITABLE_INDEX].revents & POLLIN)) { - uint64_t dummy; - ssize_t read_ret = read(m_wait_handle_array[WAITABLE_INDEX].fd, &dummy, sizeof(dummy)); - if (sizeof(dummy) != read_ret) { - LOGGER__ERROR("read failed. bytes_read={}, expected={}, errno={}", read_ret, sizeof(dummy), errno); - return HAILO_INTERNAL_FAILURE; - } - } - - return HAILO_SUCCESS; -} - -hailo_status WaitOrShutdown::signal() -{ - return m_waitable->signal(); -} - -WaitOrShutdown::WaitHandleArray WaitOrShutdown::create_wait_handle_array(WaitablePtr waitable, EventPtr shutdown_event) -{ - // Note the order! 
- WaitHandleArray pfds{{ - {shutdown_event->get_underlying_handle(), POLLIN, 0}, - {waitable->get_underlying_handle(), POLLIN, 0} - }}; - return pfds; -} - -} /* namespace hailort */ diff --git a/hailort/libhailort/src/os/windows/CMakeLists.txt b/hailort/libhailort/src/os/windows/CMakeLists.txt new file mode 100644 index 0000000..bba4bed --- /dev/null +++ b/hailort/libhailort/src/os/windows/CMakeLists.txt @@ -0,0 +1,13 @@ +cmake_minimum_required(VERSION 3.0.0) + +set(files + ${CMAKE_CURRENT_SOURCE_DIR}/microsec_timer.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/file_descriptor.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/mmap_buffer.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/hailort_driver.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/event.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/driver_scan.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/virtual_alloc_guard.cpp +) + +set(HAILORT_CPP_SOURCES ${HAILORT_CPP_SOURCES} ${files} PARENT_SCOPE) diff --git a/hailort/libhailort/src/os/windows/driver_scan.cpp b/hailort/libhailort/src/os/windows/driver_scan.cpp index 3870675..cec1bb6 100644 --- a/hailort/libhailort/src/os/windows/driver_scan.cpp +++ b/hailort/libhailort/src/os/windows/driver_scan.cpp @@ -150,7 +150,7 @@ Expected> list_devices() &guid, NULL, CM_GET_DEVICE_INTERFACE_LIST_PRESENT); - CHECK_AS_EXPECTED(cr == CR_SUCCESS && len >= 2, HAILO_PCIE_DRIVER_NOT_INSTALLED, + CHECK_AS_EXPECTED((cr == CR_SUCCESS) && (len > 0), HAILO_PCIE_DRIVER_NOT_INSTALLED, "Driver interface not found error {}", cr); std::string names_str; diff --git a/hailort/libhailort/src/os/windows/event.cpp b/hailort/libhailort/src/os/windows/event.cpp index ab1a35d..d13f29a 100644 --- a/hailort/libhailort/src/os/windows/event.cpp +++ b/hailort/libhailort/src/os/windows/event.cpp @@ -11,8 +11,7 @@ #include "hailo/event.hpp" #include "common/utils.hpp" - -#include "utils/event_internal.hpp" +#include "common/event_internal.hpp" #include #include @@ -21,10 +20,6 @@ namespace hailort { -Waitable::Waitable(underlying_waitable_handle_t handle) : - m_handle(handle) -{} 
- Waitable::~Waitable() { if (nullptr != m_handle) { @@ -36,11 +31,6 @@ Waitable::Waitable(Waitable&& other) : m_handle(std::exchange(other.m_handle, nullptr)) {} -underlying_waitable_handle_t Waitable::get_underlying_handle() -{ - return m_handle; -} - static DWORD timeout_millies(long long value) { DWORD millies = static_cast(value); @@ -89,11 +79,6 @@ EventPtr Event::create_shared(const State& initial_state) return make_shared_nothrow(handle); } -hailo_status Event::wait(std::chrono::milliseconds timeout) -{ - return wait_for_single_object(m_handle, timeout); -} - hailo_status Event::signal() { const auto result = SetEvent(m_handle); @@ -153,11 +138,6 @@ SemaphorePtr Semaphore::create_shared(uint32_t initial_count) return make_shared_nothrow(handle); } -hailo_status Semaphore::wait(std::chrono::milliseconds timeout) -{ - return wait_for_single_object(m_handle, timeout); -} - hailo_status Semaphore::signal() { static const LONG INCREMENT_BY_ONE = 1; @@ -176,6 +156,12 @@ bool Semaphore::is_auto_reset() return true; } +hailo_status Semaphore::post_wait() +{ + // On windows, after wait on semaphore the counters decrease automatically. 
+ return HAILO_SUCCESS; +} + underlying_waitable_handle_t Semaphore::open_semaphore_handle(uint32_t initial_count) { static const LPSECURITY_ATTRIBUTES NO_INHERITANCE = nullptr; @@ -188,45 +174,24 @@ underlying_waitable_handle_t Semaphore::open_semaphore_handle(uint32_t initial_c return handle; } -WaitOrShutdown::WaitOrShutdown(WaitablePtr waitable, EventPtr shutdown_event) : - m_waitable(waitable), - m_shutdown_event(shutdown_event), - m_wait_handle_array(create_wait_handle_array(waitable, shutdown_event)) -{} - -hailo_status WaitOrShutdown::wait(std::chrono::milliseconds timeout) +Expected WaitableGroup::wait_any(std::chrono::milliseconds timeout) { DWORD wait_millies = timeout_millies(timeout.count()); - static const BOOL WAIT_FOR_ANY = false; - const auto wait_result = WaitForMultipleObjects(static_cast(m_wait_handle_array.size()), - m_wait_handle_array.data(), WAIT_FOR_ANY, wait_millies); - switch (wait_result) { - case WAIT_OBJECT_0 + WAITABLE_INDEX: - return HAILO_SUCCESS; - case WAIT_OBJECT_0 + SHUTDOWN_INDEX: - return HAILO_SHUTDOWN_EVENT_SIGNALED; - case WAIT_TIMEOUT: - return HAILO_TIMEOUT; - default: - LOGGER__ERROR("WaitForMultipleObjects returned {}, last_error={}", wait_result, GetLastError()); - return HAILO_INTERNAL_FAILURE; + const auto WAIT_OBJECT_N = WAIT_OBJECT_0 + m_waitable_handles.size(); + const bool WAIT_FOR_ANY = false; + const auto wait_result = WaitForMultipleObjects(static_cast(m_waitable_handles.size()), + m_waitable_handles.data(), WAIT_FOR_ANY, wait_millies); + if (wait_result == WAIT_TIMEOUT) { + return make_unexpected(HAILO_TIMEOUT); + } else if ((wait_result >= WAIT_OBJECT_0) && (wait_result < WAIT_OBJECT_N)) { + // Object is signaled. + // Note! On windows there is no need to call post_wait() because it is done automatically. 
+ return wait_result - WAIT_OBJECT_0; + } else { + LOGGER__ERROR("WaitForMultipleObjects returned {}, last_error={}", wait_result, GetLastError()); + return make_unexpected(HAILO_INTERNAL_FAILURE); } } -hailo_status WaitOrShutdown::signal() -{ - return m_waitable->signal(); -} - -WaitOrShutdown::WaitHandleArray WaitOrShutdown::create_wait_handle_array(WaitablePtr waitable, EventPtr shutdown_event) -{ - // Note the order! - WaitHandleArray handles{ - shutdown_event->get_underlying_handle(), - waitable->get_underlying_handle() - }; - return handles; -} - } /* namespace hailort */ diff --git a/hailort/libhailort/src/os/windows/hailort_driver.cpp b/hailort/libhailort/src/os/windows/hailort_driver.cpp index 7707978..57b0db6 100644 --- a/hailort/libhailort/src/os/windows/hailort_driver.cpp +++ b/hailort/libhailort/src/os/windows/hailort_driver.cpp @@ -294,9 +294,9 @@ static hailo_status validate_driver_version(const hailo_driver_info &driver_info return HAILO_SUCCESS; } -HailoRTDriver::HailoRTDriver(const std::string &dev_path, FileDescriptor &&fd, hailo_status &status) : +HailoRTDriver::HailoRTDriver(const DeviceInfo &device_info, FileDescriptor &&fd, hailo_status &status) : m_fd(std::move(fd)), - m_dev_path(dev_path), + m_device_info(device_info), m_allocate_driver_buffer(false) { tCompatibleHailoIoctlData data = {}; @@ -353,17 +353,17 @@ Expected> HailoRTDriver::scan_devices() return devices_info; } -Expected HailoRTDriver::create(const std::string &dev_path) +Expected HailoRTDriver::create(const DeviceInfo &device_info) { hailo_status status = HAILO_UNINITIALIZED; - CDeviceFile f(dev_path); + CDeviceFile f(device_info.dev_path); if (!f.Present()) { - LOGGER__ERROR("Failed to open board {}", dev_path); + LOGGER__ERROR("Failed to open board {}", device_info.dev_path); return make_unexpected(HAILO_OPEN_FILE_FAILURE); } FileDescriptor fd(f.Detach()); - HailoRTDriver platform(dev_path, std::move(fd), status); + HailoRTDriver platform(device_info, std::move(fd), status); 
if (HAILO_SUCCESS != status) { return make_unexpected(status); } @@ -564,13 +564,13 @@ hailo_status HailoRTDriver::write_vdma_channel_register(vdma::ChannelId channel_ return HAILO_SUCCESS; } -hailo_status HailoRTDriver::vdma_buffer_sync(VdmaBufferHandle handle, DmaDirection sync_direction, size_t offset, size_t count) +hailo_status HailoRTDriver::vdma_buffer_sync(VdmaBufferHandle handle, DmaSyncDirection sync_direction, + size_t offset, size_t count) { - CHECK(sync_direction != DmaDirection::BOTH, HAILO_INVALID_ARGUMENT, "Can't sync vdma data both host and device"); tCompatibleHailoIoctlData data = {}; hailo_vdma_buffer_sync_params& sync_info = data.Buffer.VdmaBufferSync; sync_info.handle = handle; - sync_info.sync_type = (sync_direction == DmaDirection::H2D) ? HAILO_SYNC_FOR_DEVICE : HAILO_SYNC_FOR_HOST; + sync_info.sync_type = (sync_direction == DmaSyncDirection::TO_HOST) ? HAILO_SYNC_FOR_CPU : HAILO_SYNC_FOR_DEVICE; sync_info.offset = offset; sync_info.count = count; if (0 > ioctl(this->m_fd, HAILO_VDMA_BUFFER_SYNC, &data)) { @@ -762,13 +762,54 @@ hailo_status HailoRTDriver::vdma_buffer_unmap(VdmaBufferHandle handle) return HAILO_SUCCESS; } -Expected> HailoRTDriver::descriptors_list_create(size_t desc_count) +Expected HailoRTDriver::descriptors_list_create(size_t desc_count, bool is_circular) +{ + auto handle_to_dma_address_pair = descriptors_list_create_ioctl(desc_count, is_circular); + CHECK_EXPECTED(handle_to_dma_address_pair); + + const auto desc_handle = handle_to_dma_address_pair->first; + const auto dma_address = handle_to_dma_address_pair->second; + + auto user_address = descriptors_list_create_mmap(desc_handle, desc_count); + if (!user_address) { + auto status = descriptors_list_release_ioctl(desc_handle); + if (HAILO_SUCCESS != status) { + LOGGER__ERROR("Failed releasing descriptors list, status {}", status); + // continue + } + return make_unexpected(user_address.status()); + } + + return DescriptorsListInfo{desc_handle, dma_address, desc_count, 
user_address.release()}; +} + +hailo_status HailoRTDriver::descriptors_list_release(const DescriptorsListInfo &descriptors_list_info) +{ + hailo_status status = HAILO_SUCCESS; + + auto unmap_status = descriptors_list_create_munmap(descriptors_list_info.user_address, descriptors_list_info.desc_count); + if (HAILO_SUCCESS != unmap_status) { + LOGGER__ERROR("Descriptors list unmap failed with {}", unmap_status); + status = unmap_status; + // continue + } + + auto release_status = descriptors_list_release_ioctl(descriptors_list_info.handle); + if (HAILO_SUCCESS != release_status) { + LOGGER__ERROR("Descriptors list release status failed with {}", release_status); + status = release_status; + // continue + } + + return status; +} + +Expected> HailoRTDriver::descriptors_list_create_ioctl(size_t desc_count, bool is_circular) { tCompatibleHailoIoctlData data = {}; hailo_desc_list_create_params& create_desc_info = data.Buffer.DescListCreate; create_desc_info.desc_count = desc_count; - create_desc_info.desc_handle = 0; - create_desc_info.dma_address = 0; + create_desc_info.is_circular = is_circular; if (0 > ioctl(this->m_fd, HAILO_DESC_LIST_CREATE, &data)) { LOGGER__ERROR("Failed to create descriptors list with errno: {}", errno); @@ -778,10 +819,10 @@ Expected> HailoRTDriver::descriptors_list_create( return std::move(std::make_pair(create_desc_info.desc_handle, create_desc_info.dma_address)); } -hailo_status HailoRTDriver::descriptors_list_release(uintptr_t desc_handle) +hailo_status HailoRTDriver::descriptors_list_release_ioctl(uintptr_t desc_handle) { tCompatibleHailoIoctlData data = {}; - uintptr_t& release_desc_info = data.Buffer.DescListReleaseParam; + uintptr_t& release_desc_info = data.Buffer.DescListReleaseParam; release_desc_info = desc_handle; if (0 > ioctl(this->m_fd, HAILO_DESC_LIST_RELEASE, &data)) { LOGGER__ERROR("Failed to release descriptors list with errno: {}", errno); @@ -791,6 +832,26 @@ hailo_status HailoRTDriver::descriptors_list_release(uintptr_t 
desc_handle) return HAILO_SUCCESS; } +Expected HailoRTDriver::descriptors_list_create_mmap(uintptr_t desc_handle, size_t desc_count) +{ + tCompatibleHailoIoctlData data = {}; + data.Buffer.DescListMmap.desc_handle = desc_handle; + data.Buffer.DescListMmap.size = desc_count * SIZE_OF_SINGLE_DESCRIPTOR; + if (0 > ioctl(m_fd, HAILO_NON_LINUX_DESC_LIST_MMAP, &data)) { + LOGGER__ERROR("Failed to map physical memory with errno: {}", errno); + return make_unexpected(HAILO_DRIVER_FAIL); + } + + void *user_address = data.Buffer.DescListMmap.user_address; + return user_address; +} + +hailo_status HailoRTDriver::descriptors_list_create_munmap(void *, size_t ) +{ + // On windows, the unmap is done on the release ioctl + return HAILO_SUCCESS; +} + hailo_status HailoRTDriver::descriptors_list_bind_vdma_buffer(uintptr_t desc_handle, VdmaBufferHandle buffer_handle, uint16_t desc_page_size, uint8_t channel_index, uint32_t starting_desc) { @@ -841,19 +902,6 @@ hailo_status HailoRTDriver::reset_nn_core() return HAILO_NOT_IMPLEMENTED; } -Expected MmapBufferImpl::create_file_map(size_t length, FileDescriptor &file, uintptr_t offset) -{ - tCompatibleHailoIoctlData data = {}; - data.Buffer.DescListMmap.desc_handle = offset; - data.Buffer.DescListMmap.size = length; - if (0 > ioctl(file, HAILO_NON_LINUX_DESC_LIST_MMAP, &data)) { - LOGGER__ERROR("Failed to map physical memory with errno: {}", errno); - return make_unexpected(HAILO_DRIVER_FAIL); - } - // this mapping will be deleted automatically with the physical allocation - return MmapBufferImpl(data.Buffer.DescListMmap.user_address, length, false); -} - Expected HailoRTDriver::vdma_low_memory_buffer_alloc(size_t size) { (void) size; return make_unexpected(HAILO_INVALID_OPERATION); diff --git a/hailort/libhailort/src/os/windows/mmap_buffer.cpp b/hailort/libhailort/src/os/windows/mmap_buffer.cpp index 3107cc1..95391e9 100644 --- a/hailort/libhailort/src/os/windows/mmap_buffer.cpp +++ b/hailort/libhailort/src/os/windows/mmap_buffer.cpp @@ 
-14,19 +14,22 @@ namespace hailort void * const MmapBufferImpl::INVALID_ADDR = NULL; -Expected MmapBufferImpl::create_shared_memory(size_t length) +Expected MmapBufferImpl::create_shared_memory(size_t) { - void *address = VirtualAlloc(NULL, length, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE); - CHECK_AS_EXPECTED(INVALID_ADDR != address, HAILO_OUT_OF_HOST_MEMORY, "Failed to mmap buffer with error:{}", GetLastError()); - return MmapBufferImpl(address, length, true); + LOGGER__ERROR("Creating shared memory is not implemented on windows"); + return make_unexpected(HAILO_NOT_IMPLEMENTED); +} + +Expected MmapBufferImpl::create_file_map(size_t, FileDescriptor &, uintptr_t ) +{ + LOGGER__ERROR("Creating file mapping is not implemented on windows"); + return make_unexpected(HAILO_NOT_IMPLEMENTED); } hailo_status MmapBufferImpl::unmap() { - if (m_unmappable) { - VirtualFree(m_address, m_length, MEM_RELEASE); - } - return HAILO_SUCCESS; + LOGGER__ERROR("Unmapping is not implemented on windows"); + return HAILO_NOT_IMPLEMENTED; } } /* namespace hailort */ diff --git a/hailort/libhailort/src/os/windows/virtual_alloc_guard.cpp b/hailort/libhailort/src/os/windows/virtual_alloc_guard.cpp new file mode 100644 index 0000000..425454a --- /dev/null +++ b/hailort/libhailort/src/os/windows/virtual_alloc_guard.cpp @@ -0,0 +1,48 @@ +/** + * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. 
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT) +**/ +/** + * @file virtual_alloc_guard.cpp + * @brief Guard object for VirtualAlloc and VirtualFree + **/ + +#include "os/windows/virtual_alloc_guard.hpp" +#include "common/logger_macros.hpp" +#include "common/utils.hpp" + +namespace hailort +{ + +Expected VirtualAllocGuard::create(size_t size) +{ + hailo_status status = HAILO_UNINITIALIZED; + VirtualAllocGuard guard(size, status); + CHECK_SUCCESS_AS_EXPECTED(status); + return guard; +} + +VirtualAllocGuard::VirtualAllocGuard(size_t size, hailo_status &status) : + m_address(VirtualAlloc(NULL, size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE)), + m_size(size) +{ + if (nullptr == m_address) { + status = HAILO_OUT_OF_HOST_MEMORY; + return; + } + + status = HAILO_SUCCESS; +} + +VirtualAllocGuard::~VirtualAllocGuard() +{ + if (nullptr != m_address) { + // From msdn - when passing MEM_RELEASE to VirtualFree, 0 must be passed as size. + static constexpr size_t ZERO_SIZE = 0; + if (!VirtualFree(m_address, ZERO_SIZE, MEM_RELEASE)) { + LOGGER__ERROR("VirtualFree failed with error {}", GetLastError()); + } + } +} + +} /* namespace hailort */ diff --git a/hailort/libhailort/src/os/windows/virtual_alloc_guard.hpp b/hailort/libhailort/src/os/windows/virtual_alloc_guard.hpp new file mode 100644 index 0000000..d89c4ba --- /dev/null +++ b/hailort/libhailort/src/os/windows/virtual_alloc_guard.hpp @@ -0,0 +1,45 @@ +/** + * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) +**/ +/** + * @file virtual_alloc_guard.hpp + * @brief Guard object for VirtualAlloc and VirtualFree (only for windows os). 
+ **/ + +#ifndef _HAILO_VIRTUAL_ALLOC_GUARD_HPP_ +#define _HAILO_VIRTUAL_ALLOC_GUARD_HPP_ + +#include "hailo/expected.hpp" + +#include + +namespace hailort +{ + +class VirtualAllocGuard final { +public: + static Expected create(size_t size); + ~VirtualAllocGuard(); + + VirtualAllocGuard(const VirtualAllocGuard &other) = delete; + VirtualAllocGuard &operator=(const VirtualAllocGuard &other) = delete; + VirtualAllocGuard(VirtualAllocGuard &&other) : + m_address(std::exchange(other.m_address, nullptr)), + m_size(other.m_size) + {} + VirtualAllocGuard &operator=(VirtualAllocGuard &&other) = delete; + + void *address() { return m_address; } + size_t size() const { return m_size; } + +private: + VirtualAllocGuard(size_t size, hailo_status &status); + + void *m_address; + const size_t m_size; +}; + +} /* namespace hailort */ + +#endif /* _HAILO_VIRTUAL_ALLOC_GUARD_HPP_ */ diff --git a/hailort/libhailort/src/service/hailort_rpc_client.cpp b/hailort/libhailort/src/service/hailort_rpc_client.cpp index 30340aa..6f68357 100644 --- a/hailort/libhailort/src/service/hailort_rpc_client.cpp +++ b/hailort/libhailort/src/service/hailort_rpc_client.cpp @@ -23,7 +23,7 @@ hailo_status HailoRtRpcClient::client_keep_alive(uint32_t pid) keepalive_Request request; request.set_pid(pid); empty reply; - grpc::ClientContext context; + ClientContextWithTimeout context; grpc::Status status = m_stub->client_keep_alive(&context, request, &reply); CHECK_GRPC_STATUS(status); return HAILO_SUCCESS; @@ -33,7 +33,7 @@ Expected HailoRtRpcClient::get_service_version() { get_service_version_Request request; get_service_version_Reply reply; - grpc::ClientContext context; + ClientContextWithTimeout context; grpc::Status status = m_stub->get_service_version(&context, request, &reply); CHECK_GRPC_STATUS_AS_EXPECTED(status); assert(reply.status() < HAILO_STATUS_COUNT); @@ -58,7 +58,7 @@ Expected HailoRtRpcClient::VDevice_create(const hailo_vdevice_params_t proto_vdevice_params->set_group_id(params.group_id == 
nullptr ? "" : std::string(params.group_id)); VDevice_create_Reply reply; - grpc::ClientContext context; + ClientContextWithTimeout context; grpc::Status status = m_stub->VDevice_create(&context, request, &reply); CHECK_GRPC_STATUS_AS_EXPECTED(status); assert(reply.status() < HAILO_STATUS_COUNT); @@ -72,19 +72,20 @@ Expected HailoRtRpcClient::VDevice_dup_handle(uint32_t pid, uint32_t h request.set_pid(pid); request.set_handle(handle); dup_handle_Reply reply; - grpc::ClientContext context; + ClientContextWithTimeout context; grpc::Status status = m_stub->VDevice_dup_handle(&context, request, &reply); CHECK_GRPC_STATUS_AS_EXPECTED(status); return reply.handle(); } -hailo_status HailoRtRpcClient::VDevice_release(uint32_t handle) +hailo_status HailoRtRpcClient::VDevice_release(uint32_t handle, uint32_t pid) { Release_Request request; request.set_handle(handle); + request.set_pid(pid); Release_Reply reply; - grpc::ClientContext context; + ClientContextWithTimeout context; grpc::Status status = m_stub->VDevice_release(&context, request, &reply); CHECK_GRPC_STATUS(status); assert(reply.status() < HAILO_STATUS_COUNT); @@ -122,7 +123,7 @@ Expected> HailoRtRpcClient::InputVStreams_create(uint32_t } VStreams_create_Reply reply; - grpc::ClientContext context; + ClientContextWithTimeout context; grpc::Status status = m_stub->InputVStreams_create(&context, request, &reply); CHECK_GRPC_STATUS_AS_EXPECTED(status); assert(reply.status() < HAILO_STATUS_COUNT); @@ -135,13 +136,14 @@ Expected> HailoRtRpcClient::InputVStreams_create(uint32_t return input_vstreams_handles; } -hailo_status HailoRtRpcClient::InputVStream_release(uint32_t handle) +hailo_status HailoRtRpcClient::InputVStream_release(uint32_t handle, uint32_t pid) { Release_Request request; request.set_handle(handle); + request.set_pid(pid); Release_Reply reply; - grpc::ClientContext context; + ClientContextWithTimeout context; grpc::Status status = m_stub->InputVStream_release(&context, request, &reply); 
CHECK_GRPC_STATUS(status); assert(reply.status() < HAILO_STATUS_COUNT); @@ -179,7 +181,7 @@ Expected> HailoRtRpcClient::OutputVStreams_create(uint32_t } VStreams_create_Reply reply; - grpc::ClientContext context; + ClientContextWithTimeout context; grpc::Status status = m_stub->OutputVStreams_create(&context, request, &reply); CHECK_GRPC_STATUS_AS_EXPECTED(status); assert(reply.status() < HAILO_STATUS_COUNT); @@ -192,13 +194,14 @@ Expected> HailoRtRpcClient::OutputVStreams_create(uint32_t return output_vstreams_handles; } -hailo_status HailoRtRpcClient::OutputVStream_release(uint32_t handle) +hailo_status HailoRtRpcClient::OutputVStream_release(uint32_t handle, uint32_t pid) { Release_Request request; request.set_handle(handle); + request.set_pid(pid); Release_Reply reply; - grpc::ClientContext context; + ClientContextWithTimeout context; grpc::Status status = m_stub->OutputVStream_release(&context, request, &reply); CHECK_GRPC_STATUS(status); assert(reply.status() < HAILO_STATUS_COUNT); @@ -212,7 +215,7 @@ Expected HailoRtRpcClient::InputVStream_dup_handle(uint32_t pid, uint3 request.set_pid(pid); request.set_handle(handle); dup_handle_Reply reply; - grpc::ClientContext context; + ClientContextWithTimeout context; grpc::Status status = m_stub->InputVStream_dup_handle(&context, request, &reply); CHECK_GRPC_STATUS_AS_EXPECTED(status); return reply.handle(); @@ -224,7 +227,7 @@ Expected HailoRtRpcClient::OutputVStream_dup_handle(uint32_t pid, uint request.set_pid(pid); request.set_handle(handle); dup_handle_Reply reply; - grpc::ClientContext context; + ClientContextWithTimeout context; grpc::Status status = m_stub->OutputVStream_dup_handle(&context, request, &reply); CHECK_GRPC_STATUS_AS_EXPECTED(status); return reply.handle(); @@ -274,7 +277,7 @@ Expected> HailoRtRpcClient::VDevice_configure(uint32_t vde } VDevice_configure_Reply reply; - grpc::ClientContext context; + ClientContextWithTimeout context; grpc::Status status = m_stub->VDevice_configure(&context, 
request, &reply); CHECK_GRPC_STATUS_AS_EXPECTED(status); assert(reply.status() < HAILO_STATUS_COUNT); @@ -290,7 +293,7 @@ Expected> HailoRtRpcClient::VDevice_get_physical_device request.set_handle(handle); VDevice_get_physical_devices_ids_Reply reply; - grpc::ClientContext context; + ClientContextWithTimeout context; grpc::Status status = m_stub->VDevice_get_physical_devices_ids(&context, request, &reply); CHECK_GRPC_STATUS_AS_EXPECTED(status); assert(reply.status() < HAILO_STATUS_COUNT); @@ -302,13 +305,30 @@ Expected> HailoRtRpcClient::VDevice_get_physical_device return result; } +Expected>> HailoRtRpcClient::VDevice_get_physical_devices(uint32_t handle) +{ + std::vector> devices; + + auto device_ids = VDevice_get_physical_devices_ids(handle); + CHECK_EXPECTED(device_ids); + devices.reserve(device_ids->size()); + + for (const auto &device_id : device_ids.value()) { + auto device = Device::create(device_id); + CHECK_EXPECTED(device); + devices.push_back(std::move(device.release())) ; + } + + return devices; +} + Expected HailoRtRpcClient::VDevice_get_default_streams_interface(uint32_t handle) { VDevice_get_default_streams_interface_Request request; request.set_handle(handle); VDevice_get_default_streams_interface_Reply reply; - grpc::ClientContext context; + ClientContextWithTimeout context; grpc::Status status = m_stub->VDevice_get_default_streams_interface(&context, request, &reply); CHECK_GRPC_STATUS_AS_EXPECTED(status); assert(reply.status() < HAILO_STATUS_COUNT); @@ -324,19 +344,20 @@ Expected HailoRtRpcClient::ConfiguredNetworkGroup_dup_handle(uint32_t request.set_pid(pid); request.set_handle(handle); dup_handle_Reply reply; - grpc::ClientContext context; + ClientContextWithTimeout context; grpc::Status status = m_stub->ConfiguredNetworkGroup_dup_handle(&context, request, &reply); CHECK_GRPC_STATUS_AS_EXPECTED(status); return reply.handle(); } -hailo_status HailoRtRpcClient::ConfiguredNetworkGroup_release(uint32_t handle) +hailo_status 
HailoRtRpcClient::ConfiguredNetworkGroup_release(uint32_t handle, uint32_t pid) { Release_Request request; request.set_handle(handle); + request.set_pid(pid); Release_Reply reply; - grpc::ClientContext context; + ClientContextWithTimeout context; grpc::Status status = m_stub->ConfiguredNetworkGroup_release(&context, request, &reply); CHECK_GRPC_STATUS(status); assert(reply.status() < HAILO_STATUS_COUNT); @@ -381,7 +402,7 @@ Expected> HailoRtRpcClient::Config request.set_network_name(network_name); ConfiguredNetworkGroup_make_input_vstream_params_Reply reply; - grpc::ClientContext context; + ClientContextWithTimeout context; grpc::Status status = m_stub->ConfiguredNetworkGroup_make_input_vstream_params(&context, request, &reply); CHECK_GRPC_STATUS_AS_EXPECTED(status); assert(reply.status() < HAILO_STATUS_COUNT); @@ -400,7 +421,7 @@ Expected>> HailoRtRpcC request.set_queue_size(queue_size); ConfiguredNetworkGroup_make_output_vstream_params_groups_Reply reply; - grpc::ClientContext context; + ClientContextWithTimeout context; grpc::Status status = m_stub->ConfiguredNetworkGroup_make_output_vstream_params_groups(&context, request, &reply); CHECK_GRPC_STATUS_AS_EXPECTED(status); assert(reply.status() < HAILO_STATUS_COUNT); @@ -426,7 +447,7 @@ Expected> HailoRtRpcClient::Config request.set_network_name(network_name); ConfiguredNetworkGroup_make_output_vstream_params_Reply reply; - grpc::ClientContext context; + ClientContextWithTimeout context; grpc::Status status = m_stub->ConfiguredNetworkGroup_make_output_vstream_params(&context, request, &reply); CHECK_GRPC_STATUS_AS_EXPECTED(status); assert(reply.status() < HAILO_STATUS_COUNT); @@ -464,7 +485,7 @@ Expected HailoRtRpcClient::ConfiguredNetworkGroup_name(uint32_t han request.set_handle(handle); ConfiguredNetworkGroup_name_Reply reply; - grpc::ClientContext context; + ClientContextWithTimeout context; grpc::Status status = m_stub->ConfiguredNetworkGroup_name(&context, request, &reply); 
CHECK_GRPC_STATUS_AS_EXPECTED(status); assert(reply.status() < HAILO_STATUS_COUNT); @@ -479,7 +500,7 @@ Expected> HailoRtRpcClient::ConfiguredNetworkG request.set_handle(handle); ConfiguredNetworkGroup_get_network_infos_Reply reply; - grpc::ClientContext context; + ClientContextWithTimeout context; grpc::Status status = m_stub->ConfiguredNetworkGroup_get_network_infos(&context, request, &reply); CHECK_GRPC_STATUS_AS_EXPECTED(status); assert(reply.status() < HAILO_STATUS_COUNT); @@ -503,7 +524,7 @@ Expected> HailoRtRpcClient::ConfiguredNetworkGr request.set_network_name(network_name); ConfiguredNetworkGroup_get_all_stream_infos_Reply reply; - grpc::ClientContext context; + ClientContextWithTimeout context; grpc::Status status = m_stub->ConfiguredNetworkGroup_get_all_stream_infos(&context, request, &reply); CHECK_GRPC_STATUS_AS_EXPECTED(status); assert(reply.status() < HAILO_STATUS_COUNT); @@ -533,6 +554,8 @@ Expected> HailoRtRpcClient::ConfiguredNetworkGr proto_stream_info.nms_info().chunks_per_frame(), proto_stream_info.nms_info().is_defused(), nms_defuse_info, + proto_stream_info.nms_info().burst_size(), + static_cast(proto_stream_info.nms_info().burst_type()), }; hailo_format_t format{ static_cast(proto_stream_info.format().type()), @@ -571,7 +594,7 @@ Expected HailoRtRpcClient::ConfiguredNetworkGroup_get_ request.set_handle(handle); ConfiguredNetworkGroup_get_default_stream_interface_Reply reply; - grpc::ClientContext context; + ClientContextWithTimeout context; grpc::Status status = m_stub->ConfiguredNetworkGroup_get_default_stream_interface(&context, request, &reply); CHECK_GRPC_STATUS_AS_EXPECTED(status); assert(reply.status() < HAILO_STATUS_COUNT); @@ -586,7 +609,7 @@ Expected>> HailoRtRpcClient::ConfiguredNetw request.set_handle(handle); ConfiguredNetworkGroup_get_output_vstream_groups_Reply reply; - grpc::ClientContext context; + ClientContextWithTimeout context; grpc::Status status = m_stub->ConfiguredNetworkGroup_get_output_vstream_groups(&context, 
request, &reply); CHECK_GRPC_STATUS_AS_EXPECTED(status); assert(reply.status() < HAILO_STATUS_COUNT); @@ -660,7 +683,7 @@ Expected> HailoRtRpcClient::ConfiguredNetworkG request.set_network_name(network_name); ConfiguredNetworkGroup_get_vstream_infos_Reply reply; - grpc::ClientContext context; + ClientContextWithTimeout context; grpc::Status status = m_stub->ConfiguredNetworkGroup_get_input_vstream_infos(&context, request, &reply); CHECK_GRPC_STATUS_AS_EXPECTED(status); assert(reply.status() < HAILO_STATUS_COUNT); @@ -676,7 +699,7 @@ Expected> HailoRtRpcClient::ConfiguredNetworkG request.set_network_name(network_name); ConfiguredNetworkGroup_get_vstream_infos_Reply reply; - grpc::ClientContext context; + ClientContextWithTimeout context; grpc::Status status = m_stub->ConfiguredNetworkGroup_get_output_vstream_infos(&context, request, &reply); CHECK_GRPC_STATUS_AS_EXPECTED(status); assert(reply.status() < HAILO_STATUS_COUNT); @@ -692,7 +715,7 @@ Expected> HailoRtRpcClient::ConfiguredNetworkG request.set_network_name(network_name); ConfiguredNetworkGroup_get_vstream_infos_Reply reply; - grpc::ClientContext context; + ClientContextWithTimeout context; grpc::Status status = m_stub->ConfiguredNetworkGroup_get_all_vstream_infos(&context, request, &reply); CHECK_GRPC_STATUS_AS_EXPECTED(status); assert(reply.status() < HAILO_STATUS_COUNT); @@ -705,7 +728,7 @@ Expected HailoRtRpcClient::ConfiguredNetworkGroup_is_scheduled(uint32_t ha ConfiguredNetworkGroup_is_scheduled_Request request; ConfiguredNetworkGroup_is_scheduled_Reply reply; request.set_handle(handle); - grpc::ClientContext context; + ClientContextWithTimeout context; grpc::Status status = m_stub->ConfiguredNetworkGroup_is_scheduled(&context, request, &reply); CHECK_GRPC_STATUS_AS_EXPECTED(status); assert(reply.status() < HAILO_STATUS_COUNT); @@ -722,7 +745,7 @@ hailo_status HailoRtRpcClient::ConfiguredNetworkGroup_set_scheduler_timeout(uint request.set_network_name(network_name); 
ConfiguredNetworkGroup_set_scheduler_timeout_Reply reply; - grpc::ClientContext context; + ClientContextWithTimeout context; grpc::Status status = m_stub->ConfiguredNetworkGroup_set_scheduler_timeout(&context, request, &reply); CHECK_GRPC_STATUS(status); assert(reply.status() < HAILO_STATUS_COUNT); @@ -738,7 +761,7 @@ hailo_status HailoRtRpcClient::ConfiguredNetworkGroup_set_scheduler_threshold(ui request.set_network_name(network_name); ConfiguredNetworkGroup_set_scheduler_threshold_Reply reply; - grpc::ClientContext context; + ClientContextWithTimeout context; grpc::Status status = m_stub->ConfiguredNetworkGroup_set_scheduler_threshold(&context, request, &reply); CHECK_GRPC_STATUS(status); assert(reply.status() < HAILO_STATUS_COUNT); @@ -754,7 +777,7 @@ hailo_status HailoRtRpcClient::ConfiguredNetworkGroup_set_scheduler_priority(uin request.set_network_name(network_name); ConfiguredNetworkGroup_set_scheduler_priority_Reply reply; - grpc::ClientContext context; + ClientContextWithTimeout context; grpc::Status status = m_stub->ConfiguredNetworkGroup_set_scheduler_priority(&context, request, &reply); CHECK_GRPC_STATUS(status); assert(reply.status() < HAILO_STATUS_COUNT); @@ -768,10 +791,13 @@ Expected HailoRtRpcClient::ConfiguredNetworkGroup_get_ ConfiguredNetworkGroup_get_latency_measurement_Reply reply; request.set_handle(handle); request.set_network_name(network_name); - grpc::ClientContext context; + ClientContextWithTimeout context; grpc::Status status = m_stub->ConfiguredNetworkGroup_get_latency_measurement(&context, request, &reply); CHECK_GRPC_STATUS_AS_EXPECTED(status); assert(reply.status() < HAILO_STATUS_COUNT); + if (HAILO_NOT_AVAILABLE == reply.status()) { + return make_unexpected(HAILO_NOT_AVAILABLE); + } CHECK_SUCCESS_AS_EXPECTED(static_cast(reply.status())); LatencyMeasurementResult result{ std::chrono::nanoseconds(reply.avg_hw_latency()) @@ -784,7 +810,7 @@ Expected HailoRtRpcClient::ConfiguredNetworkGroup_is_multi_context(uint32_ 
ConfiguredNetworkGroup_is_multi_context_Request request; ConfiguredNetworkGroup_is_multi_context_Reply reply; request.set_handle(handle); - grpc::ClientContext context; + ClientContextWithTimeout context; grpc::Status status = m_stub->ConfiguredNetworkGroup_is_multi_context(&context, request, &reply); CHECK_GRPC_STATUS_AS_EXPECTED(status); assert(reply.status() < HAILO_STATUS_COUNT); @@ -797,7 +823,7 @@ Expected HailoRtRpcClient::ConfiguredNetworkGroup_get_co ConfiguredNetworkGroup_get_config_params_Request request; ConfiguredNetworkGroup_get_config_params_Reply reply; request.set_handle(handle); - grpc::ClientContext context; + ClientContextWithTimeout context; grpc::Status status = m_stub->ConfiguredNetworkGroup_get_config_params(&context, request, &reply); CHECK_GRPC_STATUS_AS_EXPECTED(status); assert(reply.status() < HAILO_STATUS_COUNT); @@ -832,12 +858,65 @@ Expected HailoRtRpcClient::ConfiguredNetworkGroup_get_co return network_configure_params; } +Expected> HailoRtRpcClient::ConfiguredNetworkGroup_get_sorted_output_names(uint32_t handle) +{ + ConfiguredNetworkGroup_get_sorted_output_names_Request request; + ConfiguredNetworkGroup_get_sorted_output_names_Reply reply; + request.set_handle(handle); + ClientContextWithTimeout context; + grpc::Status status = m_stub->ConfiguredNetworkGroup_get_sorted_output_names(&context, request, &reply); + CHECK_GRPC_STATUS_AS_EXPECTED(status); + assert(reply.status() < HAILO_STATUS_COUNT); + CHECK_SUCCESS_AS_EXPECTED(static_cast(reply.status())); + std::vector result; + for (auto &name : reply.sorted_output_names()) { + result.push_back(name); + } + return result; +} + +Expected> HailoRtRpcClient::ConfiguredNetworkGroup_get_stream_names_from_vstream_name(uint32_t handle, const std::string &vstream_name) +{ + ConfiguredNetworkGroup_get_stream_names_from_vstream_name_Request request; + ConfiguredNetworkGroup_get_stream_names_from_vstream_name_Reply reply; + request.set_handle(handle); + request.set_vstream_name(vstream_name); + 
ClientContextWithTimeout context; + grpc::Status status = m_stub->ConfiguredNetworkGroup_get_stream_names_from_vstream_name(&context, request, &reply); + CHECK_GRPC_STATUS_AS_EXPECTED(status); + assert(reply.status() < HAILO_STATUS_COUNT); + CHECK_SUCCESS_AS_EXPECTED(static_cast(reply.status())); + std::vector result; + for (auto &name : reply.streams_names()) { + result.push_back(name); + } + return result; +} + +Expected> HailoRtRpcClient::ConfiguredNetworkGroup_get_vstream_names_from_stream_name(uint32_t handle, const std::string &stream_name) +{ + ConfiguredNetworkGroup_get_vstream_names_from_stream_name_Request request; + ConfiguredNetworkGroup_get_vstream_names_from_stream_name_Reply reply; + request.set_handle(handle); + request.set_stream_name(stream_name); + ClientContextWithTimeout context; + grpc::Status status = m_stub->ConfiguredNetworkGroup_get_vstream_names_from_stream_name(&context, request, &reply); + CHECK_GRPC_STATUS_AS_EXPECTED(status); + assert(reply.status() < HAILO_STATUS_COUNT); + CHECK_SUCCESS_AS_EXPECTED(static_cast(reply.status())); + std::vector result; + for (auto &name : reply.vstreams_names()) { + result.push_back(name); + } + return result; +} + hailo_status HailoRtRpcClient::InputVStream_write(uint32_t handle, const MemoryView &buffer) { InputVStream_write_Request request; request.set_handle(handle); request.set_data(buffer.data(), buffer.size()); - grpc::ClientContext context; + ClientContextWithTimeout context; InputVStream_write_Reply reply; grpc::Status status = m_stub->InputVStream_write(&context, request, &reply); CHECK_GRPC_STATUS(status); @@ -854,7 +933,7 @@ hailo_status HailoRtRpcClient::OutputVStream_read(uint32_t handle, MemoryView bu OutputVStream_read_Request request; request.set_handle(handle); request.set_size(static_cast(buffer.size())); - grpc::ClientContext context; + ClientContextWithTimeout context; OutputVStream_read_Reply reply; grpc::Status status = m_stub->OutputVStream_read(&context, request, &reply); 
CHECK_GRPC_STATUS(status); @@ -871,7 +950,7 @@ Expected HailoRtRpcClient::InputVStream_get_frame_size(uint32_t handle) { VStream_get_frame_size_Request request; request.set_handle(handle); - grpc::ClientContext context; + ClientContextWithTimeout context; VStream_get_frame_size_Reply reply; grpc::Status status = m_stub->InputVStream_get_frame_size(&context, request, &reply); CHECK_GRPC_STATUS_AS_EXPECTED(status); @@ -884,7 +963,7 @@ Expected HailoRtRpcClient::OutputVStream_get_frame_size(uint32_t handle) { VStream_get_frame_size_Request request; request.set_handle(handle); - grpc::ClientContext context; + ClientContextWithTimeout context; VStream_get_frame_size_Reply reply; grpc::Status status = m_stub->OutputVStream_get_frame_size(&context, request, &reply); CHECK_GRPC_STATUS_AS_EXPECTED(status); @@ -897,7 +976,7 @@ hailo_status HailoRtRpcClient::InputVStream_flush(uint32_t handle) { InputVStream_flush_Request request; request.set_handle(handle); - grpc::ClientContext context; + ClientContextWithTimeout context; InputVStream_flush_Reply reply; grpc::Status status = m_stub->InputVStream_flush(&context, request, &reply); CHECK_GRPC_STATUS(status); @@ -909,7 +988,7 @@ Expected HailoRtRpcClient::InputVStream_name(uint32_t handle) { VStream_name_Request request; request.set_handle(handle); - grpc::ClientContext context; + ClientContextWithTimeout context; VStream_name_Reply reply; grpc::Status status = m_stub->InputVStream_name(&context, request, &reply); CHECK_GRPC_STATUS_AS_EXPECTED(status); @@ -923,7 +1002,7 @@ Expected HailoRtRpcClient::OutputVStream_name(uint32_t handle) { VStream_name_Request request; request.set_handle(handle); - grpc::ClientContext context; + ClientContextWithTimeout context; VStream_name_Reply reply; grpc::Status status = m_stub->OutputVStream_name(&context, request, &reply); CHECK_GRPC_STATUS_AS_EXPECTED(status); @@ -937,7 +1016,7 @@ Expected HailoRtRpcClient::InputVStream_network_name(uint32_t handl { VStream_network_name_Request request; 
request.set_handle(handle); - grpc::ClientContext context; + ClientContextWithTimeout context; VStream_network_name_Reply reply; grpc::Status status = m_stub->InputVStream_network_name(&context, request, &reply); CHECK_GRPC_STATUS_AS_EXPECTED(status); @@ -951,7 +1030,7 @@ Expected HailoRtRpcClient::OutputVStream_network_name(uint32_t hand { VStream_network_name_Request request; request.set_handle(handle); - grpc::ClientContext context; + ClientContextWithTimeout context; VStream_network_name_Reply reply; grpc::Status status = m_stub->OutputVStream_network_name(&context, request, &reply); CHECK_GRPC_STATUS_AS_EXPECTED(status); @@ -965,7 +1044,7 @@ hailo_status HailoRtRpcClient::InputVStream_abort(uint32_t handle) { VStream_abort_Request request; request.set_handle(handle); - grpc::ClientContext context; + ClientContextWithTimeout context; VStream_abort_Reply reply; grpc::Status status = m_stub->InputVStream_abort(&context, request, &reply); CHECK_GRPC_STATUS(status); @@ -977,7 +1056,7 @@ hailo_status HailoRtRpcClient::OutputVStream_abort(uint32_t handle) { VStream_abort_Request request; request.set_handle(handle); - grpc::ClientContext context; + ClientContextWithTimeout context; VStream_abort_Reply reply; grpc::Status status = m_stub->OutputVStream_abort(&context, request, &reply); CHECK_GRPC_STATUS(status); @@ -989,7 +1068,7 @@ hailo_status HailoRtRpcClient::InputVStream_resume(uint32_t handle) { VStream_resume_Request request; request.set_handle(handle); - grpc::ClientContext context; + ClientContextWithTimeout context; VStream_resume_Reply reply; grpc::Status status = m_stub->InputVStream_resume(&context, request, &reply); CHECK_GRPC_STATUS(status); @@ -1001,7 +1080,7 @@ hailo_status HailoRtRpcClient::OutputVStream_resume(uint32_t handle) { VStream_resume_Request request; request.set_handle(handle); - grpc::ClientContext context; + ClientContextWithTimeout context; VStream_resume_Reply reply; grpc::Status status = m_stub->OutputVStream_resume(&context, request, 
&reply); CHECK_GRPC_STATUS(status); @@ -1009,11 +1088,59 @@ hailo_status HailoRtRpcClient::OutputVStream_resume(uint32_t handle) return static_cast(reply.status()); } +hailo_status HailoRtRpcClient::InputVStream_stop_and_clear(uint32_t handle) +{ + VStream_stop_and_clear_Request request; + request.set_handle(handle); + ClientContextWithTimeout context; + VStream_stop_and_clear_Reply reply; + grpc::Status status = m_stub->InputVStream_stop_and_clear(&context, request, &reply); + CHECK_GRPC_STATUS(status); + assert(reply.status() < HAILO_STATUS_COUNT); + return static_cast(reply.status()); +} + +hailo_status HailoRtRpcClient::OutputVStream_stop_and_clear(uint32_t handle) +{ + VStream_stop_and_clear_Request request; + request.set_handle(handle); + ClientContextWithTimeout context; + VStream_stop_and_clear_Reply reply; + grpc::Status status = m_stub->OutputVStream_stop_and_clear(&context, request, &reply); + CHECK_GRPC_STATUS(status); + assert(reply.status() < HAILO_STATUS_COUNT); + return static_cast(reply.status()); +} + +hailo_status HailoRtRpcClient::InputVStream_start_vstream(uint32_t handle) +{ + VStream_start_vstream_Request request; + request.set_handle(handle); + ClientContextWithTimeout context; + VStream_start_vstream_Reply reply; + grpc::Status status = m_stub->InputVStream_start_vstream(&context, request, &reply); + CHECK_GRPC_STATUS(status); + assert(reply.status() < HAILO_STATUS_COUNT); + return static_cast(reply.status()); +} + +hailo_status HailoRtRpcClient::OutputVStream_start_vstream(uint32_t handle) +{ + VStream_start_vstream_Request request; + request.set_handle(handle); + ClientContextWithTimeout context; + VStream_start_vstream_Reply reply; + grpc::Status status = m_stub->OutputVStream_start_vstream(&context, request, &reply); + CHECK_GRPC_STATUS(status); + assert(reply.status() < HAILO_STATUS_COUNT); + return static_cast(reply.status()); +} + Expected HailoRtRpcClient::InputVStream_get_user_buffer_format(uint32_t handle) { 
VStream_get_user_buffer_format_Request request; request.set_handle(handle); - grpc::ClientContext context; + ClientContextWithTimeout context; VStream_get_user_buffer_format_Reply reply; grpc::Status status = m_stub->InputVStream_get_user_buffer_format(&context, request, &reply); CHECK_GRPC_STATUS_AS_EXPECTED(status); @@ -1034,7 +1161,7 @@ Expected HailoRtRpcClient::OutputVStream_get_user_buffer_format( { VStream_get_user_buffer_format_Request request; request.set_handle(handle); - grpc::ClientContext context; + ClientContextWithTimeout context; VStream_get_user_buffer_format_Reply reply; grpc::Status status = m_stub->OutputVStream_get_user_buffer_format(&context, request, &reply); CHECK_GRPC_STATUS_AS_EXPECTED(status); @@ -1055,7 +1182,7 @@ Expected HailoRtRpcClient::InputVStream_get_info(uint32_t { VStream_get_info_Request request; request.set_handle(handle); - grpc::ClientContext context; + ClientContextWithTimeout context; VStream_get_info_Reply reply; grpc::Status status = m_stub->InputVStream_get_info(&context, request, &reply); CHECK_GRPC_STATUS_AS_EXPECTED(status); @@ -1068,7 +1195,7 @@ Expected HailoRtRpcClient::OutputVStream_get_info(uint32_t { VStream_get_info_Request request; request.set_handle(handle); - grpc::ClientContext context; + ClientContextWithTimeout context; VStream_get_info_Reply reply; grpc::Status status = m_stub->OutputVStream_get_info(&context, request, &reply); CHECK_GRPC_STATUS_AS_EXPECTED(status); @@ -1078,4 +1205,32 @@ Expected HailoRtRpcClient::OutputVStream_get_info(uint32_t return deserialize_vstream_info(info_proto); } +Expected HailoRtRpcClient::InputVStream_is_aborted(uint32_t handle) +{ + VStream_is_aborted_Request request; + request.set_handle(handle); + ClientContextWithTimeout context; + VStream_is_aborted_Reply reply; + grpc::Status status = m_stub->InputVStream_is_aborted(&context, request, &reply); + CHECK_GRPC_STATUS_AS_EXPECTED(status); + assert(reply.status() < HAILO_STATUS_COUNT); + 
CHECK_SUCCESS_AS_EXPECTED(static_cast(reply.status())); + auto is_aborted = reply.is_aborted(); + return is_aborted; +} + +Expected HailoRtRpcClient::OutputVStream_is_aborted(uint32_t handle) +{ + VStream_is_aborted_Request request; + request.set_handle(handle); + ClientContextWithTimeout context; + VStream_is_aborted_Reply reply; + grpc::Status status = m_stub->OutputVStream_is_aborted(&context, request, &reply); + CHECK_GRPC_STATUS_AS_EXPECTED(status); + assert(reply.status() < HAILO_STATUS_COUNT); + CHECK_SUCCESS_AS_EXPECTED(static_cast(reply.status())); + auto is_aborted = reply.is_aborted(); + return is_aborted; +} + } \ No newline at end of file diff --git a/hailort/libhailort/src/service/hailort_rpc_client.hpp b/hailort/libhailort/src/service/hailort_rpc_client.hpp index 5b393fd..231daa2 100644 --- a/hailort/libhailort/src/service/hailort_rpc_client.hpp +++ b/hailort/libhailort/src/service/hailort_rpc_client.hpp @@ -12,6 +12,7 @@ #include "hailo/hailort.h" #include "hailo/expected.hpp" +#include "hailo/device.hpp" #if defined(_MSC_VER) #pragma warning(push) @@ -33,6 +34,17 @@ namespace hailort { +// Higher then default-hrt-timeout so we can differentiate errors +static const std::chrono::milliseconds CONTEXT_TIMEOUT(HAILO_DEFAULT_VSTREAM_TIMEOUT_MS + 500); + +class ClientContextWithTimeout : public grpc::ClientContext { +public: + ClientContextWithTimeout() + { + set_deadline(std::chrono::system_clock::now() + CONTEXT_TIMEOUT); + } +}; + class HailoRtRpcClient final { public: HailoRtRpcClient(std::shared_ptr channel) @@ -43,13 +55,14 @@ public: Expected VDevice_create(const hailo_vdevice_params_t ¶ms, uint32_t pid); Expected VDevice_dup_handle(uint32_t pid, uint32_t handle); - hailo_status VDevice_release(uint32_t handle); + hailo_status VDevice_release(uint32_t handle, uint32_t pid); Expected> VDevice_get_physical_devices_ids(uint32_t handle); + Expected>> VDevice_get_physical_devices(uint32_t handle); Expected VDevice_get_default_streams_interface(uint32_t 
handle); Expected> VDevice_configure(uint32_t vdevice_handle, const Hef &hef, uint32_t pid, const NetworkGroupsParamsMap &configure_params={}); Expected ConfiguredNetworkGroup_dup_handle(uint32_t pid, uint32_t handle); - hailo_status ConfiguredNetworkGroup_release(uint32_t handle); + hailo_status ConfiguredNetworkGroup_release(uint32_t handle, uint32_t pid); Expected> ConfiguredNetworkGroup_make_input_vstream_params(uint32_t handle, bool quantized, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size, const std::string &network_name); @@ -75,15 +88,18 @@ public: Expected ConfiguredNetworkGroup_get_latency_measurement(uint32_t handle, const std::string &network_name); Expected ConfiguredNetworkGroup_is_multi_context(uint32_t handle); Expected ConfiguredNetworkGroup_get_config_params(uint32_t handle); + Expected> ConfiguredNetworkGroup_get_sorted_output_names(uint32_t handle); + Expected> ConfiguredNetworkGroup_get_stream_names_from_vstream_name(uint32_t handle, const std::string &vstream_name); + Expected> ConfiguredNetworkGroup_get_vstream_names_from_stream_name(uint32_t handle, const std::string &stream_name); Expected> InputVStreams_create(uint32_t net_group_handle, const std::map &inputs_params, uint32_t pid); Expected InputVStream_dup_handle(uint32_t pid, uint32_t handle); Expected OutputVStream_dup_handle(uint32_t pid, uint32_t handle); - hailo_status InputVStream_release(uint32_t handle); + hailo_status InputVStream_release(uint32_t handle, uint32_t pid); Expected> OutputVStreams_create(uint32_t net_group_handle, const std::map &output_params, uint32_t pid); - hailo_status OutputVStream_release(uint32_t handle); + hailo_status OutputVStream_release(uint32_t handle, uint32_t pid); hailo_status InputVStream_write(uint32_t handle, const MemoryView &buffer); hailo_status OutputVStream_read(uint32_t handle, MemoryView buffer); Expected InputVStream_get_frame_size(uint32_t handle); @@ -101,6 +117,10 @@ public: hailo_status 
OutputVStream_abort(uint32_t handle); hailo_status InputVStream_resume(uint32_t handle); hailo_status OutputVStream_resume(uint32_t handle); + hailo_status InputVStream_stop_and_clear(uint32_t handle); + hailo_status OutputVStream_stop_and_clear(uint32_t handle); + hailo_status InputVStream_start_vstream(uint32_t handle); + hailo_status OutputVStream_start_vstream(uint32_t handle); Expected InputVStream_get_user_buffer_format(uint32_t handle); Expected OutputVStream_get_user_buffer_format(uint32_t handle); @@ -108,6 +128,9 @@ public: Expected InputVStream_get_info(uint32_t handle); Expected OutputVStream_get_info(uint32_t handle); + Expected InputVStream_is_aborted(uint32_t handle); + Expected OutputVStream_is_aborted(uint32_t handle); + private: std::unique_ptr m_stub; }; diff --git a/hailort/libhailort/src/service/network_group_client.cpp b/hailort/libhailort/src/service/network_group_client.cpp index bb2db9c..0e2bf66 100644 --- a/hailort/libhailort/src/service/network_group_client.cpp +++ b/hailort/libhailort/src/service/network_group_client.cpp @@ -34,9 +34,25 @@ ConfiguredNetworkGroupClient::ConfiguredNetworkGroupClient(std::unique_ptr> ConfiguredNetworkGroupClient::duplicate_network_group_client(uint32_t handle, + const std::string &network_group_name) +{ + auto duplicated_net_group = std::shared_ptr(new (std::nothrow) ConfiguredNetworkGroupClient(handle, network_group_name)); + CHECK_ARG_NOT_NULL_AS_EXPECTED(duplicated_net_group); + auto status = duplicated_net_group->after_fork_in_child(); + CHECK_SUCCESS_AS_EXPECTED(status); + + return duplicated_net_group; +} + ConfiguredNetworkGroupClient::~ConfiguredNetworkGroupClient() { - auto reply = m_client->ConfiguredNetworkGroup_release(m_handle); + auto reply = m_client->ConfiguredNetworkGroup_release(m_handle, OsUtils::get_curr_pid()); if (reply != HAILO_SUCCESS) { LOGGER__CRITICAL("ConfiguredNetworkGroup_release failed with status: {}", reply); } @@ -65,9 +81,11 @@ hailo_status 
ConfiguredNetworkGroupClient::after_fork_in_child() { auto status = create_client(); CHECK_SUCCESS(status); + auto expected_dup_handle = m_client->ConfiguredNetworkGroup_dup_handle(OsUtils::get_curr_pid(), m_handle); CHECK_EXPECTED_AS_STATUS(expected_dup_handle); m_handle = expected_dup_handle.value(); + return HAILO_SUCCESS; } @@ -75,7 +93,7 @@ Expected> ConfiguredNetworkGroupClient::a const hailo_activate_network_group_params_t &/* network_group_params */) { LOGGER__WARNING("ConfiguredNetworkGroup::activate function is not supported when using multi-process service or HailoRT Scheduler."); - return make_unexpected(HAILO_NOT_IMPLEMENTED); + return make_unexpected(HAILO_INVALID_OPERATION); } /* Network group base functions */ @@ -160,7 +178,7 @@ Expected ConfiguredNetworkGroupClient::get_output_ hailo_status ConfiguredNetworkGroupClient::wait_for_activation(const std::chrono::milliseconds&) { LOGGER__WARNING("ConfiguredNetworkGroup::wait_for_activation function is not supported when using multi-process service or HailoRT Scheduler."); - return HAILO_NOT_IMPLEMENTED; + return HAILO_INVALID_OPERATION; } Expected>> ConfiguredNetworkGroupClient::get_output_vstream_groups() @@ -266,6 +284,12 @@ bool ConfiguredNetworkGroupClient::is_multi_context() const return reply.value(); } +Expected ConfiguredNetworkGroupClient::run_hw_infer_estimator() +{ + LOGGER__ERROR("ConfiguredNetworkGroupClient::run_hw_infer_estimator function is not supported when using multi-process service."); + return make_unexpected(HAILO_NOT_IMPLEMENTED); +} + const ConfigureNetworkParams ConfiguredNetworkGroupClient::get_config_params() const { auto reply = m_client->ConfiguredNetworkGroup_get_config_params(m_handle); @@ -276,6 +300,21 @@ const ConfigureNetworkParams ConfiguredNetworkGroupClient::get_config_params() c return reply.value(); } +Expected> ConfiguredNetworkGroupClient::get_sorted_output_names() +{ + return m_client->ConfiguredNetworkGroup_get_sorted_output_names(m_handle); +} + +Expected> 
ConfiguredNetworkGroupClient::get_stream_names_from_vstream_name(const std::string &vstream_name) +{ + return m_client->ConfiguredNetworkGroup_get_stream_names_from_vstream_name(m_handle, vstream_name); +} + +Expected> ConfiguredNetworkGroupClient::get_vstream_names_from_stream_name(const std::string &stream_name) +{ + return m_client->ConfiguredNetworkGroup_get_vstream_names_from_stream_name(m_handle, stream_name); +} + Expected> ConfiguredNetworkGroupClient::create_input_vstreams(const std::map &inputs_params) { auto reply = m_client->InputVStreams_create(m_handle, inputs_params, OsUtils::get_curr_pid()); diff --git a/hailort/libhailort/src/service/rpc_client_utils.hpp b/hailort/libhailort/src/service/rpc_client_utils.hpp index 965be9c..99d8444 100644 --- a/hailort/libhailort/src/service/rpc_client_utils.hpp +++ b/hailort/libhailort/src/service/rpc_client_utils.hpp @@ -3,7 +3,7 @@ * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** - * @file hailort_common.hpp + * @file rpc_client_utils.hpp * @brief Utility functions for rpc client communication **/ @@ -34,10 +34,14 @@ public: return instance; } - HailoRtRpcClientUtils() - : m_mutex(std::make_shared()) - , m_forking(false) - {} + HailoRtRpcClientUtils() : + m_mutex(std::make_shared()) + { + auto status = init_keep_alive_shutdown_event(); + if (HAILO_SUCCESS != status) { + LOGGER__ERROR("Failed to initialize RPC Client's keep-alive shutdown event with status {}", status); + } + } static Expected> create_client() { @@ -55,7 +59,7 @@ public: // Create client auto channel = grpc::CreateChannel(hailort::HAILORT_SERVICE_DEFAULT_ADDR, grpc::InsecureChannelCredentials()); auto client = make_unique_nothrow(channel); - CHECK(client != nullptr, HAILO_OUT_OF_HOST_MEMORY); + CHECK_NOT_NULL(client, HAILO_OUT_OF_HOST_MEMORY); // Check service version auto reply = client->get_service_version(); @@ -78,45 +82,39 @@ public: m_pid = OsUtils::get_curr_pid(); // Trigger client keep-alive - 
m_keep_alive_thread = make_unique_nothrow>([this] () { - return this->keep_alive(); - }); - CHECK(nullptr != m_keep_alive_thread, HAILO_OUT_OF_HOST_MEMORY); + status = start_keep_alive_thread(); + CHECK_SUCCESS(status); + m_initialized = true; } return HAILO_SUCCESS; } - hailo_status before_fork() + void before_fork() { - m_forking = true; - return m_keep_alive_thread->get(); + stop_keep_alive_thread(); } hailo_status after_fork_in_parent() { - m_forking = false; + m_keep_alive_shutdown_event->reset(); std::unique_lock lock(*m_mutex); if (m_initialized) { - // Trigger client keep-alive - m_keep_alive_thread = make_unique_nothrow>([this] () { - return this->keep_alive(); - }); + return start_keep_alive_thread(); } return HAILO_SUCCESS; } hailo_status after_fork_in_child() { - m_forking = false; m_mutex = std::make_shared(); + auto status = init_keep_alive_shutdown_event(); + CHECK_SUCCESS(status); + std::unique_lock lock(*m_mutex); if (m_initialized) { m_pid = OsUtils::get_curr_pid(); - // Trigger client keep-alive - m_keep_alive_thread = make_unique_nothrow>([this] () { - return this->keep_alive(); - }); + return start_keep_alive_thread(); } return HAILO_SUCCESS; } @@ -124,27 +122,59 @@ public: private: ~HailoRtRpcClientUtils() { - m_keep_alive_thread.release(); + stop_keep_alive_thread(); + } + + void stop_keep_alive_thread() + { + if (m_keep_alive_shutdown_event) { + (void)m_keep_alive_shutdown_event->signal(); + } + + m_keep_alive_thread.reset(); + } + + hailo_status start_keep_alive_thread() + { + m_keep_alive_thread = make_unique_nothrow>("SVC_KEEPALIVE", [this] () { + return this->keep_alive(); + }); + CHECK_NOT_NULL(m_keep_alive_thread, HAILO_OUT_OF_HOST_MEMORY); + return HAILO_SUCCESS; } hailo_status keep_alive() { auto channel = grpc::CreateChannel(hailort::HAILORT_SERVICE_DEFAULT_ADDR, grpc::InsecureChannelCredentials()); auto client = make_unique_nothrow(channel); - CHECK(client != nullptr, HAILO_OUT_OF_HOST_MEMORY); - while (!m_forking) { + 
CHECK_NOT_NULL(client, HAILO_OUT_OF_HOST_MEMORY); + + while (true) { + auto shutdown_status = m_keep_alive_shutdown_event->wait(hailort::HAILO_KEEPALIVE_INTERVAL / 2); + if (HAILO_TIMEOUT != shutdown_status) { + // shutdown event is signal (or we have another error) + return shutdown_status; + } + + // keep alive interval auto status = client->client_keep_alive(m_pid); CHECK_SUCCESS(status); - std::this_thread::sleep_for(hailort::HAILO_KEEPALIVE_INTERVAL / 2); } + } + + hailo_status init_keep_alive_shutdown_event() + { + m_keep_alive_shutdown_event = Event::create_shared(Event::State::not_signalled); + CHECK(nullptr != m_keep_alive_shutdown_event, HAILO_OUT_OF_HOST_MEMORY); + return HAILO_SUCCESS; } std::shared_ptr m_mutex; AsyncThreadPtr m_keep_alive_thread; bool m_initialized = false; - std::atomic m_forking; uint32_t m_pid; + EventPtr m_keep_alive_shutdown_event; }; } /* namespace hailort */ diff --git a/hailort/libhailort/src/stream_common/CMakeLists.txt b/hailort/libhailort/src/stream_common/CMakeLists.txt index 001d29e..cacbbb2 100644 --- a/hailort/libhailort/src/stream_common/CMakeLists.txt +++ b/hailort/libhailort/src/stream_common/CMakeLists.txt @@ -3,6 +3,7 @@ cmake_minimum_required(VERSION 3.0.0) set(SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/stream.cpp ${CMAKE_CURRENT_SOURCE_DIR}/stream_internal.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/nms_stream_reader.cpp ) set(HAILORT_CPP_SOURCES ${HAILORT_CPP_SOURCES} ${SRC_FILES} PARENT_SCOPE) diff --git a/hailort/libhailort/src/stream_common/async_common.hpp b/hailort/libhailort/src/stream_common/async_common.hpp new file mode 100644 index 0000000..31c39c8 --- /dev/null +++ b/hailort/libhailort/src/stream_common/async_common.hpp @@ -0,0 +1,31 @@ +/** + * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. 
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT) +**/ +/** + * @file async_common.hpp + * @brief Common types/functions for async api + **/ + +#ifndef _HAILO_ASYNC_COMMON_HPP_ +#define _HAILO_ASYNC_COMMON_HPP_ + +#include "hailo/stream.hpp" + +namespace hailort +{ + +// Internal function, wrapper to the user callbacks, accepts the callback status as an argument. +using InternalTransferDoneCallback = std::function; + +struct TransferRequest { + MemoryView buffer; + InternalTransferDoneCallback callback; + + // Optional pre-mapped user buffer. If set, mapped_buffer must be the same as the "buffer" + BufferPtr mapped_buffer = nullptr; +}; + +} /* namespace hailort */ + +#endif /* _HAILO_ASYNC_COMMON_HPP_ */ diff --git a/hailort/libhailort/src/stream_common/nms_stream_reader.cpp b/hailort/libhailort/src/stream_common/nms_stream_reader.cpp new file mode 100644 index 0000000..618be44 --- /dev/null +++ b/hailort/libhailort/src/stream_common/nms_stream_reader.cpp @@ -0,0 +1,300 @@ +/** + * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file nms_stream_reader.cpp + * @brief static class that helps receive and read the nms output stream according to the different burst mode, type and size. + * + * Explanation of state machine and logic: + * This class supports the following 5 nms cases: + * 1) Hailo-8 bbox mode (non burst mode) + * 2) Hailo-15 bbox mode + * 3) Hailo-8 Burst mode + * 4) Hailo-15 Burst per class mode + * 5) Hailo15 Burst per frame mode + * + * Let's explain each mode and the state machine of each mode: + * 1)-2) Hailo-8 bbox mode / Hailo-15 bbox mode - both work the same - they read bbox by bbox from the nms core until a delimeter comes + * and expect to read the amount of delimeters as the same amount of number of classes (times num chunks if more than one chunk per frame).
+ * + * 3) Hailo8 Burst mode - Hailo 8 burst mode reads bursts in the size of burst-size and expects each burst to be made of x bboxes and + then a delimeter and padding until the end of the burst - essentially what the state machine does here is read until the first delimeter + and then expect padding until end of burst (in release mode we don't check that the rest of burst is padding and + just go onto the next burst but in debug we validate that rest of burst is padding). NOTE: in Hailo-8 delimeter value and + padding value are both 0xFFFFFFFFFFFFFFFF so essentially we read until first delimeter - and every following delimeter + in burst is padding. This mode also supports interrupt per frame - assuming burst size received from SDK is larger than max bboxes + 1 (for delimeter) + we know there will be one burst per class and hence the output size will be num classes * burst size and we enable one interrupt per frame. + * + * 4) Hailo15 Burst per class mode - Hailo-15 Burst per class mode reads bursts in the size of burst size and expects the following order. + x bboxes , followed by a delimeter, followed by an image delimeter, followed by padding until the end of the burst. The bboxes, delimeter + and image delimeter can all be in different bursts - so essentially the way the state machine works is the following: we read burst by burst, + in each burst we iterate over the bboxes until we find a delimeter - once after that we know how many bboxes there were for that class, + and then we expect to see a following image delimeter after the delimeter, once we read the image delimeter we expect padding until the end of the + burst (which we ensure in debug but not in release).
NOTE: if a burst ends on a delimeter we need to read the next burst to get the image delimeter + even in the case where the amount of delimeters we read is equal to the amount of classes - otherwise there is data still in the core + that was not emptied and will be read as part of the next frame. This mode also supports interrupt per frame - assuming burst size received from SDK + is larger than max bboxes + 2 (for image delimeter and delimeter) we know there will be one burst per class and hence the output size will be + num classes * burst size and we enable one interrupt per frame. + * + * 5) Hailo15 Burst per frame mode - Hailo-15 Burst per frame mode reads bursts in the size of burst size and expects the following order. + x bboxes , followed by a delimeter, for all the classes until the last class where the last delimeter should be followed by an image delimeter + and padding until the end of the burst. The state machine works in the following way - we read burst by burst, and for each time we reach a delimeter + we save the amount of bboxes that were read for that class and keep reading the burst. NOTE: this is the only mode where there can be multiple + delimeters per burst. Once we read the last delimeter (which we know from the number of classes) - we ensure there is a following image delimeter (which again + can be in the following burst) and then assume the rest of the burst is padding (and in debug we verify that). NOTE: currently this mode is not + supported in the sdk.
+ * + **/ + +#include "hailo/hailort.h" +#include "hailo/expected.hpp" +#include "stream_common/nms_stream_reader.hpp" +#include "src/hef/layer_info.hpp" + +namespace hailort +{ + +static void finish_reading_burst_update_state(NMSBurstState *burst_state, bool *can_stop_reading_burst, size_t *burst_index) +{ + *burst_state = NMSBurstState::NMS_BURST_STATE_WAITING_FOR_DELIMETER; + *burst_index = (*burst_index + 1); + *can_stop_reading_burst = true; +} + +// Function that implements the state machine of the 3 different nms burst modes based on the value of the current bbox and the current state. +hailo_status NMSStreamReader::advance_state_machine(NMSBurstState *burst_state, const uint64_t current_bbox, + const hailo_nms_burst_type_t burst_type, const uint32_t num_classes, size_t *num_delimeters_received, + bool *can_stop_reading_burst, const size_t burst_offset, const size_t burst_size, size_t *burst_index) +{ + switch(current_bbox) { + // This is also case for Hailo8 padding - seeing as they are same value + case NMS_DELIMITER: + { + // If we are in hailo8 per class mode - if we are in state waiting for delimeter - we received delimeter + // otherwise we must be in state waiting for padding - in which case we received padding. 
+ if (HAILO_BURST_TYPE_H8_PER_CLASS == burst_type) { + CHECK_IN_DEBUG((NMSBurstState::NMS_BURST_STATE_WAITING_FOR_DELIMETER == (*burst_state)) || + (NMSBurstState::NMS_BURST_STATE_WAITING_FOR_PADDING == (*burst_state)), HAILO_NMS_BURST_INVALID_DATA, + "Invalid state, H8 NMS burst cannot receive delimeter while in state {}", (*burst_state)); + // To differentiate from H8 padding - where we should not increment amount of delimeters found + if ((*burst_state) == NMSBurstState::NMS_BURST_STATE_WAITING_FOR_DELIMETER) { + (*num_delimeters_received)++; + } +#ifdef NDEBUG + // In hailo8 burst mode - if is in state waiting for delimeter and got delimeter - rest will be padding and can skip + if ((*burst_state) == NMSBurstState::NMS_BURST_STATE_WAITING_FOR_DELIMETER) { + finish_reading_burst_update_state(burst_state, can_stop_reading_burst, burst_index); + break; + } +#endif + // In hailo8 mode after delimeter we expect padding until end of burst - seeing as h8 padding is same value + // Weather was in state wait for delimeter or state wait for padding - will always go to wait for padding until end of burst + *burst_state = NMSBurstState::NMS_BURST_STATE_WAITING_FOR_PADDING; + if (burst_offset == (burst_size - sizeof(current_bbox))) { + finish_reading_burst_update_state(burst_state, can_stop_reading_burst, burst_index); + } + break; + + } else if (HAILO_BURST_TYPE_H15_PER_CLASS == burst_type) { + CHECK_IN_DEBUG(NMSBurstState::NMS_BURST_STATE_WAITING_FOR_DELIMETER == (*burst_state), HAILO_NMS_BURST_INVALID_DATA, + "Invalid state, H15 Per class NMS burst cannot receive delimeter while in state {}", (*burst_state)); + (*num_delimeters_received)++; + *burst_state = NMSBurstState::NMS_BURST_STATE_WAITING_FOR_IMAGE_DELIMETER; + } else { + CHECK_IN_DEBUG(NMSBurstState::NMS_BURST_STATE_WAITING_FOR_DELIMETER == (*burst_state), HAILO_NMS_BURST_INVALID_DATA, + "Invalid state, H15 Per Frame NMS burst cannot receive delimeter while in state {}", (*burst_state)); + // in hailo15 per frame 
- if number of delimeter is same as num classes - we expect image delimeter next + // otherwise expect another delimeter + (*num_delimeters_received)++; + if (num_classes == (*num_delimeters_received)) { + *burst_state = NMSBurstState::NMS_BURST_STATE_WAITING_FOR_IMAGE_DELIMETER; + } + } + break; + } + + case NMS_IMAGE_DELIMITER: + { + CHECK_IN_DEBUG(HAILO_BURST_TYPE_H8_PER_CLASS != burst_type, HAILO_NMS_BURST_INVALID_DATA, + "Invalid state, H8 NMS burst cannot receive image delimeter"); + + CHECK_IN_DEBUG(NMSBurstState::NMS_BURST_STATE_WAITING_FOR_IMAGE_DELIMETER == (*burst_state), HAILO_NMS_BURST_INVALID_DATA, + "Invalid state, H15 NMS burst cannot receive image delimeter in state {}", (*burst_state)); + + // in both hailo15 per class and per frame - when receiving image delimeter we move to expecting padding + *burst_state = NMSBurstState::NMS_BURST_STATE_WAITING_FOR_PADDING; + +#ifdef NDEBUG + finish_reading_burst_update_state(burst_state, can_stop_reading_burst, burst_index); +#endif // NDEBUG + break; + } + + case NMS_H15_PADDING: + { + if ((HAILO_BURST_TYPE_H15_PER_CLASS == burst_type) || (HAILO_BURST_TYPE_H15_PER_FRAME == burst_type)) { + CHECK_IN_DEBUG(NMSBurstState::NMS_BURST_STATE_WAITING_FOR_PADDING == (*burst_state), HAILO_NMS_BURST_INVALID_DATA, + "Invalid state, H15 NMS burst cannot receive padding in state {}", (*burst_state)); + } + // In case of padding next state is wait for padding unless it is last padding of burst - then next state will be + // Wait for delimeter - will only get to this stage in debug - in release once image delimeter is read we ignore rest of + // burst seeing as it must be padding + if (burst_offset == (burst_size - sizeof(current_bbox))) { + finish_reading_burst_update_state(burst_state, can_stop_reading_burst, burst_index); + } + break; + } + } + + return HAILO_SUCCESS; +} + +hailo_status NMSStreamReader::read_nms_bbox_mode(OutputStream &stream, void *buffer, size_t offset) +{ + const uint32_t num_classes = 
stream.get_info().nms_info.number_of_classes; + const uint32_t chunks_per_frame = stream.get_info().nms_info.chunks_per_frame; + const size_t bbox_size = stream.get_info().nms_info.bbox_size; + + for (size_t delimeters_found = 0; delimeters_found < (num_classes * chunks_per_frame); delimeters_found++) { + nms_bbox_counter_t class_bboxes_count = 0; + nms_bbox_counter_t* class_bboxes_count_ptr = (nms_bbox_counter_t*)(reinterpret_cast(buffer) + offset); + offset += sizeof(*class_bboxes_count_ptr); + + while (true) { + MemoryView buffer_view(static_cast(buffer) + offset, bbox_size); + auto status = stream.read_impl(buffer_view); + if ((HAILO_STREAM_ABORTED_BY_USER == status) || + ((HAILO_STREAM_NOT_ACTIVATED == status))) { + return status; + } + CHECK_SUCCESS(status, "Failed reading nms bbox"); + const uint64_t current_bbox = *(uint64_t*)((uint8_t*)buffer + offset); + + if (NMS_IMAGE_DELIMITER == current_bbox) { + continue; + } + + if (NMS_DELIMITER == current_bbox) { + break; + } + + class_bboxes_count++; + CHECK_IN_DEBUG(class_bboxes_count <= stream.get_info().nms_info.max_bboxes_per_class, HAILO_INTERNAL_FAILURE, + "Data read from the device for the current class was size {}, max size is {}", class_bboxes_count, + stream.get_info().nms_info.max_bboxes_per_class); + offset += bbox_size; + } + + *class_bboxes_count_ptr = class_bboxes_count; + } + + return HAILO_SUCCESS; +} + +hailo_status NMSStreamReader::read_nms_burst_mode(OutputStream &stream, void *buffer, size_t offset, size_t buffer_size) +{ + NMSBurstState burst_state = NMSBurstState::NMS_BURST_STATE_WAITING_FOR_DELIMETER; + const uint32_t bbox_size = stream.get_info().nms_info.bbox_size; + const size_t burst_size = stream.get_layer_info().nms_info.burst_size * bbox_size; + const hailo_nms_burst_type_t burst_type = stream.get_layer_info().nms_info.burst_type; + const auto num_expected_delimeters = stream.get_info().nms_info.chunks_per_frame * stream.get_info().nms_info.number_of_classes; + // Transfer size if 
affected from if working in interrupt per burst or interrupt per frame + const size_t transfer_size = LayerInfoUtils::get_nms_layer_transfer_size(stream.get_layer_info()); + const bool is_interrupt_per_frame = (transfer_size > burst_size); + + CHECK(bbox_size == sizeof(uint64_t), HAILO_INTERNAL_FAILURE, + "Invalid Bbox size, must be 8 bytes received {}", bbox_size); + + CHECK(transfer_size <= buffer_size, HAILO_INTERNAL_FAILURE, "Invalid transfer size {}, Cannot be larger than buffer {}", + transfer_size, buffer_size); + + // Start writing bboxes at offset sizeof(nms_bbox_counter_t) - because the first sizeof(nms_bbox_counter_t) will be + // used to write amount of bboxes found for class 0 etc... + nms_bbox_counter_t class_bboxes_count = 0; + nms_bbox_counter_t* class_bboxes_count_ptr = (nms_bbox_counter_t*)(reinterpret_cast(buffer) + offset); + offset += sizeof(nms_bbox_counter_t); + + // Counter of number of delimeters found in frame + size_t delimeters_found = 0; + size_t burst_index = 0; + uint8_t *start_index_of_burst_in_buffer = nullptr; + while ((delimeters_found < num_expected_delimeters) || (NMSBurstState::NMS_BURST_STATE_WAITING_FOR_IMAGE_DELIMETER == burst_state)) { + // In interrupt per frame we read whole frame once (in first iteration) - then don't read in following loop iterations + // delimeters_found will always be 0 in first iteration - and in interrupt_per_frame will always be larger in following iterations + if (!is_interrupt_per_frame || (0 == delimeters_found)) { + assert(offset + transfer_size <= buffer_size); + start_index_of_burst_in_buffer = static_cast(buffer) + offset; + MemoryView buffer_view(start_index_of_burst_in_buffer, transfer_size); + auto status = stream.read_impl(buffer_view); + if ((HAILO_STREAM_ABORTED_BY_USER == status) || ((HAILO_STREAM_NOT_ACTIVATED == status))) { + return status; + } + CHECK_SUCCESS(status, "Failed reading nms burst"); + } + + // Flag that marks if we can stop reading burst and continue to next burst + 
bool can_stop_reading_burst = false; + // Iterate through burst and copy relevant data to user buffer + for (size_t burst_offset = 0; burst_offset < burst_size; burst_offset += bbox_size) { + uint64_t current_bbox = 0; + if (is_interrupt_per_frame) { + assert((burst_index * burst_size) + burst_offset < transfer_size); + current_bbox = *(uint64_t*)((uint8_t*)start_index_of_burst_in_buffer + (burst_index * burst_size) + burst_offset); + } else { + current_bbox = *(uint64_t*)((uint8_t*)start_index_of_burst_in_buffer + burst_offset); + } + + // If read delimeter - fill in information about num of bboxes found for the class (we also make sure that + // It is in state NMS_BURST_STATE_WAITING_FOR_DELIMETER because in hailo8 padding is same value) + if ((NMS_DELIMITER == current_bbox) && (NMSBurstState::NMS_BURST_STATE_WAITING_FOR_DELIMETER == burst_state)) { + *class_bboxes_count_ptr = class_bboxes_count; + class_bboxes_count_ptr = (nms_bbox_counter_t*)(reinterpret_cast(buffer) + offset); + class_bboxes_count = 0; + offset += sizeof(nms_bbox_counter_t); + } + + // Received delimeter can stop reading burst because rest of burst is image delimeter then padding + if ((NMS_DELIMITER == current_bbox) || (NMS_IMAGE_DELIMITER == current_bbox) || (NMS_H15_PADDING == current_bbox)) { + auto status = advance_state_machine(&burst_state, current_bbox, burst_type, stream.get_info().nms_info.number_of_classes, + &delimeters_found, &can_stop_reading_burst, burst_offset, burst_size, &burst_index); + CHECK_SUCCESS(status); + + if (can_stop_reading_burst) { + break; + } + continue; + } + + class_bboxes_count++; + CHECK_IN_DEBUG(class_bboxes_count <= stream.get_info().nms_info.max_bboxes_per_class, HAILO_INTERNAL_FAILURE, + "Data read from the device for the current class was size {}, max size is {}", class_bboxes_count, + stream.get_info().nms_info.max_bboxes_per_class); + + // Copy bbox to correct location in buffer + memcpy((static_cast(buffer) + offset), ¤t_bbox, sizeof(current_bbox)); 
+ offset += bbox_size; + } + } + + return HAILO_SUCCESS; +} + +hailo_status NMSStreamReader::read_nms(OutputStream &stream, void *buffer, size_t offset, size_t size) +{ + hailo_status status = HAILO_UNINITIALIZED; + const bool burst_mode = (HAILO_BURST_TYPE_NO_BURST != stream.get_layer_info().nms_info.burst_type); + if (burst_mode) { + status = NMSStreamReader::read_nms_burst_mode(stream, buffer, offset, size); + } else { + status = NMSStreamReader::read_nms_bbox_mode(stream, buffer, offset); + } + if ((HAILO_STREAM_ABORTED_BY_USER == status) || (HAILO_STREAM_NOT_ACTIVATED == status)) { + return status; + } + CHECK_SUCCESS(status, "Failed reading nms"); + + return HAILO_SUCCESS; +} + +} /* namespace hailort */ \ No newline at end of file diff --git a/hailort/libhailort/src/stream_common/nms_stream_reader.hpp b/hailort/libhailort/src/stream_common/nms_stream_reader.hpp new file mode 100644 index 0000000..db5139c --- /dev/null +++ b/hailort/libhailort/src/stream_common/nms_stream_reader.hpp @@ -0,0 +1,47 @@ +/** + * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file nms_stream_reader.hpp + * @brief static class that helps receives and reads the nms ouput stream according to the differnet burst mode, type and size. + * + * For explanation on the different burst modes and types and state machine and logic of the class please check out the cpp. 
+ * + **/ + +#ifndef _NMS_STREAM_READER_HPP_ +#define _NMS_STREAM_READER_HPP_ + +#include "hailo/stream.hpp" +#include "common/utils.hpp" +#include "hailo/hailort_common.hpp" + +namespace hailort +{ + +static constexpr uint32_t MAX_NMS_BURST_SIZE = 65536; +static const uint64_t NMS_DELIMITER = 0xFFFFFFFFFFFFFFFF; +static const uint64_t NMS_IMAGE_DELIMITER = 0xFFFFFFFFFFFFFFFE; +static const uint64_t NMS_H15_PADDING = 0xFFFFFFFFFFFFFFFD; + +enum class NMSBurstState { + NMS_BURST_STATE_WAITING_FOR_DELIMETER = 0, + NMS_BURST_STATE_WAITING_FOR_IMAGE_DELIMETER = 1, + NMS_BURST_STATE_WAITING_FOR_PADDING = 2, +}; + +class NMSStreamReader { +public: + static hailo_status read_nms(OutputStream &stream, void *buffer, size_t offset, size_t size); +private: + static hailo_status read_nms_bbox_mode(OutputStream &stream, void *buffer, size_t offset); + static hailo_status read_nms_burst_mode(OutputStream &stream, void *buffer, size_t offset, size_t buffer_size); + static hailo_status advance_state_machine(NMSBurstState *burst_state, const uint64_t current_bbox, + const hailo_nms_burst_type_t burst_type, const uint32_t num_classes, size_t *num_delimeters_received, + bool *can_stop_reading_burst, const size_t burst_offset, const size_t burst_size, size_t *burst_index); +}; + +} /* namespace hailort */ + +#endif /* _STREAM_INTERNAL_HPP_ */ \ No newline at end of file diff --git a/hailort/libhailort/src/stream_common/stream.cpp b/hailort/libhailort/src/stream_common/stream.cpp index df20eee..909fbbf 100644 --- a/hailort/libhailort/src/stream_common/stream.cpp +++ b/hailort/libhailort/src/stream_common/stream.cpp @@ -12,6 +12,7 @@ #include "hailo/hailort_common.hpp" #include "hailo/transform.hpp" #include "common/utils.hpp" +#include "stream_common/nms_stream_reader.hpp" #include @@ -25,25 +26,32 @@ hailo_status InputStream::flush() hailo_status InputStream::write(const MemoryView &buffer) { - CHECK((buffer.size() % get_info().hw_frame_size) == 0, HAILO_INVALID_ARGUMENT, - "write 
size {} must be a multiple of hw size {}", buffer.size(), get_info().hw_frame_size); + CHECK(buffer.size() == get_frame_size(), HAILO_INVALID_ARGUMENT, + "write size {} must be {}", buffer.size(), get_frame_size()); CHECK(((buffer.size() % HailoRTCommon::HW_DATA_ALIGNMENT) == 0), HAILO_INVALID_ARGUMENT, "Input must be aligned to {} (got {})", HailoRTCommon::HW_DATA_ALIGNMENT, buffer.size()); - - return sync_write_all_raw_buffer_no_transform_impl(const_cast(buffer.data()), 0, buffer.size()); + + return write_impl(buffer); } -hailo_status InputStream::wait_for_ready(size_t /* transfer_size */, std::chrono::milliseconds /* timeout */) +hailo_status InputStream::write(const void *buffer, size_t size) { - return HAILO_NOT_IMPLEMENTED; + return write(MemoryView::create_const(buffer, size)); } -hailo_status InputStream::write_async(std::shared_ptr /* buffer */, const TransferDoneCallback &/* user_callback */, void */* opaque */) +hailo_status InputStream::wait_for_async_ready(size_t /* transfer_size */, std::chrono::milliseconds /* timeout */) { + LOGGER__ERROR("wait_for_async_ready not implemented for sync API"); return HAILO_NOT_IMPLEMENTED; } +Expected InputStream::get_async_max_queue_size() const +{ + LOGGER__ERROR("get_async_max_queue_size not implemented for sync API"); + return make_unexpected(HAILO_NOT_IMPLEMENTED); +} + std::string InputStream::to_string() const { std::stringstream string_stream; @@ -60,76 +68,40 @@ EventPtr &InputStream::get_network_group_activated_event() hailo_status OutputStream::read_nms(void *buffer, size_t offset, size_t size) { - uint32_t num_of_classes = get_info().nms_info.number_of_classes; - uint32_t max_bboxes_per_class = get_info().nms_info.max_bboxes_per_class; - uint32_t chunks_per_frame = get_info().nms_info.chunks_per_frame; - size_t bbox_size = get_info().nms_info.bbox_size; - size_t transfer_size = bbox_size; - CHECK(size == get_info().hw_frame_size, HAILO_INSUFFICIENT_BUFFER, "On nms stream buffer size should be {} (given 
size {})", get_info().hw_frame_size, size); - for (uint32_t chunk_index = 0; chunk_index < chunks_per_frame; chunk_index++) { - for (uint32_t class_index = 0; class_index < num_of_classes; class_index++) { - nms_bbox_counter_t class_bboxes_count = 0; - nms_bbox_counter_t* class_bboxes_count_ptr = (nms_bbox_counter_t*)(reinterpret_cast(buffer) + offset); - offset += sizeof(*class_bboxes_count_ptr); - - // Read bboxes until reaching delimiter - for (;;) { - MemoryView buffer_view(static_cast(buffer) + offset, transfer_size); - auto expected_bytes_read = sync_read_raw_buffer(buffer_view); - if ((HAILO_STREAM_ABORTED_BY_USER == expected_bytes_read.status()) || - ((HAILO_STREAM_NOT_ACTIVATED == expected_bytes_read.status()))) { - return expected_bytes_read.status(); - } - CHECK_EXPECTED_AS_STATUS(expected_bytes_read, "Failed reading nms bbox"); - transfer_size = expected_bytes_read.release(); - CHECK(transfer_size == bbox_size, HAILO_INTERNAL_FAILURE, - "Data read from the device was size {}, should be bbox size {}", transfer_size, bbox_size); - - if (HailoRTCommon::NMS_DUMMY_DELIMITER == *(uint64_t*)((uint8_t*)buffer + offset)) { - continue; - } - - if (HailoRTCommon::NMS_DELIMITER == *(uint64_t*)((uint8_t*)buffer + offset)) { - break; - } - - class_bboxes_count++; - CHECK(class_bboxes_count <= max_bboxes_per_class, HAILO_INTERNAL_FAILURE, - "Data read from the device for the current class was size {}, max size is {}", class_bboxes_count, max_bboxes_per_class); - offset += bbox_size; - } - - *class_bboxes_count_ptr = class_bboxes_count; - } - } - return HAILO_SUCCESS; + return NMSStreamReader::read_nms((*this), buffer, offset, size); } hailo_status OutputStream::read(MemoryView buffer) { - CHECK((buffer.size() % get_info().hw_frame_size) == 0, HAILO_INVALID_ARGUMENT, - "Read size {} must be a multiple of hw size {}", buffer.size(), get_info().hw_frame_size); + CHECK(buffer.size() == get_frame_size(), HAILO_INVALID_ARGUMENT, "Read size {} must be {}", buffer.size(), + 
get_frame_size()); if (get_info().format.order == HAILO_FORMAT_ORDER_HAILO_NMS){ return read_nms(buffer.data(), 0, buffer.size()); } else { - return this->read_all(buffer); + return read_impl(buffer); } } -hailo_status OutputStream::wait_for_ready(size_t /* transfer_size */, std::chrono::milliseconds /* timeout */) +hailo_status OutputStream::read(void *buffer, size_t size) { - return HAILO_NOT_IMPLEMENTED; + return read(MemoryView(buffer, size)); } -hailo_status OutputStream::read_async(std::shared_ptr /* buffer */, const TransferDoneCallback &/* user_callback */, void */* opaque */) +hailo_status OutputStream::wait_for_async_ready(size_t /* transfer_size */, std::chrono::milliseconds /* timeout */) { + LOGGER__ERROR("wait_for_async_ready not implemented for sync API"); return HAILO_NOT_IMPLEMENTED; } +Expected OutputStream::get_async_max_queue_size() const +{ + LOGGER__ERROR("get_async_max_queue_size not implemented for sync API"); + return make_unexpected(HAILO_NOT_IMPLEMENTED); +} std::string OutputStream::to_string() const { diff --git a/hailort/libhailort/src/stream_common/stream_internal.cpp b/hailort/libhailort/src/stream_common/stream_internal.cpp index b3fb244..76a6421 100644 --- a/hailort/libhailort/src/stream_common/stream_internal.cpp +++ b/hailort/libhailort/src/stream_common/stream_internal.cpp @@ -27,6 +27,42 @@ InputStreamBase::InputStreamBase(const hailo_stream_info_t &stream_info, m_stream_info = stream_info; } +hailo_status InputStreamBase::write_async(BufferPtr buffer, const TransferDoneCallback &user_callback) +{ + CHECK_ARG_NOT_NULL(buffer); + CHECK_ARG_NOT_NULL(buffer->data()); + CHECK(buffer->size() == get_frame_size(), HAILO_INVALID_ARGUMENT, "Write size {} must be frame size {}", buffer->size(), + get_frame_size()); + + auto wrapped_callback = [buffer, user_callback](hailo_status status) { + user_callback(CompletionInfo{status, buffer->data(), buffer->size()}); + }; + return write_async(TransferRequest{MemoryView(*buffer), 
wrapped_callback, buffer}); +} + +hailo_status InputStreamBase::write_async(const MemoryView &buffer, const TransferDoneCallback &user_callback) +{ + CHECK_ARG_NOT_NULL(buffer.data()); + CHECK(buffer.size() == get_frame_size(), HAILO_INVALID_ARGUMENT, "Write size {} must be frame size {}", buffer.size(), + get_frame_size()); + + auto wrapped_callback = [buffer, user_callback](hailo_status status) { + user_callback(CompletionInfo{status, const_cast(buffer.data()), buffer.size()}); + }; + return write_async(TransferRequest{buffer, wrapped_callback}); +} + +hailo_status InputStreamBase::write_async(const void *buffer, size_t size, const TransferDoneCallback &user_callback) +{ + return write_async(MemoryView::create_const(buffer, size), user_callback); +} + +hailo_status InputStreamBase::write_async(TransferRequest &&) +{ + LOGGER__ERROR("write_async not implemented for sync API"); + return HAILO_NOT_IMPLEMENTED; +} + EventPtr &InputStreamBase::get_core_op_activated_event() { return m_core_op_activated_event; @@ -44,6 +80,42 @@ OutputStreamBase::OutputStreamBase(const LayerInfo &layer_info, const hailo_stre m_stream_info = stream_info; } +hailo_status OutputStreamBase::read_async(BufferPtr buffer, const TransferDoneCallback &user_callback) +{ + CHECK_ARG_NOT_NULL(buffer); + CHECK_ARG_NOT_NULL(buffer->data()); + CHECK(buffer->size() == get_frame_size(), HAILO_INVALID_ARGUMENT, "Read size {} must be frame size {}", buffer->size(), + get_frame_size()); + + auto wrapped_callback = [buffer, user_callback](hailo_status status) { + user_callback(CompletionInfo{status, const_cast(buffer->data()), buffer->size()}); + }; + return read_async(TransferRequest{MemoryView(*buffer), wrapped_callback, buffer}); +} + +hailo_status OutputStreamBase::read_async(MemoryView buffer, const TransferDoneCallback &user_callback) +{ + CHECK_ARG_NOT_NULL(buffer.data()); + CHECK(buffer.size() == get_frame_size(), HAILO_INVALID_ARGUMENT, "Read size {} must be frame size {}", buffer.size(), + 
get_frame_size()); + + auto wrapped_callback = [buffer, user_callback](hailo_status status) { + user_callback(CompletionInfo{status, const_cast(buffer.data()), buffer.size()}); + }; + return read_async(TransferRequest{buffer, wrapped_callback}); +} + +hailo_status OutputStreamBase::read_async(void *buffer, size_t size, const TransferDoneCallback &user_callback) +{ + return read_async(MemoryView(buffer, size), user_callback); +} + +hailo_status OutputStreamBase::read_async(TransferRequest &&) +{ + LOGGER__ERROR("read_async not implemented for sync API"); + return HAILO_NOT_IMPLEMENTED; +} + EventPtr &OutputStreamBase::get_core_op_activated_event() { return m_core_op_activated_event; diff --git a/hailort/libhailort/src/stream_common/stream_internal.hpp b/hailort/libhailort/src/stream_common/stream_internal.hpp index 27eff6b..b09340c 100644 --- a/hailort/libhailort/src/stream_common/stream_internal.hpp +++ b/hailort/libhailort/src/stream_common/stream_internal.hpp @@ -11,16 +11,23 @@ * * InputStream (External "interface") * |-- InputStreamBase (Base class) - * |-- VdmaInputStream + * |-- VdmaInputStreamBase + * |-- VdmaInputStream + * |-- VdmaAsyncInputStream * |-- EthernetInputStream * |-- MipiInputStream + * |-- VDeviceInputStreamBase + * |-- See vdevice_stream.hpp for subclasses * * * OutputStream (External "interface") * |-- OutputStreamBase (Base class) - * |-- VdmaOutputStream + * |-- VdmaOutputStreamBase + * |-- VdmaOutputStream + * |-- VdmaAsyncOutputStream * |-- EthernetOutputStream - * + * |-- VDeviceOutputStreamBase + * |-- See vdevice_stream.hpp for subclasses **/ #ifndef _STREAM_INTERNAL_HPP_ @@ -30,11 +37,14 @@ #include "hailo/event.hpp" #include "hailo/hailort_common.hpp" +#include "stream_common/async_common.hpp" #include "hef/hef_internal.hpp" #include "device_common/control_protocol.hpp" #include "hef/layer_info.hpp" #include "vdma/channel/boundary_channel.hpp" +using device_id_t = std::string; + namespace hailort { @@ -64,9 +74,9 @@ public: return 
m_nn_stream_config; }; - virtual hailo_status send_pending_buffer(size_t device_index = 0) + virtual hailo_status send_pending_buffer(const device_id_t &device_id) { - (void)device_index; + (void)device_id; return HAILO_INVALID_OPERATION; } @@ -74,16 +84,17 @@ public: { return make_unexpected(HAILO_INVALID_OPERATION); } - + virtual Expected get_pending_frames_count() const { return make_unexpected(HAILO_INVALID_OPERATION); } - virtual hailo_status register_interrupt_callback(const vdma::ProcessingCompleteCallback &/*callback*/) - { - return HAILO_INVALID_OPERATION; - } + virtual hailo_status write_async(BufferPtr buffer, const TransferDoneCallback &user_callback) override final; + virtual hailo_status write_async(const MemoryView &buffer, const TransferDoneCallback &user_callback) override final; + virtual hailo_status write_async(const void *buffer, size_t size, const TransferDoneCallback &user_callback) override final; + + virtual hailo_status write_async(TransferRequest &&transfer_request); CONTROL_PROTOCOL__nn_stream_config_t m_nn_stream_config; @@ -94,7 +105,14 @@ protected: { m_stream_info = LayerInfoUtils::get_stream_info_from_layer_info(layer_info); - const bool hw_padding_supported = HefConfigurator::is_hw_padding_supported(layer_info); + auto max_periph_bytes_from_hef = HefConfigurator::max_periph_bytes_value(stream_interface); + if (HAILO_SUCCESS != max_periph_bytes_from_hef.status()) { + status = max_periph_bytes_from_hef.status(); + return; + } + const auto max_periph_bytes = MIN(max_periph_bytes_from_hef.value(), layer_info.max_shmifo_size); + const bool hw_padding_supported = HefConfigurator::is_hw_padding_supported(layer_info, max_periph_bytes); + auto nn_stream_config = HefConfigurator::parse_nn_stream_config(layer_info, hw_padding_supported && (HAILO_STREAM_INTERFACE_MIPI != stream_interface)); // On MIPI networks, we don't want to use hw padding nn stream config. 
if(!nn_stream_config) { @@ -138,27 +156,41 @@ public: { return make_unexpected(HAILO_INVALID_OPERATION); } - + virtual Expected get_pending_frames_count() const { return make_unexpected(HAILO_INVALID_OPERATION); } - virtual hailo_status register_interrupt_callback(const vdma::ProcessingCompleteCallback &/*callback*/) + virtual hailo_status set_next_device_to_read(const device_id_t &device_id) { + (void)device_id; return HAILO_INVALID_OPERATION; } + virtual hailo_status read_async(BufferPtr buffer, const TransferDoneCallback &user_callback) override final; + virtual hailo_status read_async(MemoryView buffer, const TransferDoneCallback &user_callback) override final; + virtual hailo_status read_async(void *buffer, size_t size, const TransferDoneCallback &user_callback) override final; + + virtual hailo_status read_async(TransferRequest &&transfer_request); + CONTROL_PROTOCOL__nn_stream_config_t m_nn_stream_config; protected: - explicit OutputStreamBase(const LayerInfo &layer_info, + explicit OutputStreamBase(const LayerInfo &layer_info, hailo_stream_interface_t stream_interface, EventPtr &&core_op_activated_event, hailo_status &status) : m_layer_info(layer_info), m_core_op_activated_event(std::move(core_op_activated_event)) { m_stream_info = LayerInfoUtils::get_stream_info_from_layer_info(m_layer_info); - const bool hw_padding_supported = HefConfigurator::is_hw_padding_supported(m_layer_info); + auto max_periph_bytes_from_hef = HefConfigurator::max_periph_bytes_value(stream_interface); + if (HAILO_SUCCESS != max_periph_bytes_from_hef.status()) { + status = max_periph_bytes_from_hef.status(); + return; + } + const auto max_periph_bytes = MIN(max_periph_bytes_from_hef.value(), layer_info.max_shmifo_size); + const bool hw_padding_supported = HefConfigurator::is_hw_padding_supported(layer_info, max_periph_bytes); + auto nn_stream_config = HefConfigurator::parse_nn_stream_config(m_layer_info, hw_padding_supported); if(!nn_stream_config) { LOGGER__ERROR("Failed parse nn 
stream config"); diff --git a/hailort/libhailort/src/transform/transform.cpp b/hailort/libhailort/src/transform/transform.cpp index a983c95..54e5208 100644 --- a/hailort/libhailort/src/transform/transform.cpp +++ b/hailort/libhailort/src/transform/transform.cpp @@ -185,6 +185,7 @@ hailo_status transform__transpose_buffer(const void *src_ptr, const hailo_3d_ima switch (format.order) { case HAILO_FORMAT_ORDER_NHWC: + case HAILO_FORMAT_ORDER_RGB4: case HAILO_FORMAT_ORDER_NHW: case HAILO_FORMAT_ORDER_BAYER_RGB: case HAILO_FORMAT_ORDER_12_BIT_BAYER_RGB: @@ -445,8 +446,8 @@ static inline void transform__parse_and_copy_bbox (hailo_bbox_t *dst, uint64_t* void transform__d2h_NMS(const uint8_t *src_ptr, uint8_t *dst_ptr, const hailo_nms_info_t &nms_info, std::vector &chunk_offsets) { /* Validate arguments */ - ASSERT(NULL != src_ptr); - ASSERT(NULL != dst_ptr); + assert(NULL != src_ptr); + assert(NULL != dst_ptr); uint32_t num_of_classes = nms_info.number_of_classes; uint32_t bbox_size = nms_info.bbox_size; @@ -485,6 +486,7 @@ void transform__d2h_NMS(const uint8_t *src_ptr, uint8_t *dst_ptr, const hailo_nm // Add bbox from all chunks of current class src_offset = chunk_offsets[chunk_index]; class_bboxes_count = *((nms_bbox_counter_t*)((uint8_t*)src_ptr + src_offset)); + assert(class_bboxes_count <= nms_info.max_bboxes_per_class); *dst_bbox_counter = static_cast(*dst_bbox_counter + class_bboxes_count); src_offset += sizeof(nms_bbox_counter_t); @@ -739,8 +741,8 @@ hailo_status transform__d2h_argmax_NHCW_to_NHW(const T *src_ptr, const hailo_3d_ CHECK(dst_image_shape.features == 1, HAILO_INVALID_OPERATION, "NHCW_to_NHW argmax Transform is supported only when dst features ({}) is 1", dst_image_shape.features); - CHECK(src_image_shape.features < std::numeric_limits::max(), HAILO_INVALID_OPERATION, - "NHCW_to_NHW argmax Transform is supported only when src features ({}) is smaller than {}", + CHECK(src_image_shape.features <= std::numeric_limits::max(), HAILO_INVALID_OPERATION, + 
"NHCW_to_NHW argmax Transform is supported only when src features ({}) is equal/smaller than {}", src_image_shape.features, std::numeric_limits::max()); const auto src_row_size = src_image_shape.width * src_image_shape.features; @@ -828,7 +830,7 @@ hailo_status transform__h2d_RGB4_to_NHCW(const T *src_ptr, const hailo_3d_image_ const auto src_row_size = HailoRTCommon::align_to(row_size, RGB4_ALIGNMENT); const auto dst_row_size = dst_image_shape.width * dst_image_shape.features; - const auto pad_size = (dst_image_shape.width - src_image_shape.width) * dst_image_shape.features; + const auto pad_size = dst_image_shape.width - src_image_shape.width; uint32_t src_offset = 0; uint32_t dst_offset = 0; @@ -841,7 +843,7 @@ hailo_status transform__h2d_RGB4_to_NHCW(const T *src_ptr, const hailo_3d_image_ dst_offset = r * dst_row_size + f * dst_image_shape.width + c; dst_ptr[dst_offset] = src_ptr[src_offset]; } - /* pad feature to 8 elemnts */ + /* pad feature to 8 elements */ if (pad_size != 0) { dst_offset = r * dst_row_size + f * dst_image_shape.width + src_image_shape.width; std::fill_n(dst_ptr + dst_offset, pad_size, static_cast(0)); @@ -901,7 +903,11 @@ hailo_status FrameOutputTransformContext::quantize_stream(const void *dst_ptr) switch (m_dst_format.type) { case HAILO_FORMAT_TYPE_UINT8: if (HAILO_FORMAT_TYPE_UINT8 == m_src_format.type) { - Quantization::dequantize_output_buffer_in_place((uint8_t*)dst_ptr, shape_size, m_dst_quant_info); + if (m_are_all_qps_the_same) { + Quantization::dequantize_output_buffer_in_place((uint8_t*)dst_ptr, shape_size, m_dst_quant_info); + } else { + dequantize_output_by_feature((uint8_t*)dst_ptr, shape_size, m_quant_info_per_feature, m_quant_infos_rep_count); + } } else { return HAILO_INVALID_OPERATION; @@ -909,10 +915,18 @@ hailo_status FrameOutputTransformContext::quantize_stream(const void *dst_ptr) break; case HAILO_FORMAT_TYPE_UINT16: if (HAILO_FORMAT_TYPE_UINT8 == m_src_format.type) { - 
Quantization::dequantize_output_buffer_in_place((uint16_t*)dst_ptr, shape_size, m_dst_quant_info); + if (m_are_all_qps_the_same) { + Quantization::dequantize_output_buffer_in_place((uint16_t*)dst_ptr, shape_size, m_dst_quant_info); + } else { + dequantize_output_by_feature((uint16_t*)dst_ptr, shape_size, m_quant_info_per_feature, m_quant_infos_rep_count); + } } else if (HAILO_FORMAT_TYPE_UINT16 == m_src_format.type) { - Quantization::dequantize_output_buffer_in_place((uint16_t*)dst_ptr, shape_size, m_dst_quant_info); + if (m_are_all_qps_the_same) { + Quantization::dequantize_output_buffer_in_place((uint16_t*)dst_ptr, shape_size, m_dst_quant_info); + } else { + dequantize_output_by_feature((uint16_t*)dst_ptr, shape_size, m_quant_info_per_feature, m_quant_infos_rep_count); + } } else { return HAILO_INVALID_OPERATION; @@ -922,10 +936,18 @@ hailo_status FrameOutputTransformContext::quantize_stream(const void *dst_ptr) /* if output layer is argmax - do not rescale */ if (HAILO_FORMAT_ORDER_NHW != m_dst_format.order) { if (HAILO_FORMAT_TYPE_UINT8 == m_src_format.type) { - Quantization::dequantize_output_buffer_in_place((float32_t*)dst_ptr, shape_size, m_dst_quant_info); + if (m_are_all_qps_the_same) { + Quantization::dequantize_output_buffer_in_place((float32_t*)dst_ptr, shape_size, m_dst_quant_info); + } else { + dequantize_output_by_feature((float32_t*)dst_ptr, shape_size, m_quant_info_per_feature, m_quant_infos_rep_count); + } } else if (HAILO_FORMAT_TYPE_UINT16 == m_src_format.type) { - Quantization::dequantize_output_buffer_in_place((float32_t*)dst_ptr, shape_size, m_dst_quant_info); + if (m_are_all_qps_the_same) { + Quantization::dequantize_output_buffer_in_place((float32_t*)dst_ptr, shape_size, m_dst_quant_info); + } else { + dequantize_output_by_feature((float32_t*)dst_ptr, shape_size, m_quant_info_per_feature, m_quant_infos_rep_count); + } } else { return HAILO_INVALID_OPERATION; @@ -1795,7 +1817,50 @@ 
FrameOutputTransformContext::FrameOutputTransformContext(size_t src_frame_size, OutputTransformContext(src_frame_size, src_format, dst_frame_size, dst_format, dst_quant_info, should_quantize, should_transpose, should_reorder), m_src_image_shape(src_image_shape), m_dst_image_shape(dst_image_shape), m_transpose_buffer(std::move(transpose_buffer)) -{} +{ + std::vector dst_quant_infos = { dst_quant_info }; // TODO: Get vector from HEF + bool are_all_qps_the_same = true; + if (dst_quant_infos.size() > 1) { + for (const auto &quant_info : dst_quant_infos) { + if (0 != memcmp(&quant_info, &dst_quant_infos[0], sizeof(quant_info))) { + are_all_qps_the_same = false; + break; + } + } + } + m_are_all_qps_the_same = are_all_qps_the_same; + + switch (dst_format.order) { + case HAILO_FORMAT_ORDER_NHW: + case HAILO_FORMAT_ORDER_BAYER_RGB: + case HAILO_FORMAT_ORDER_12_BIT_BAYER_RGB: + case HAILO_FORMAT_ORDER_NCHW: + for (const auto &quant_info : dst_quant_infos) { + m_quant_info_per_feature.emplace_back(quant_info.qp_zp, quant_info.qp_scale); + } + m_quant_infos_rep_count = static_cast(dst_frame_size); + break; + case HAILO_FORMAT_ORDER_NHWC: + case HAILO_FORMAT_ORDER_FCR: + case HAILO_FORMAT_ORDER_F8CR: + case HAILO_FORMAT_ORDER_NC: + case HAILO_FORMAT_ORDER_RGB4: + for (const auto &quant_info : dst_quant_infos) { + m_quant_info_per_feature.emplace_back(quant_info.qp_zp, quant_info.qp_scale); + } + m_quant_infos_rep_count = 1; + break; + case HAILO_FORMAT_ORDER_NHCW: + for (const auto &quant_info : dst_quant_infos) { + m_quant_info_per_feature.emplace_back(quant_info.qp_zp, quant_info.qp_scale); + } + m_quant_infos_rep_count = dst_image_shape.width; + break; + default: + LOGGER__CRITICAL("Got unknown format order = {}", dst_format.order); + break; + } +} Expected> FrameOutputTransformContext::create(const hailo_3d_image_shape_t &src_image_shape, const hailo_format_t &src_format, const hailo_3d_image_shape_t &dst_image_shape, diff --git 
a/hailort/libhailort/src/transform/transform_internal.hpp b/hailort/libhailort/src/transform/transform_internal.hpp index b8ef52d..c1038a8 100644 --- a/hailort/libhailort/src/transform/transform_internal.hpp +++ b/hailort/libhailort/src/transform/transform_internal.hpp @@ -16,6 +16,7 @@ #include "hailo/buffer.hpp" #include "hailo/hef.hpp" #include "hailo/transform.hpp" +#include "hailo/quantization.hpp" #include "stream_common/stream_internal.hpp" #include "hef/layer_info.hpp" @@ -74,6 +75,14 @@ private: std::vector m_mux_infos; }; +struct QuantInfoForDequantize +{ + float32_t m_qp_zp; + float32_t m_qp_scale; + QuantInfoForDequantize(float32_t qp_zp, float32_t qp_scale) : m_qp_zp(qp_zp), m_qp_scale(qp_scale) + {} +}; + class HAILORTAPI FrameOutputTransformContext final : public OutputTransformContext { public: @@ -95,9 +104,26 @@ public: virtual std::string description() const override; private: + template + static inline void dequantize_output_by_feature(T *dst_ptr, uint32_t buffer_elements_count, + const std::vector &quant_infos, uint32_t repetition_count) + { + uint32_t elements_dequantized = 0; + while (elements_dequantized < buffer_elements_count) { + for (int32_t i = static_cast(quant_infos.size()) - 1; i >= 0; i--) { + Quantization::dequantize_output_buffer_in_place(dst_ptr, buffer_elements_count - repetition_count - elements_dequantized, + repetition_count, quant_infos[i].m_qp_zp, quant_infos[i].m_qp_scale); + elements_dequantized += repetition_count; + } + } + } + const hailo_3d_image_shape_t m_src_image_shape; const hailo_3d_image_shape_t m_dst_image_shape; Buffer m_transpose_buffer; + bool m_are_all_qps_the_same; + std::vector m_quant_info_per_feature; + uint32_t m_quant_infos_rep_count; }; class HAILORTAPI NMSOutputTransformContext final : public OutputTransformContext diff --git a/hailort/libhailort/src/utils/CMakeLists.txt b/hailort/libhailort/src/utils/CMakeLists.txt index c16a9a5..70cbfc1 100644 --- a/hailort/libhailort/src/utils/CMakeLists.txt +++ 
b/hailort/libhailort/src/utils/CMakeLists.txt @@ -4,6 +4,7 @@ set(SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/hailort_common.cpp ${CMAKE_CURRENT_SOURCE_DIR}/hailort_logger.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/buffer_storage.cpp ${CMAKE_CURRENT_SOURCE_DIR}/buffer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sensor_config_utils.cpp ) diff --git a/hailort/libhailort/src/utils/buffer.cpp b/hailort/libhailort/src/utils/buffer.cpp index 1430910..2148487 100644 --- a/hailort/libhailort/src/utils/buffer.cpp +++ b/hailort/libhailort/src/utils/buffer.cpp @@ -27,6 +27,8 @@ static void format_buffer(std::ostream& stream, const uint8_t *buffer, size_t si { assert(nullptr != buffer); + stream << "[addr = " << static_cast(buffer) << ", size = " << size << "]" << std::endl; + static const bool UPPERCASE = true; static const size_t BYTES_PER_LINE = 32; static const char *BYTE_DELIM = " "; @@ -35,67 +37,80 @@ static void format_buffer(std::ostream& stream, const uint8_t *buffer, size_t si stream << fmt::format("0x{:08X}", offset) << BYTE_DELIM; // 32 bit offset into a buffer should be enough stream << StringUtils::to_hex_string(buffer + offset, line_size, UPPERCASE, BYTE_DELIM) << std::endl; } - stream << "[size = " << std::dec << size << "]"; } Buffer::Buffer() : + m_storage(), m_data(nullptr), m_size(0) {} +Buffer::Buffer(BufferStoragePtr storage) : + m_storage(storage), + m_data(static_cast(m_storage->user_address())), + m_size(m_storage->size()) +{} + Buffer::Buffer(Buffer&& other) : - m_data(std::move(other.m_data)), + m_storage(std::move(other.m_storage)), + m_data(std::exchange(other.m_data, nullptr)), m_size(std::exchange(other.m_size, 0)) {} -Expected Buffer::create(size_t size) +Expected Buffer::create(size_t size, const BufferStorageParams ¶ms) { - std::unique_ptr data(new (std::nothrow) uint8_t[size]); - if (data == nullptr) { - LOGGER__ERROR("Failed allocating {} bytes", size); - return make_unexpected(HAILO_OUT_OF_HOST_MEMORY); - } + auto storage = BufferStorage::create(size, params); + 
CHECK_EXPECTED(storage); - return Buffer(std::move(data), size); + return Buffer(storage.release()); } -Expected Buffer::create(size_t size, uint8_t default_value) +Expected Buffer::create(size_t size, uint8_t default_value, const BufferStorageParams ¶ms) { - auto buffer = create(size); + auto buffer = create(size, params); CHECK_EXPECTED(buffer); - std::memset(static_cast(buffer->m_data.get()), default_value, size); + std::memset(static_cast(buffer->m_data), default_value, size); return buffer; } -Expected Buffer::create_shared(size_t size) +Expected Buffer::create_shared(size_t size, const BufferStorageParams ¶ms) { - auto buffer = Buffer::create(size); + auto buffer = Buffer::create(size, params); CHECK_EXPECTED(buffer); auto buffer_ptr = make_shared_nothrow(buffer.release()); CHECK_NOT_NULL_AS_EXPECTED(buffer_ptr, HAILO_OUT_OF_HOST_MEMORY); return buffer_ptr; } -Expected Buffer::create_shared(size_t size, uint8_t default_value) +Expected Buffer::create_shared(size_t size, uint8_t default_value, const BufferStorageParams ¶ms) { - auto buffer = Buffer::create(size, default_value); + auto buffer = Buffer::create(size, default_value, params); CHECK_EXPECTED(buffer); auto buffer_ptr = make_shared_nothrow(buffer.release()); CHECK_NOT_NULL_AS_EXPECTED(buffer_ptr, HAILO_OUT_OF_HOST_MEMORY); return buffer_ptr; } -Expected Buffer::create(const uint8_t *src, size_t size) +Expected Buffer::create_shared(const uint8_t *src, size_t size, const BufferStorageParams ¶ms) { - auto buffer = create(size); + auto buffer = Buffer::create(src, size, params); CHECK_EXPECTED(buffer); - std::memcpy(static_cast(buffer->m_data.get()), static_cast(src), size); + auto buffer_ptr = make_shared_nothrow(buffer.release()); + CHECK_NOT_NULL_AS_EXPECTED(buffer_ptr, HAILO_OUT_OF_HOST_MEMORY); + return buffer_ptr; +} + +Expected Buffer::create(const uint8_t *src, size_t size, const BufferStorageParams ¶ms) +{ + auto buffer = create(size, params); + CHECK_EXPECTED(buffer); + 
std::memcpy(static_cast(buffer->m_data), static_cast(src), size); return buffer; } -Expected Buffer::create(std::initializer_list init) +Expected Buffer::create(std::initializer_list init, const BufferStorageParams ¶ms) { - auto buffer = create(init.size()); + auto buffer = create(init.size(), params); CHECK_EXPECTED(buffer); size_t index = 0; for (const auto& n : init) { @@ -108,12 +123,13 @@ Expected Buffer::create(std::initializer_list init) Expected Buffer::copy() const { - return Buffer::create(m_data.get(), m_size); + return Buffer::create(m_data, m_size); } Buffer& Buffer::operator=(Buffer&& other) { - m_data = std::move(other.m_data); + m_storage = std::move(other.m_storage); + m_data = std::exchange(other.m_data, nullptr); m_size = std::exchange(other.m_size, 0); return *this; } @@ -123,7 +139,7 @@ bool Buffer::operator==(const Buffer& rhs) const if (m_size != rhs.m_size) { return false; } - return (0 == std::memcmp(data(), rhs.data(), m_size)); + return (0 == std::memcmp(m_data, rhs.m_data, m_size)); } bool Buffer::operator!=(const Buffer& rhs) const @@ -131,7 +147,7 @@ bool Buffer::operator!=(const Buffer& rhs) const if (m_size != rhs.m_size) { return true; } - return (0 != std::memcmp(data(), rhs.data(), m_size)); + return (0 != std::memcmp(m_data, rhs.m_data, m_size)); } uint8_t& Buffer::operator[](size_t pos) @@ -156,14 +172,19 @@ Buffer::iterator Buffer::end() return iterator(data() + m_size); } +BufferStorage &Buffer::storage() +{ + return *m_storage; +} + uint8_t* Buffer::data() noexcept { - return m_data.get(); + return m_data; } const uint8_t* Buffer::data() const noexcept { - return m_data.get(); + return m_data; } size_t Buffer::size() const noexcept @@ -171,22 +192,16 @@ size_t Buffer::size() const noexcept return m_size; } -uint8_t* Buffer::release() noexcept -{ - m_size = 0; - return m_data.release(); -} - std::string Buffer::to_string() const { for (size_t i = 0; i < m_size; i++) { if (m_data[i] == 0) { // We'll return a string that ends at 
the first null in the buffer - return std::string(reinterpret_cast(m_data.get())); + return std::string(reinterpret_cast(m_data)); } } - return std::string(reinterpret_cast(m_data.get()), m_size); + return std::string(reinterpret_cast(m_data), m_size); } // Note: This is a friend function @@ -226,11 +241,6 @@ uint64_t& Buffer::as_uint64() return as_type(); } -Buffer::Buffer(std::unique_ptr data, size_t size) : - m_data(std::move(data)), - m_size(size) - {} - MemoryView::MemoryView() : m_data(nullptr), m_size(0) diff --git a/hailort/libhailort/src/utils/buffer_storage.cpp b/hailort/libhailort/src/utils/buffer_storage.cpp new file mode 100644 index 0000000..2f94cb4 --- /dev/null +++ b/hailort/libhailort/src/utils/buffer_storage.cpp @@ -0,0 +1,345 @@ +/** + * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) +**/ +/** + * @file buffer_storage.cpp + * @brief TODO: fill me (HRT-10026) + **/ + +#include "hailo/buffer_storage.hpp" +#include "hailo/hailort.h" +#include "hailo/vdevice.hpp" +#include "vdma/vdma_device.hpp" +#include "vdma/memory/dma_able_buffer.hpp" +#include "vdma/memory/mapped_buffer.hpp" +#include "common/utils.hpp" + +namespace hailort +{ + +// Checking ABI of hailo_dma_buffer_direction_t vs HailoRTDriver::DmaDirection +static_assert(HAILO_DMA_BUFFER_DIRECTION_H2D == (int)HailoRTDriver::DmaDirection::H2D, + "hailo_dma_buffer_direction_t must match HailoRTDriver::DmaDirection"); +static_assert(HAILO_DMA_BUFFER_DIRECTION_D2H == (int)HailoRTDriver::DmaDirection::D2H, + "hailo_dma_buffer_direction_t must match HailoRTDriver::DmaDirection"); +static_assert(HAILO_DMA_BUFFER_DIRECTION_BOTH == (int)HailoRTDriver::DmaDirection::BOTH, + "hailo_dma_buffer_direction_t must match HailoRTDriver::DmaDirection"); + +BufferStorageParams::HeapParams::HeapParams() +{} + +Expected BufferStorageParams::DmaMappingParams::create( + const hailo_buffer_dma_mapping_params_t ¶ms) +{ + 
CHECK_AS_EXPECTED((params.device == nullptr) || (params.vdevice == nullptr), HAILO_INVALID_ARGUMENT, + "Can't set both device and vdevice fields"); + return DmaMappingParams(params); +} + +BufferStorageParams::DmaMappingParams::DmaMappingParams(const hailo_buffer_dma_mapping_params_t ¶ms) : + device(reinterpret_cast(params.device)), + vdevice(reinterpret_cast(params.vdevice)), + data_direction(params.direction) +{} + +BufferStorageParams::DmaMappingParams::DmaMappingParams(Device &device, hailo_dma_buffer_direction_t data_direction) : + device(&device), + vdevice(nullptr), + data_direction(data_direction) +{} + +BufferStorageParams::DmaMappingParams::DmaMappingParams(VDevice &vdevice, hailo_dma_buffer_direction_t data_direction) : + device(nullptr), + vdevice(&vdevice), + data_direction(data_direction) +{} + +BufferStorageParams::DmaMappingParams::DmaMappingParams() : + device(nullptr), + vdevice(nullptr), + data_direction(HAILO_DMA_BUFFER_DIRECTION_MAX_ENUM) +{} + +Expected BufferStorageParams::create(const hailo_buffer_parameters_t ¶ms) +{ + BufferStorageParams result{}; + result.flags = params.flags; + + if (params.flags == HAILO_BUFFER_FLAGS_NONE) { + result.heap_params = HeapParams(); + } else if ((params.flags & HAILO_BUFFER_FLAGS_DMA) != 0) { + auto dma_mapping_params = DmaMappingParams::create(params.dma_mapping_params); + CHECK_EXPECTED(dma_mapping_params); + result.dma_mapping_params = dma_mapping_params.release(); + } else { + // TODO: HRT-10903 + LOGGER__ERROR("Buffer storage flags not currently supported {}", params.flags); + return make_unexpected(HAILO_NOT_IMPLEMENTED); + } + + return result; +} + +BufferStorageParams BufferStorageParams::create_dma() +{ + BufferStorageParams result{}; + result.flags = HAILO_BUFFER_FLAGS_DMA; + result.dma_mapping_params = DmaMappingParams(); + return result; +} + +BufferStorageParams BufferStorageParams::create_dma(Device &device, hailo_dma_buffer_direction_t data_direction) +{ + BufferStorageParams result{}; + 
result.flags = HAILO_BUFFER_FLAGS_DMA; + result.dma_mapping_params = DmaMappingParams(device, data_direction); + return result; +} + +BufferStorageParams BufferStorageParams::create_dma(VDevice &vdevice, hailo_dma_buffer_direction_t data_direction) +{ + BufferStorageParams result{}; + result.flags = HAILO_BUFFER_FLAGS_DMA; + result.dma_mapping_params = DmaMappingParams(vdevice, data_direction); + return result; +} + +BufferStorageParams::BufferStorageParams() : + flags(HAILO_BUFFER_FLAGS_NONE), + heap_params() +{} + +Expected BufferStorage::create(size_t size, const BufferStorageParams ¶ms) +{ + if (params.flags == HAILO_BUFFER_FLAGS_NONE) { + auto result = HeapStorage::create(size); + CHECK_EXPECTED(result); + return std::static_pointer_cast(result.release()); + } else if (0 != (params.flags & HAILO_BUFFER_FLAGS_DMA)) { + // TODO: check other flags here (HRT-10903) + auto &dma_mapping_params = params.dma_mapping_params; + + DmaStoragePtr storage = nullptr; + if ((dma_mapping_params.device != nullptr) && (dma_mapping_params.vdevice != nullptr)) { + LOGGER__ERROR("Can't map a buffer to both vdevice and device"); + return make_unexpected(HAILO_INVALID_ARGUMENT); + } else if (dma_mapping_params.device != nullptr) { + auto result = DmaStorage::create(size, dma_mapping_params.data_direction, + *dma_mapping_params.device); + CHECK_EXPECTED(result); + storage = result.release(); + } else if (dma_mapping_params.vdevice != nullptr) { + auto result = DmaStorage::create(size, dma_mapping_params.data_direction, + *dma_mapping_params.vdevice); + CHECK_EXPECTED(result); + storage = result.release(); + } else { + auto result = DmaStorage::create(size); + CHECK_EXPECTED(result); + storage = result.release(); + } + return std::static_pointer_cast(storage); + } + + // TODO: HRT-10903 + LOGGER__ERROR("Buffer storage flags not currently supported {}", params.flags); + return make_unexpected(HAILO_NOT_IMPLEMENTED); +} + +BufferStorage::BufferStorage(Type type) : + m_type(type) +{} + 
+BufferStorage::Type BufferStorage::type() const +{ + return m_type; +} + +Expected HeapStorage::create(size_t size) +{ + std::unique_ptr data(new (std::nothrow) uint8_t[size]); + CHECK_NOT_NULL_AS_EXPECTED(data, HAILO_OUT_OF_HOST_MEMORY); + + auto result = make_shared_nothrow(std::move(data), size); + CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY); + + return result; +} + +HeapStorage::HeapStorage(std::unique_ptr data, size_t size) : + BufferStorage(Type::HEAP), + m_data(std::move(data)), + m_size(size) +{} + +HeapStorage::HeapStorage(HeapStorage&& other) noexcept : + BufferStorage(std::move(other)), + m_data(std::move(other.m_data)), + m_size(std::exchange(other.m_size, 0)) +{} + +size_t HeapStorage::size() const +{ + return m_size; +} + +void *HeapStorage::user_address() +{ + return m_data.get(); +} + +Expected HeapStorage::release() noexcept +{ + m_size = 0; + return m_data.release(); +} + +Expected HeapStorage::dma_map(Device &, hailo_dma_buffer_direction_t) +{ + LOGGER__ERROR("Heap allocated buffers can't be mapped to DMA"); + return make_unexpected(HAILO_INVALID_OPERATION); +} + +Expected HeapStorage::dma_map(HailoRTDriver &, hailo_dma_buffer_direction_t) +{ + LOGGER__ERROR("Heap allocated buffers can't be mapped to DMA"); + return make_unexpected(HAILO_INVALID_OPERATION); +} + +Expected HeapStorage::get_dma_mapped_buffer(const std::string &) +{ + LOGGER__ERROR("Mapped buffer is not supported for Heap allocated buffers"); + return make_unexpected(HAILO_INVALID_OPERATION); +} + +Expected DmaStorage::create(size_t size) +{ + static const auto ALLOCATE_BUFFER = nullptr; + return create(ALLOCATE_BUFFER, size); +} + +Expected DmaStorage::create(size_t size, + hailo_dma_buffer_direction_t data_direction, Device &device) +{ + static const auto ALLOCATE_BUFFER = nullptr; + return create(ALLOCATE_BUFFER, size, data_direction, + std::vector>{std::ref(device)}); +} + +Expected DmaStorage::create(size_t size, + hailo_dma_buffer_direction_t data_direction, 
VDevice &vdevice) +{ + static const auto ALLOCATE_BUFFER = nullptr; + auto physical_devices = vdevice.get_physical_devices(); + CHECK_EXPECTED(physical_devices); + return create(ALLOCATE_BUFFER, size, data_direction, physical_devices.release()); +} + +Expected DmaStorage::create_from_user_address(void *user_address, size_t size) +{ + return create(user_address, size); +} + +Expected DmaStorage::create_from_user_address(void *user_address, size_t size, + hailo_dma_buffer_direction_t data_direction, Device &device) +{ + CHECK_ARG_NOT_NULL_AS_EXPECTED(user_address); + return create(user_address, size, data_direction, + std::vector>{std::ref(device)}); +} + +Expected DmaStorage::create_from_user_address(void *user_address, size_t size, + hailo_dma_buffer_direction_t data_direction, VDevice &vdevice) +{ + CHECK_ARG_NOT_NULL_AS_EXPECTED(user_address); + auto physical_devices = vdevice.get_physical_devices(); + CHECK_EXPECTED(physical_devices); + return create(user_address, size, data_direction, physical_devices.release()); +} + +Expected DmaStorage::create(void *user_address, size_t size, + hailo_dma_buffer_direction_t data_direction, + std::vector> &&physical_devices) +{ + // TODO: HRT-10283 support sharing low memory buffers for DART and similar systems. 
+ auto dma_able_buffer = vdma::DmaAbleBuffer::create(size, user_address); + CHECK_EXPECTED(dma_able_buffer); + + auto result = make_shared_nothrow(dma_able_buffer.release()); + CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY); + + for (auto &device : physical_devices) { + auto is_new_mapping = result->dma_map(device, data_direction); + CHECK_EXPECTED(is_new_mapping); + CHECK_AS_EXPECTED(is_new_mapping.value(), HAILO_INTERNAL_FAILURE); + } + + return result; +} + +DmaStorage::DmaStorage(vdma::DmaAbleBufferPtr &&dma_able_buffer) : + BufferStorage(Type::DMA), + m_dma_able_buffer(std::move(dma_able_buffer)), + m_mappings() +{} + +size_t DmaStorage::size() const +{ + return m_dma_able_buffer->size(); +} + +void *DmaStorage::user_address() +{ + return m_dma_able_buffer->user_address(); +} + +Expected DmaStorage::release() noexcept +{ + return make_unexpected(HAILO_NOT_IMPLEMENTED); +} + +Expected DmaStorage::dma_map(Device &device, hailo_dma_buffer_direction_t data_direction) +{ + const auto device_type = device.get_type(); + CHECK_AS_EXPECTED(((Device::Type::INTEGRATED == device_type) || (Device::Type::PCIE == device_type)), + HAILO_INVALID_ARGUMENT, "Invalid device type (expected integrated/pcie, received {})", device_type); + VdmaDevice *vdma_device = reinterpret_cast(&device); + + return dma_map(vdma_device->get_driver(), data_direction); +} + +Expected DmaStorage::dma_map(HailoRTDriver &driver, hailo_dma_buffer_direction_t data_direction) +{ + CHECK_AS_EXPECTED(data_direction <= HAILO_DMA_BUFFER_DIRECTION_BOTH, HAILO_INVALID_ARGUMENT, + "Invalid data direction {}", data_direction); + + const auto &device_id = driver.device_id(); + auto find_result = m_mappings.find(device_id); + if (find_result != m_mappings.end()) { + // The buffer has been mapped => don't map it again + return Expected(false); // not a new mapping + } + + // The buffer hasn't been mapped => map it now + auto mapped_buffer = vdma::MappedBuffer::create_shared(driver, m_dma_able_buffer, + 
static_cast(data_direction)); + CHECK_EXPECTED(mapped_buffer); + + m_mappings.emplace(device_id, mapped_buffer.value()); + return Expected(true); // new mapping +} + +Expected DmaStorage::get_dma_mapped_buffer(const std::string &device_id) +{ + auto mapped_buffer = m_mappings.find(device_id); + if (mapped_buffer == m_mappings.end()) { + // Don't print error message here + LOGGER__INFO("Mapped buffer for {} not found", device_id); + return make_unexpected(HAILO_NOT_FOUND); + } + + return Expected(mapped_buffer->second); +} + +} /* namespace hailort */ diff --git a/hailort/libhailort/src/utils/event_internal.hpp b/hailort/libhailort/src/utils/event_internal.hpp deleted file mode 100644 index 04559e8..0000000 --- a/hailort/libhailort/src/utils/event_internal.hpp +++ /dev/null @@ -1,85 +0,0 @@ -/** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. - * Distributed under the MIT license (https://opensource.org/licenses/MIT) - **/ -/** - * @file event.hpp - * @brief Event and Semaphore wrapper objects used for multithreading - **/ - -#ifndef _EVENT_INTERNAL_HPP_ -#define _EVENT_INTERNAL_HPP_ - -#include "hailo/hailort.h" -#include "hailo/expected.hpp" - -#include -#include -#include -#include -#if defined(__GNUC__) -#include -#endif - -namespace hailort -{ - -// TODO: Replace with a static wait_multiple func belonging to Waitable (SDK-16567). -// Will get a vector of pointers as an argument. 
Can also use variadic -// template args for cases with fixed number Waitables -class WaitOrShutdown final -{ -public: - WaitOrShutdown(WaitablePtr waitable, EventPtr shutdown_event); - ~WaitOrShutdown() = default; - - WaitOrShutdown(const WaitOrShutdown &other) = delete; - WaitOrShutdown &operator=(const WaitOrShutdown &other) = delete; - WaitOrShutdown(WaitOrShutdown &&other) noexcept = default; - WaitOrShutdown &operator=(WaitOrShutdown &&other) = delete; - - // Waits on waitable or shutdown_event to be signaled: - // * If shutdown_event is signaled: - // - shutdown_event is not reset - // - HAILO_SHUTDOWN_EVENT_SIGNALED is returned - // * If waitable is signaled: - // - waitable is reset if waitable->is_auto_reset() - // - HAILO_SUCCESS is returned - // * If both waitable and shutdown_event are signaled: - // - shutdown_event is not reset - // - waitable is not reset - // - HAILO_SHUTDOWN_EVENT_SIGNALED is returned - // * If neither are signaled, then HAILO_TIMEOUT is returned - // * On any failure an appropriate status shall be returned - hailo_status wait(std::chrono::milliseconds timeout); - hailo_status signal(); - -private: - // Note: We want to guarantee that if the shutdown event is signaled, HAILO_SHUTDOWN_EVENT_SIGNALED will be - // returned. - // * In Unix, using poll this isn't a problem since we'll get all the readable fds in a single call. - // * In Windows, using WaitForMultipleObjects, this works differently (from msdn): - // If bWaitAll is FALSE, the return value minus WAIT_OBJECT_0 indicates the lpHandles array index - // of the object that satisfied the wait. If more than one object became signaled during the call, - // this is the array index of the signaled object with the smallest index value of all the signaled - // objects. - // (https://docs.microsoft.com/en-us/windows/win32/api/synchapi/nf-synchapi-waitformultipleobjects) - // * Hence, SHUTDOWN_INDEX must come before WAITABLE_INDEX! 
- static const size_t SHUTDOWN_INDEX = 0; - static const size_t WAITABLE_INDEX = 1; - #if defined(_MSC_VER) || defined(__QNX__) - using WaitHandleArray = std::array; - #else - using WaitHandleArray = std::array; - #endif - - const WaitablePtr m_waitable; - const EventPtr m_shutdown_event; - WaitHandleArray m_wait_handle_array; - - static WaitHandleArray create_wait_handle_array(WaitablePtr waitable, EventPtr shutdown_event); -}; - -} /* namespace hailort */ - -#endif /* _EVENT_INTERNAL_HPP_ */ diff --git a/hailort/libhailort/src/utils/exported_resource_manager.hpp b/hailort/libhailort/src/utils/exported_resource_manager.hpp new file mode 100644 index 0000000..a4d2d5d --- /dev/null +++ b/hailort/libhailort/src/utils/exported_resource_manager.hpp @@ -0,0 +1,94 @@ +/** + * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) +**/ +/** + * @file exported_resource_manager.hpp + * @brief Holds resources that are exported via c-api + **/ + +#ifndef _HAILO_EXPORTED_RESOURCE_MANAGER_HPP_ +#define _HAILO_EXPORTED_RESOURCE_MANAGER_HPP_ + +#include "hailo/hailort.h" + +#include + +namespace hailort +{ + +// TODO: Merge ExportedResourceManager and SharedResourceManager (HRT-10317) +template> +class ExportedResourceManager final +{ +public: + static hailo_status register_resource(const Resource &resource, const Key &key) + { + return get_instance().register_resource_impl(resource, key); + } + + static Expected> get_resource(const Key &key) + { + return get_instance().get_resource_impl(key); + } + + static hailo_status unregister_resource(const Key &key) + { + return get_instance().unregister_resource_impl(key); + } + +private: + static ExportedResourceManager& get_instance() + { + static ExportedResourceManager instance; + return instance; + } + + hailo_status register_resource_impl(const Resource &resource, const Key &key) + { + std::lock_guard lock_guard(m_mutex); + + auto it = 
m_storage.find(key); + if (it != m_storage.end()) { + LOGGER__TRACE("There's already a resource registered under key {}", key); + return HAILO_INVALID_ARGUMENT; + } + + m_storage[key] = resource; + return HAILO_SUCCESS; + } + + Expected> get_resource_impl(const Key &key) + { + std::lock_guard lock_guard(m_mutex); + + auto it = m_storage.find(key); + if (it == m_storage.end()) { + LOGGER__TRACE("Key {} not found in resource manager", key); + return make_unexpected(HAILO_NOT_FOUND); + } + + return std::ref(it->second); + } + + hailo_status unregister_resource_impl(const Key &key) + { + std::lock_guard lock_guard(m_mutex); + + auto it = m_storage.find(key); + if (it == m_storage.end()) { + LOGGER__TRACE("Key {} not found in resource manager", key); + return HAILO_NOT_FOUND; + } + + m_storage.erase(it); + return HAILO_SUCCESS; + } + + std::mutex m_mutex; + std::unordered_map m_storage; +}; + +} /* namespace hailort */ + +#endif /* _HAILO_EXPORTED_RESOURCE_MANAGER_HPP_ */ diff --git a/hailort/libhailort/src/utils/hailort_common.cpp b/hailort/libhailort/src/utils/hailort_common.cpp index 7f59c47..7f8f17f 100644 --- a/hailort/libhailort/src/utils/hailort_common.cpp +++ b/hailort/libhailort/src/utils/hailort_common.cpp @@ -17,8 +17,6 @@ namespace hailort const uint32_t HailoRTCommon::BBOX_PARAMS; const uint32_t HailoRTCommon::MAX_DEFUSED_LAYER_COUNT; const size_t HailoRTCommon::HW_DATA_ALIGNMENT; -const uint64_t HailoRTCommon::NMS_DELIMITER; -const uint64_t HailoRTCommon::NMS_DUMMY_DELIMITER; Expected HailoRTCommon::to_device_id(const std::string &device_id) { diff --git a/hailort/libhailort/src/utils/hailort_logger.cpp b/hailort/libhailort/src/utils/hailort_logger.cpp index 3eda92f..f85abba 100644 --- a/hailort/libhailort/src/utils/hailort_logger.cpp +++ b/hailort/libhailort/src/utils/hailort_logger.cpp @@ -45,6 +45,7 @@ namespace hailort #define HAILORT_ANDROID_LOGGER_PATTERN ("%v") // Android logger will print only message (additional info are built-in) #define 
HAILORT_LOGGER_PATH_ENV_VAR ("HAILORT_LOGGER_PATH") +#define PERIODIC_LOGGER_FLUSH_TIME_IN_SECONDS (5) #ifdef _WIN32 #define PATH_SEPARATOR "\\" @@ -140,18 +141,24 @@ std::shared_ptr HailoRTLogger::create_file_sink(const std:: return make_shared_nothrow(); } + auto is_dir = Filesystem::is_directory(dir_path); + if (!is_dir) { + std::cerr << "HailoRT warning: Cannot create log file " << filename << "! Path " << dir_path << " is not valid." << std::endl; + return make_shared_nothrow(); + } + if (!is_dir.value()) { + std::cerr << "HailoRT warning: Cannot create log file " << filename << "! Path " << dir_path << " is not a directory." << std::endl; + return make_shared_nothrow(); + } + if (!Filesystem::is_path_accesible(dir_path)) { - std::cerr << "HailoRT warning: Cannot create log file " << filename - << "! Please check the directory " << dir_path << " write permissions." << std::endl; - // Create null sink instead (Will throw away its log) + std::cerr << "HailoRT warning: Cannot create log file " << filename << "! Please check the directory " << dir_path << " write permissions." << std::endl; return make_shared_nothrow(); } const auto file_path = dir_path + PATH_SEPARATOR + filename; if (Filesystem::does_file_exists(file_path) && !Filesystem::is_path_accesible(file_path)) { - std::cerr << "HailoRT warning: Cannot create log file " << filename - << "! Please check the file " << file_path << " write permissions." << std::endl; - // Create null sink instead (Will throw away its log) + std::cerr << "HailoRT warning: Cannot create log file " << filename << "! Please check the file " << file_path << " write permissions." 
<< std::endl; return make_shared_nothrow(); } @@ -162,7 +169,7 @@ std::shared_ptr HailoRTLogger::create_file_sink(const std:: return make_shared_nothrow(file_path); } -HailoRTLogger::HailoRTLogger() : +HailoRTLogger::HailoRTLogger(spdlog::level::level_enum console_level, spdlog::level::level_enum file_level, spdlog::level::level_enum flush_level) : m_console_sink(make_shared_nothrow()), #ifdef __ANDROID__ m_main_log_file_sink(make_shared_nothrow(HAILORT_NAME)) @@ -171,6 +178,10 @@ HailoRTLogger::HailoRTLogger() : m_local_log_file_sink(create_file_sink(get_log_path(HAILORT_LOGGER_PATH_ENV_VAR), HAILORT_LOGGER_FILENAME, true)) #endif { + if ((nullptr == m_console_sink) || (nullptr == m_main_log_file_sink) || (nullptr == m_local_log_file_sink)) { + std::cerr << "Allocating memory on heap for logger sinks has failed! Please check if this host has enough memory. Writing to log will result in a SEGFAULT!" << std::endl; + return; + } #ifdef __ANDROID__ m_main_log_file_sink->set_pattern(HAILORT_ANDROID_LOGGER_PATTERN); @@ -179,31 +190,26 @@ HailoRTLogger::HailoRTLogger() : m_local_log_file_sink->set_pattern(HAILORT_LOCAL_FILE_LOGGER_PATTERN); #endif - // TODO: Handle null pointers for logger and sinks m_console_sink->set_pattern(HAILORT_CONSOLE_LOGGER_PATTERN); spdlog::sinks_init_list sink_list = { m_console_sink, m_main_log_file_sink, m_local_log_file_sink }; m_hailort_logger = make_shared_nothrow(HAILORT_NAME, sink_list.begin(), sink_list.end()); + if (nullptr == m_hailort_logger) { + std::cerr << "Allocating memory on heap for HailoRT logger has failed! Please check if this host has enough memory. Writing to log will result in a SEGFAULT!" 
<< std::endl; + return; + } -#ifdef NDEBUG - set_levels(spdlog::level::warn, spdlog::level::info, spdlog::level::warn); -#else - set_levels(spdlog::level::warn, spdlog::level::debug, spdlog::level::debug); -#endif + set_levels(console_level, file_level, flush_level); spdlog::set_default_logger(m_hailort_logger); } -std::shared_ptr HailoRTLogger::logger() -{ - return m_hailort_logger; -} - -void HailoRTLogger::set_levels(spdlog::level::level_enum console_level, - spdlog::level::level_enum file_level, spdlog::level::level_enum flush_level) +void HailoRTLogger::set_levels(spdlog::level::level_enum console_level, spdlog::level::level_enum file_level, + spdlog::level::level_enum flush_level) { m_console_sink->set_level(console_level); m_main_log_file_sink->set_level(file_level); m_local_log_file_sink->set_level(file_level); m_hailort_logger->flush_on(flush_level); + spdlog::flush_every(std::chrono::seconds(PERIODIC_LOGGER_FLUSH_TIME_IN_SECONDS)); } diff --git a/hailort/libhailort/src/utils/hailort_logger.hpp b/hailort/libhailort/src/utils/hailort_logger.hpp index 5552d00..c40047e 100644 --- a/hailort/libhailort/src/utils/hailort_logger.hpp +++ b/hailort/libhailort/src/utils/hailort_logger.hpp @@ -17,30 +17,40 @@ #include "hailo/hailort.h" #include "common/logger_macros.hpp" +#include "common/utils.hpp" namespace hailort { class HailoRTLogger { public: - static HailoRTLogger& get_instance() +#ifdef NDEBUG + static std::unique_ptr &get_instance(spdlog::level::level_enum console_level = spdlog::level::warn, + spdlog::level::level_enum file_level = spdlog::level::info, spdlog::level::level_enum flush_level = spdlog::level::warn) +#else + static std::unique_ptr &get_instance(spdlog::level::level_enum console_level = spdlog::level::warn, + spdlog::level::level_enum file_level = spdlog::level::debug, spdlog::level::level_enum flush_level = spdlog::level::debug) +#endif { - static HailoRTLogger instance; + static std::unique_ptr instance = nullptr; + if (nullptr == instance) { 
+ instance = make_unique_nothrow(console_level, file_level, flush_level); + } return instance; } + + HailoRTLogger(spdlog::level::level_enum console_level, spdlog::level::level_enum file_level, spdlog::level::level_enum flush_level); + ~HailoRTLogger() = default; HailoRTLogger(HailoRTLogger const&) = delete; void operator=(HailoRTLogger const&) = delete; - std::shared_ptr logger(); - void set_levels(spdlog::level::level_enum console_level, spdlog::level::level_enum file_level, - spdlog::level::level_enum flush_level); static std::string get_log_path(const std::string &path_env_var); static std::string get_main_log_path(); static std::shared_ptr create_file_sink(const std::string &dir_path, const std::string &filename, bool rotate); private: - HailoRTLogger(); static std::string parse_log_path(const char *log_path); + void set_levels(spdlog::level::level_enum console_level, spdlog::level::level_enum file_level, spdlog::level::level_enum flush_level); std::shared_ptr m_console_sink; diff --git a/hailort/libhailort/src/utils/profiler/CMakeLists.txt b/hailort/libhailort/src/utils/profiler/CMakeLists.txt index f5c91fa..56ab701 100644 --- a/hailort/libhailort/src/utils/profiler/CMakeLists.txt +++ b/hailort/libhailort/src/utils/profiler/CMakeLists.txt @@ -2,6 +2,8 @@ cmake_minimum_required(VERSION 3.0.0) set(SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/tracer.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/scheduler_profiler_handler.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/monitor_handler.cpp ) set(HAILORT_CPP_SOURCES ${HAILORT_CPP_SOURCES} ${SRC_FILES} PARENT_SCOPE) diff --git a/hailort/libhailort/src/utils/profiler/handler.hpp b/hailort/libhailort/src/utils/profiler/handler.hpp new file mode 100644 index 0000000..cfd8f41 --- /dev/null +++ b/hailort/libhailort/src/utils/profiler/handler.hpp @@ -0,0 +1,199 @@ +/** + * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. 
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file handler.hpp + * @brief Handlers base class for HailoRT tracer mechanism + **/ + +#ifndef _HAILO_HANDLER_HPP_ +#define _HAILO_HANDLER_HPP_ + +#include "hailo/hailort.h" +#include "hailo/stream.hpp" + +#include "vdevice/scheduler/scheduler_base.hpp" + +namespace hailort +{ + +struct Trace +{ + Trace(const std::string &name) + : name(name) + {} + + virtual ~Trace() = default; + + uint64_t timestamp = 0; + std::string name; +}; + +struct InitTrace : Trace +{ + InitTrace() : Trace("init") {} +}; + +struct CoreOpIdleTrace : Trace +{ + CoreOpIdleTrace(const device_id_t &device_id, scheduler_core_op_handle_t core_op_handle) + : Trace("core_op_idle"), device_id(device_id), core_op_handle(core_op_handle) + {} + + device_id_t device_id; + scheduler_core_op_handle_t core_op_handle; +}; + +struct AddDeviceTrace : Trace +{ + AddDeviceTrace(const device_id_t &device_id, const std::string &device_arch) + : Trace("add_device_trace"), device_id(device_id), device_arch(device_arch) + {} + + device_id_t device_id; + std::string device_arch; +}; + +struct SchedulerStartTrace : Trace +{ + SchedulerStartTrace(uint32_t device_count) + : Trace("scheduler_start"), device_count(device_count) + {} + + uint32_t device_count = 0; +}; + +struct AddCoreOpTrace : Trace +{ + AddCoreOpTrace(const device_id_t &device_id, const std::string &core_op_name, uint64_t timeout, uint32_t threshold, scheduler_core_op_handle_t handle, + bool is_nms) + : Trace("add_core_op"), device_id(device_id), core_op_name(core_op_name), timeout(timeout), threshold(threshold), core_op_handle(handle), is_nms(is_nms) + {} + + device_id_t device_id; + std::string core_op_name; + uint64_t timeout = 0; + uint32_t threshold = 0; + scheduler_core_op_handle_t core_op_handle = INVALID_CORE_OP_HANDLE; + bool is_nms; +}; + +struct CreateCoreOpInputStreamsTrace : Trace +{ + CreateCoreOpInputStreamsTrace(const device_id_t &device_id, const 
std::string &core_op_name, const std::string &stream_name, uint32_t queue_size) + : Trace("create_input_stream"), device_id(device_id), core_op_name(core_op_name), stream_name(stream_name), queue_size(queue_size) + {} + + device_id_t device_id; + std::string core_op_name; + std::string stream_name; + uint32_t queue_size; +}; + +struct CreateCoreOpOutputStreamsTrace : Trace +{ + CreateCoreOpOutputStreamsTrace(const device_id_t &device_id, const std::string &core_op_name, const std::string &stream_name, uint32_t queue_size) + : Trace("create_output_stream"), device_id(device_id), core_op_name(core_op_name), stream_name(stream_name), queue_size(queue_size) + {} + + device_id_t device_id; + std::string core_op_name; + std::string stream_name; + uint32_t queue_size; +}; + +struct WriteFrameTrace : Trace +{ + WriteFrameTrace(const device_id_t &device_id, scheduler_core_op_handle_t core_op_handle, const std::string &queue_name) + : Trace("write_frame"), device_id(device_id), core_op_handle(core_op_handle), queue_name(queue_name) + {} + + device_id_t device_id; + scheduler_core_op_handle_t core_op_handle; + std::string queue_name; +}; + +struct InputVdmaDequeueTrace : Trace +{ + InputVdmaDequeueTrace(const device_id_t &device_id, scheduler_core_op_handle_t core_op_handle, const std::string &queue_name) + : Trace("input_vdma_dequeue"), device_id(device_id), core_op_handle(core_op_handle), queue_name(queue_name) + {} + + device_id_t device_id; + scheduler_core_op_handle_t core_op_handle; + std::string queue_name; +}; + +struct ReadFrameTrace : Trace +{ + ReadFrameTrace(const device_id_t &device_id, scheduler_core_op_handle_t core_op_handle, const std::string &queue_name) + : Trace("read_frame"), device_id(device_id), core_op_handle(core_op_handle), queue_name(queue_name) + {} + + device_id_t device_id; + scheduler_core_op_handle_t core_op_handle; + std::string queue_name; +}; + +struct OutputVdmaEnqueueTrace : Trace +{ + OutputVdmaEnqueueTrace(const device_id_t &device_id, 
scheduler_core_op_handle_t core_op_handle, const std::string &queue_name, uint32_t frames) + : Trace("output_vdma_enqueue"), device_id(device_id), core_op_handle(core_op_handle), queue_name(queue_name), frames(frames) + {} + + device_id_t device_id; + scheduler_core_op_handle_t core_op_handle; + std::string queue_name; + uint32_t frames = 0; +}; + +struct ChooseCoreOpTrace : Trace +{ + ChooseCoreOpTrace(const device_id_t &device_id, scheduler_core_op_handle_t handle, bool threshold, bool timeout, core_op_priority_t priority) + : Trace("choose_core_op"), device_id(device_id), core_op_handle(handle), threshold(threshold), timeout(timeout), priority(priority) + {} + + device_id_t device_id; + scheduler_core_op_handle_t core_op_handle; + bool threshold = false; + bool timeout = false; + core_op_priority_t priority; +}; + +struct SwitchCoreOpTrace : Trace +{ + SwitchCoreOpTrace(const device_id_t &device_id, scheduler_core_op_handle_t handle) + : Trace("switch_core_op"), device_id(device_id), core_op_handle(handle) + {} + + device_id_t device_id; + scheduler_core_op_handle_t core_op_handle; +}; + +class Handler +{ +public: + virtual ~Handler() = default; + + virtual void handle_trace(const InitTrace&) {}; + virtual void handle_trace(const AddCoreOpTrace&) {}; + virtual void handle_trace(const CreateCoreOpInputStreamsTrace&) {}; + virtual void handle_trace(const CreateCoreOpOutputStreamsTrace&) {}; + virtual void handle_trace(const WriteFrameTrace&) {}; + virtual void handle_trace(const InputVdmaDequeueTrace&) {}; + virtual void handle_trace(const ReadFrameTrace&) {}; + virtual void handle_trace(const OutputVdmaEnqueueTrace&) {}; + virtual void handle_trace(const ChooseCoreOpTrace&) {}; + virtual void handle_trace(const SwitchCoreOpTrace&) {}; + virtual void handle_trace(const SchedulerStartTrace&) {}; + virtual void handle_trace(const CoreOpIdleTrace&) {}; + virtual void handle_trace(const AddDeviceTrace&) {}; + +}; + +struct JSON; + +} + +#endif /* _HAILO_HANDLER_HPP */ 
\ No newline at end of file diff --git a/hailort/libhailort/src/utils/profiler/monitor_handler.cpp b/hailort/libhailort/src/utils/profiler/monitor_handler.cpp new file mode 100644 index 0000000..79ee27a --- /dev/null +++ b/hailort/libhailort/src/utils/profiler/monitor_handler.cpp @@ -0,0 +1,341 @@ +/** + * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file monitor_handler.cpp + * @brief Implementation of the scheduler monitor handlers base with HailoRT tracer mechanism + **/ + +#include "monitor_handler.hpp" + +#include "common/logger_macros.hpp" +#include "common/os_utils.hpp" + +namespace hailort +{ +MonitorHandler::MonitorHandler() +{} + +MonitorHandler::~MonitorHandler() +{ + clear_monitor(); +} + +void MonitorHandler::clear_monitor() { + + if (m_is_monitor_currently_working) { + m_is_monitor_currently_working = false; + m_mon_shutdown_event->signal(); + if (m_mon_thread.joinable()) { + m_mon_thread.join(); + } + } + m_devices_info.clear(); + m_core_ops_info.clear(); +} + +void MonitorHandler::handle_trace(const SchedulerStartTrace &trace) +{ + m_device_count = trace.device_count; + start_mon(); +} + +void MonitorHandler::handle_trace(const CoreOpIdleTrace &trace) +{ + update_utilization_read_buffers_finished(trace.device_id, trace.core_op_handle, true); +} + +void MonitorHandler::handle_trace(const AddCoreOpTrace &trace) +{ + m_core_ops_info[trace.core_op_handle].utilization = 0; + m_core_ops_info[trace.core_op_handle].core_op_name = trace.core_op_name; + m_core_ops_info[trace.core_op_handle].is_nms = trace.is_nms; +} + +void MonitorHandler::handle_trace(const AddDeviceTrace &trace) +{ + DeviceInfo device_info(trace.device_id, trace.device_arch); + m_devices_info.emplace(trace.device_id, device_info); +} + +void MonitorHandler::handle_trace(const SwitchCoreOpTrace &trace) +{ + assert(contains(m_devices_info, trace.device_id)); + 
m_devices_info.at(trace.device_id).current_core_op_handle = trace.core_op_handle; +} + +void MonitorHandler::handle_trace(const CreateCoreOpInputStreamsTrace &trace) +{ + // TODO- HRT-10371 'if' should be removed, this is temporary solution since this trace is called out of the scheduler. + if (!m_is_monitor_currently_working) { return; } + auto core_op_handle = get_core_op_handle_by_name(trace.core_op_name); + assert(contains(m_core_ops_info, core_op_handle)); + m_core_ops_info[core_op_handle].input_streams_info[trace.stream_name] = StreamsInfo{trace.queue_size, 0}; +} + +void MonitorHandler::handle_trace(const CreateCoreOpOutputStreamsTrace &trace) +{ + // TODO- HRT-10371 'if' should be removed, this is temporary solution since this trace is called out of the scheduler. + if (!m_is_monitor_currently_working) { return; } + auto core_op_handle = get_core_op_handle_by_name(trace.core_op_name); + assert(contains(m_core_ops_info, core_op_handle)); + m_core_ops_info[core_op_handle].output_streams_info[trace.stream_name] = StreamsInfo{trace.queue_size, 0}; +} + +void MonitorHandler::handle_trace(const WriteFrameTrace &trace) +{ + assert(contains(m_core_ops_info, trace.core_op_handle)); + assert(contains(m_core_ops_info[trace.core_op_handle].input_streams_info, trace.queue_name)); + m_core_ops_info[trace.core_op_handle].input_streams_info[trace.queue_name].pending_frames_count++; +} + +void MonitorHandler::handle_trace(const ReadFrameTrace &trace) +{ + assert(contains(m_core_ops_info, trace.core_op_handle)); + assert(contains(m_core_ops_info[trace.core_op_handle].output_streams_info, trace.queue_name)); + m_core_ops_info[trace.core_op_handle].output_streams_info[trace.queue_name].pending_frames_count--; + m_core_ops_info[trace.core_op_handle].output_streams_info[trace.queue_name].total_frames_count++; +} + +void MonitorHandler::handle_trace(const OutputVdmaEnqueueTrace &trace) +{ + assert(contains(m_core_ops_info, trace.core_op_handle)); + 
assert(contains(m_core_ops_info[trace.core_op_handle].output_streams_info, trace.queue_name)); + m_core_ops_info[trace.core_op_handle].output_streams_info[trace.queue_name].pending_frames_count += trace.frames; +} + +void MonitorHandler::handle_trace(const InputVdmaDequeueTrace &trace) +{ + assert(contains(m_core_ops_info, trace.core_op_handle)); + assert(contains(m_core_ops_info[trace.core_op_handle].input_streams_info, trace.queue_name)); + m_core_ops_info[trace.core_op_handle].input_streams_info[trace.queue_name].pending_frames_count--; + update_utilization_send_started(trace.device_id); +} + +scheduler_core_op_handle_t MonitorHandler::get_core_op_handle_by_name(const std::string &name) +{ + for (const auto &core_op_info : m_core_ops_info) { + if (0 == core_op_info.second.core_op_name.compare(name)) { + return core_op_info.first; + } + } + return INVALID_CORE_OP_HANDLE; +} + +hailo_status MonitorHandler::start_mon() +{ +#if defined(__GNUC__) + + /* Clearing monitor members. Since the owner of monitor_handler is tracer, which is static, + the monitor may get rerun without destructor being called. 
*/ + if (m_is_monitor_currently_working) { + clear_monitor(); + } + m_is_monitor_currently_working = true; + + m_mon_shutdown_event = Event::create_shared(Event::State::not_signalled); + m_last_measured_timestamp = std::chrono::steady_clock::now(); + CHECK(nullptr != m_mon_shutdown_event, HAILO_OUT_OF_HOST_MEMORY); + + auto tmp_file = open_temp_mon_file(); + CHECK_EXPECTED_AS_STATUS(tmp_file); + m_mon_tmp_output = tmp_file.release(); + + m_mon_thread = std::thread([this] () + { + while (true) { + auto status = m_mon_shutdown_event->wait(DEFAULT_SCHEDULER_MON_INTERVAL); + if (HAILO_TIMEOUT == status) { + dump_state(); + } else if (HAILO_SUCCESS == status) { + break; // shutdown_event was signaled + } else if (HAILO_SUCCESS != status) { + LOGGER__ERROR("Scheduler monitor failed with status {}", status); + return; + } + } + return; + }); + + return HAILO_SUCCESS; +#else + return HAILO_NOT_IMPLEMENTED; +#endif +} + +std::string get_curr_pid_as_str() +{ + return std::to_string(OsUtils::get_curr_pid()); +} + +#if defined(__GNUC__) +Expected> MonitorHandler::open_temp_mon_file() +{ + std::string file_name = get_curr_pid_as_str(); + auto tmp_file = TempFile::create(file_name, SCHEDULER_MON_TMP_DIR); + CHECK_EXPECTED(tmp_file); + + auto tmp_file_ptr = make_shared_nothrow(tmp_file.release()); + CHECK_AS_EXPECTED(nullptr != tmp_file_ptr, HAILO_OUT_OF_HOST_MEMORY); + + return tmp_file_ptr; +} + +void MonitorHandler::dump_state() +{ + auto file = LockedFile::create(m_mon_tmp_output->name(), "w"); + if (HAILO_SUCCESS != file.status()) { + LOGGER__ERROR("Failed to open and lock file {}, with status: {}", m_mon_tmp_output->name(), file.status()); + return; + } + + ProtoMon mon; + mon.set_pid(get_curr_pid_as_str()); + time_dependent_events_cycle_calc(); + log_monitor_networks_infos(mon); + log_monitor_device_infos(mon); + log_monitor_frames_infos(mon); + + clear_accumulators(); + + if (!mon.SerializeToFileDescriptor(file->get_fd())) { + LOGGER__ERROR("Failed to 
SerializeToFileDescriptor(), with errno: {}", errno); + } +} +#endif + +void MonitorHandler::time_dependent_events_cycle_calc() +{ + auto curr_time = std::chrono::steady_clock::now(); + m_last_measured_time_duration = std::chrono::duration_cast<std::chrono::duration<double>>(curr_time - m_last_measured_timestamp).count(); + + for (auto &device : m_devices_info) { + if (!device.second.device_has_drained_everything) { + update_utilization_read_buffers_finished(device.second.device_id, device.second.current_core_op_handle, false); + } + } + m_last_measured_timestamp = curr_time; +} + +void MonitorHandler::log_monitor_device_infos(ProtoMon &mon) +{ + for (auto const &device_info_pair : m_devices_info) { + auto device_info = device_info_pair.second; + auto curr_device_utilization = device_info.device_utilization_duration; + auto utilization_percentage = ((curr_device_utilization * 100) / m_last_measured_time_duration); + + auto device_infos = mon.add_device_infos(); + device_infos->set_device_id(device_info.device_id); + device_infos->set_utilization(utilization_percentage); + device_infos->set_device_arch(device_info.device_arch); + } +} + +void MonitorHandler::log_monitor_networks_infos(ProtoMon &mon) +{ + for (uint32_t core_op_handle = 0; core_op_handle < m_core_ops_info.size(); core_op_handle++) { + auto curr_core_op_utilization = m_core_ops_info[core_op_handle].utilization; + auto utilization = ((curr_core_op_utilization * 100) / m_last_measured_time_duration); + double min_fps = std::numeric_limits<double>::max(); + + for (auto const &stream : m_core_ops_info[core_op_handle].output_streams_info) { + double fps = stream.second.total_frames_count / m_last_measured_time_duration; + min_fps = (fps < min_fps) ? 
fps : min_fps; + } + + auto net_info = mon.add_networks_infos(); + net_info->set_network_name(m_core_ops_info[core_op_handle].core_op_name); + net_info->set_utilization(utilization); + net_info->set_fps(min_fps); + } +} + +void MonitorHandler::log_monitor_frames_infos(ProtoMon &mon) +{ + for (uint32_t core_op_handle = 0; core_op_handle < m_core_ops_info.size(); core_op_handle++) { + assert(contains(m_core_ops_info, core_op_handle)); + auto net_frames_info = mon.add_net_frames_infos(); + for (auto const &stream : m_core_ops_info[core_op_handle].input_streams_info) { + net_frames_info->set_network_name(m_core_ops_info[core_op_handle].core_op_name); + auto stream_frames_info = net_frames_info->add_streams_frames_infos(); + stream_frames_info->set_stream_name(stream.first); + stream_frames_info->set_stream_direction(PROTO__STREAM_DIRECTION__HOST_TO_DEVICE); + stream_frames_info->set_buffer_frames_size(static_cast(stream.second.queue_size * m_device_count)); + stream_frames_info->set_pending_frames_count(static_cast(stream.second.pending_frames_count)); + } + + for (auto const &stream : m_core_ops_info[core_op_handle].output_streams_info) { + net_frames_info->set_network_name(m_core_ops_info[core_op_handle].core_op_name); + auto stream_frames_info = net_frames_info->add_streams_frames_infos(); + stream_frames_info->set_stream_name(stream.first); + stream_frames_info->set_stream_direction(PROTO__STREAM_DIRECTION__DEVICE_TO_HOST); + if (m_core_ops_info[core_op_handle].is_nms) { + stream_frames_info->set_pending_frames_count(SCHEDULER_MON_NAN_VAL); + stream_frames_info->set_buffer_frames_size(SCHEDULER_MON_NAN_VAL); + } else { + stream_frames_info->set_pending_frames_count(static_cast(stream.second.pending_frames_count)); + stream_frames_info->set_buffer_frames_size(static_cast(stream.second.queue_size * m_device_count)); + } + } + } +} + +void MonitorHandler::update_utilization_timers(const device_id_t &device_id, scheduler_core_op_handle_t core_op_handle) +{ + 
assert(contains(m_core_ops_info, core_op_handle)); + assert(contains(m_devices_info, device_id)); + + auto time_diff = std::chrono::duration_cast>( + std::chrono::steady_clock::now() - m_devices_info.at(device_id).last_measured_utilization_timestamp).count(); + + m_devices_info.at(device_id).device_utilization_duration += time_diff; + m_core_ops_info[core_op_handle].utilization += time_diff; +} + +void MonitorHandler::update_utilization_timestamp(const device_id_t &device_id) +{ + assert(contains(m_devices_info, device_id)); + m_devices_info.at(device_id).last_measured_utilization_timestamp = std::chrono::steady_clock::now(); +} + +void MonitorHandler::update_utilization_send_started(const device_id_t &device_id) +{ + assert(contains(m_devices_info, device_id)); + if (m_devices_info.at(device_id).device_has_drained_everything) { + update_device_drained_state(device_id, false); + update_utilization_timestamp(device_id); + } +} + +void MonitorHandler::update_device_drained_state(const device_id_t &device_id, bool state) +{ + assert(contains(m_devices_info, device_id)); + m_devices_info.at(device_id).device_has_drained_everything = state; +} + +void MonitorHandler::update_utilization_read_buffers_finished(const device_id_t &device_id, + scheduler_core_op_handle_t core_op_handle, bool is_drained_everything) +{ + update_utilization_timers(device_id, core_op_handle); + update_device_drained_state(device_id, is_drained_everything); + if (!is_drained_everything) { + update_utilization_timestamp(device_id); + } +} + +void MonitorHandler::clear_accumulators() +{ + for (auto &device_info : m_devices_info) { + device_info.second.device_utilization_duration = 0; + } + + for (auto &handle_core_op_pair : m_core_ops_info) { + for (auto &handle_streams_pair : handle_core_op_pair.second.output_streams_info) { + handle_streams_pair.second.total_frames_count = 0; + } + handle_core_op_pair.second.utilization = 0; + } +} + +} \ No newline at end of file diff --git 
a/hailort/libhailort/src/utils/profiler/monitor_handler.hpp b/hailort/libhailort/src/utils/profiler/monitor_handler.hpp new file mode 100644 index 0000000..e6c188c --- /dev/null +++ b/hailort/libhailort/src/utils/profiler/monitor_handler.hpp @@ -0,0 +1,135 @@ +/** + * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file monitor_handler.hpp + * @brief Implementation of the scheduler monitor handlers base with HailoRT tracer mechanism + **/ + +#ifndef _HAILO_MONITOR_HANDLER_HPP_ +#define _HAILO_MONITOR_HANDLER_HPP_ + +#include "handler.hpp" + +#include "hailo/hailort.h" +#include "hailo/expected.hpp" +#include "hailo/event.hpp" + +#include "common/filesystem.hpp" +#include "common/utils.hpp" + +#include "vdevice/scheduler/scheduler_base.hpp" + +#include +#include +#include + +#if defined(_MSC_VER) +#pragma warning(push) +#pragma warning(disable: 4244 4267 4127) +#else +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wconversion" +#endif +#include "scheduler_mon.pb.h" +#if defined(_MSC_VER) +#pragma warning( pop ) +#else +#pragma GCC diagnostic pop +#endif + +namespace hailort +{ + +#define SCHEDULER_MON_TMP_DIR ("/tmp/hmon_files/") +#define SCHEDULER_MON_ENV_VAR ("HAILO_MONITOR") +#define DEFAULT_SCHEDULER_MON_INTERVAL (std::chrono::seconds(1)) +#define SCHEDULER_MON_NAN_VAL (-1) + +using stream_name = std::string; + +struct DeviceInfo { + DeviceInfo(const device_id_t &device_id, const std::string &device_arch) : + device_id(device_id), device_arch(device_arch), device_has_drained_everything(true), + device_utilization_duration(0), last_measured_utilization_timestamp(std::chrono::steady_clock::now()), + current_core_op_handle(INVALID_CORE_OP_HANDLE) + {} + std::string device_id; + std::string device_arch; + bool device_has_drained_everything; + double device_utilization_duration; + std::chrono::time_point last_measured_utilization_timestamp; 
+ scheduler_core_op_handle_t current_core_op_handle; +}; + +struct StreamsInfo { + uint32_t queue_size; + uint32_t pending_frames_count; + uint32_t total_frames_count = 0; +}; + +struct CoreOpInfo { + std::unordered_map input_streams_info; + std::unordered_map output_streams_info; + std::string core_op_name; + bool is_nms; + double utilization; +}; + +class MonitorHandler : public Handler +{ +public: + MonitorHandler(MonitorHandler const&) = delete; + void operator=(MonitorHandler const&) = delete; + + MonitorHandler(); + ~MonitorHandler(); + void clear_monitor(); + + virtual void handle_trace(const AddCoreOpTrace&) override; + virtual void handle_trace(const CreateCoreOpInputStreamsTrace&) override; + virtual void handle_trace(const CreateCoreOpOutputStreamsTrace&) override; + virtual void handle_trace(const WriteFrameTrace&) override; + virtual void handle_trace(const ReadFrameTrace&) override; + virtual void handle_trace(const InputVdmaDequeueTrace&) override; + virtual void handle_trace(const OutputVdmaEnqueueTrace&) override; + virtual void handle_trace(const SwitchCoreOpTrace&) override; + virtual void handle_trace(const SchedulerStartTrace&) override; + virtual void handle_trace(const CoreOpIdleTrace&) override; + virtual void handle_trace(const AddDeviceTrace&) override; + +private: + hailo_status start_mon(); +#if defined(__GNUC__) + Expected> open_temp_mon_file(); + void dump_state(); +#endif + void time_dependent_events_cycle_calc(); + void log_monitor_device_infos(ProtoMon &mon); + void log_monitor_networks_infos(ProtoMon &mon); + void log_monitor_frames_infos(ProtoMon &mon); + void update_utilization_timers(const device_id_t &device_id, scheduler_core_op_handle_t core_op_handle); + void update_utilization_timestamp(const device_id_t &device_id); + void update_utilization_send_started(const device_id_t &device_id); + void update_device_drained_state(const device_id_t &device_id, bool state); + void update_utilization_read_buffers_finished(const 
device_id_t &device_id, scheduler_core_op_handle_t core_op_handle, bool is_drained_everything); + void clear_accumulators(); + scheduler_core_op_handle_t get_core_op_handle_by_name(const std::string &name); + + bool m_is_monitor_currently_working = false; + uint32_t m_device_count; + std::thread m_mon_thread; + EventPtr m_mon_shutdown_event; +#if defined(__GNUC__) + std::shared_ptr<TempFile> m_mon_tmp_output; +#endif + std::chrono::time_point<std::chrono::steady_clock> m_last_measured_timestamp; + double m_last_measured_time_duration; + // TODO: Consider adding Accumulator classes for more info (min, max, mean, etc..) + std::unordered_map<scheduler_core_op_handle_t, CoreOpInfo> m_core_ops_info; + std::unordered_map<device_id_t, DeviceInfo> m_devices_info; +}; +} + +#endif /* _HAILO_MONITOR_HANDLER_HPP_ */ \ No newline at end of file diff --git a/hailort/libhailort/src/utils/profiler/scheduler_profiler_handler.cpp b/hailort/libhailort/src/utils/profiler/scheduler_profiler_handler.cpp new file mode 100644 index 0000000..86bd76b --- /dev/null +++ b/hailort/libhailort/src/utils/profiler/scheduler_profiler_handler.cpp @@ -0,0 +1,219 @@ +/** + * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. 
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file scheduler_profiler_handler.cpp + * @brief Implementation of the scheduler profiler handlers base with HailoRT tracer mechanism + **/ + +#include "scheduler_profiler_handler.hpp" + +#include "common/logger_macros.hpp" + +#include "utils/hailort_logger.hpp" + +#include +#include +#include +#include + +#include +#include + +#define SCHEDULER_PROFILER_NAME ("SchedulerProfiler") +#define SCHEDULER_PROFILER_LOGGER_FILENAME ("scheduler_profiler.json") +#define SCHEDULER_PROFILER_LOGGER_PATTERN ("%v") + +#define SCHEDULER_PROFILER_LOGGER_PATH ("SCHEDULER_PROFILER_LOGGER_PATH") + +namespace hailort +{ + +SchedulerProfilerHandler::SchedulerProfilerHandler(int64_t &start_time) +#ifndef __ANDROID__ + : m_file_sink(HailoRTLogger::create_file_sink(HailoRTLogger::get_log_path(SCHEDULER_PROFILER_LOGGER_PATH), SCHEDULER_PROFILER_LOGGER_FILENAME, false)), + m_first_write(true) +#endif +{ +#ifndef __ANDROID__ + spdlog::sinks_init_list sink_list = { m_file_sink }; + m_profiler_logger = make_shared_nothrow(SCHEDULER_PROFILER_NAME, sink_list.begin(), sink_list.end()); + m_file_sink->set_level(spdlog::level::level_enum::info); + m_file_sink->set_pattern(SCHEDULER_PROFILER_LOGGER_PATTERN); + std::stringstream ss; + ss << "{\"ns_since_epoch_zero_time\": \"" << start_time << "\",\n\"scheduler_actions\": [\n"; + m_profiler_logger->info(ss.str()); +#else + (void)start_time; +#endif +} + +SchedulerProfilerHandler::~SchedulerProfilerHandler() +{ + m_profiler_logger->info("]\n}"); +} + +struct JSON +{ + std::unordered_map members; + JSON(const std::initializer_list> &dict) : members{dict} {} + JSON(const std::unordered_map &dict) { + for (auto &pair : dict) { + members.insert({pair.first, std::to_string(pair.second)}); + } + } +}; + +template +std::string json_to_string(const T &val) { + return std::to_string(val); +} + +template<> +std::string json_to_string(const std::string &val) { + 
std::ostringstream os; + os << std::quoted(val); + return os.str(); +} + +template<> +std::string json_to_string(const bool &bool_val) { + return bool_val ? "true" : "false"; +} + +template<> +std::string json_to_string(const JSON &json_val) { + std::ostringstream os; + os << "{\n"; + size_t i = 0; + for (const auto &kv : json_val.members) { + ++i; + os << std::quoted(kv.first) << " : "; + os << kv.second; + if (i != json_val.members.size()) { + os << ",\n"; + } + } + os << "\n}"; + return os.str(); +} + +bool SchedulerProfilerHandler::comma() +{ + auto result = !m_first_write; + m_first_write = false; + return result; +} + +void SchedulerProfilerHandler::log(JSON json) +{ + m_profiler_logger->info("{}{}", comma() ? ",\n" : "", json_to_string(json)); +} + +void SchedulerProfilerHandler::handle_trace(const AddCoreOpTrace &trace) +{ + log(JSON({ + {"action", json_to_string(trace.name)}, + {"timestamp", json_to_string(trace.timestamp)}, + {"device_id", json_to_string(trace.device_id)}, + {"core_op_name", json_to_string(trace.core_op_name)}, + {"core_op_handle", json_to_string(trace.core_op_handle)}, + {"timeout", json_to_string((uint64_t)trace.timeout)}, + {"threshold", json_to_string((uint64_t)trace.threshold)} + })); +} + +void SchedulerProfilerHandler::handle_trace(const CreateCoreOpInputStreamsTrace &trace) +{ + log(JSON({ + {"action", json_to_string(trace.name)}, + {"timestamp", json_to_string(trace.timestamp)}, + {"device_id", json_to_string(trace.device_id)}, + {"core_op_name", json_to_string(trace.core_op_name)}, + {"stream_name", json_to_string(trace.stream_name)}, + {"queue_size", json_to_string(trace.queue_size)} + })); +} + +void SchedulerProfilerHandler::handle_trace(const CreateCoreOpOutputStreamsTrace &trace) +{ + log(JSON({ + {"action", json_to_string(trace.name)}, + {"timestamp", json_to_string(trace.timestamp)}, + {"device_id", json_to_string(trace.device_id)}, + {"core_op_name", json_to_string(trace.core_op_name)}, + {"stream_name", 
json_to_string(trace.stream_name)}, + {"queue_size", json_to_string(trace.queue_size)} + })); +} + +void SchedulerProfilerHandler::handle_trace(const WriteFrameTrace &trace) +{ + log(JSON({ + {"action", json_to_string(trace.name)}, + {"timestamp", json_to_string(trace.timestamp)}, + {"device_id", json_to_string(trace.device_id)}, + {"core_op_handle", json_to_string(trace.core_op_handle)}, + {"queue_name", json_to_string(trace.queue_name)} + })); +} + +void SchedulerProfilerHandler::handle_trace(const InputVdmaDequeueTrace &trace) +{ + log(JSON({ + {"action", json_to_string(trace.name)}, + {"timestamp", json_to_string(trace.timestamp)}, + {"device_id", json_to_string(trace.device_id)}, + {"core_op_handle", json_to_string(trace.core_op_handle)}, + {"queue_name", json_to_string(trace.queue_name)} + })); +} + +void SchedulerProfilerHandler::handle_trace(const ReadFrameTrace &trace) +{ + log(JSON({ + {"action", json_to_string(trace.name)}, + {"timestamp", json_to_string(trace.timestamp)}, + {"device_id", json_to_string(trace.device_id)}, + {"core_op_handle", json_to_string(trace.core_op_handle)}, + {"queue_name", json_to_string(trace.queue_name)} + })); +} + +void SchedulerProfilerHandler::handle_trace(const OutputVdmaEnqueueTrace &trace) +{ + log(JSON({ + {"action", json_to_string(trace.name)}, + {"timestamp", json_to_string(trace.timestamp)}, + {"device_id", json_to_string(trace.device_id)}, + {"core_op_handle", json_to_string(trace.core_op_handle)}, + {"queue_name", json_to_string(trace.queue_name)}, + {"frames", json_to_string(trace.frames)} + })); +} + +void SchedulerProfilerHandler::handle_trace(const ChooseCoreOpTrace &trace) +{ + log(JSON({ + {"action", json_to_string(trace.name)}, + {"timestamp", json_to_string(trace.timestamp)}, + {"device_id", json_to_string(trace.device_id)}, + {"chosen_core_op_handle", json_to_string(trace.core_op_handle)}, + {"threshold", json_to_string(trace.threshold)}, + {"timeout", json_to_string(trace.timeout)}, + {"priority", 
json_to_string(trace.priority)} + })); +} + +void SchedulerProfilerHandler::handle_trace(const SwitchCoreOpTrace &trace) +{ + log(JSON({ + {"action", json_to_string(trace.name)}, + {"timestamp", json_to_string(trace.timestamp)}, + {"device_id", json_to_string(trace.device_id)}, + {"core_op_handle", json_to_string(trace.core_op_handle)} + })); +} + +} \ No newline at end of file diff --git a/hailort/libhailort/src/utils/profiler/scheduler_profiler_handler.hpp b/hailort/libhailort/src/utils/profiler/scheduler_profiler_handler.hpp new file mode 100644 index 0000000..24178ae --- /dev/null +++ b/hailort/libhailort/src/utils/profiler/scheduler_profiler_handler.hpp @@ -0,0 +1,49 @@ +/** + * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file scheduler_profiler_handler.hpp + * @brief Implementation of the scheduler profiler handlers base with HailoRT tracer mechanism + **/ + +#ifndef _HAILO_SCHEDULER_PROFILER_HANDLER_HPP_ +#define _HAILO_SCHEDULER_PROFILER_HANDLER_HPP_ + +#include "hailo/hailort.h" + +#include "handler.hpp" + +namespace hailort +{ +class SchedulerProfilerHandler : public Handler +{ +public: + SchedulerProfilerHandler(SchedulerProfilerHandler const&) = delete; + void operator=(SchedulerProfilerHandler const&) = delete; + + SchedulerProfilerHandler(int64_t &start_time); + ~SchedulerProfilerHandler(); + + virtual void handle_trace(const AddCoreOpTrace&) override; + virtual void handle_trace(const CreateCoreOpInputStreamsTrace&) override; + virtual void handle_trace(const CreateCoreOpOutputStreamsTrace&) override; + virtual void handle_trace(const WriteFrameTrace&) override; + virtual void handle_trace(const InputVdmaDequeueTrace&) override; + virtual void handle_trace(const ReadFrameTrace&) override; + virtual void handle_trace(const OutputVdmaEnqueueTrace&) override; + virtual void handle_trace(const ChooseCoreOpTrace&) override; + virtual void 
handle_trace(const SwitchCoreOpTrace&) override; + +private: + void log(JSON json); + bool comma(); + + std::shared_ptr m_file_sink; + std::shared_ptr m_profiler_logger; + std::atomic m_first_write; +}; + +} + +#endif /* _SCHEDULER_PROFILER_HANDLER_HPP_ */ \ No newline at end of file diff --git a/hailort/libhailort/src/utils/profiler/tracer.cpp b/hailort/libhailort/src/utils/profiler/tracer.cpp index 175f67f..14fce8f 100644 --- a/hailort/libhailort/src/utils/profiler/tracer.cpp +++ b/hailort/libhailort/src/utils/profiler/tracer.cpp @@ -5,28 +5,12 @@ /** * @file tracer.cpp * @brief: Tracing mechanism for HailoRT + FW events - * **/ #include "common/utils.hpp" -#include "utils/hailort_logger.hpp" #include "utils/profiler/tracer.hpp" -#include -#include -#include -#include -#include -#include - - -#define SCHEDULER_PROFILER_NAME ("SchedulerProfiler") -#define SCHEDULER_PROFILER_LOGGER_FILENAME ("scheduler_profiler.json") -#define SCHEDULER_PROFILER_LOGGER_PATTERN ("%v") - -#define SCHEDULER_PROFILER_LOGGER_PATH ("SCHEDULER_PROFILER_LOGGER_PATH") - #define PROFILER_ENV_VAR ("HAILO_ENABLE_PROFILER") namespace hailort @@ -34,8 +18,14 @@ namespace hailort Tracer::Tracer() { - auto should_trace_env = std::getenv(PROFILER_ENV_VAR); - m_should_trace = ((nullptr != should_trace_env) && (strnlen(should_trace_env, 2) == 1) && (strncmp(should_trace_env, "1", 1) == 0)); + init_scheduler_profiler_handler(); + init_monitor_handler(); +} + +void Tracer::init_scheduler_profiler_handler() +{ + const char* env_var_name = PROFILER_ENV_VAR; + m_should_trace = is_env_variable_on(env_var_name); if (m_should_trace) { m_start_time = std::chrono::high_resolution_clock::now(); int64_t time_since_epoch = std::chrono::duration_cast(m_start_time.time_since_epoch()).count(); @@ -43,191 +33,13 @@ Tracer::Tracer() } } -SchedulerProfilerHandler::SchedulerProfilerHandler(int64_t &start_time) -#ifndef __ANDROID__ - : 
m_file_sink(HailoRTLogger::create_file_sink(HailoRTLogger::get_log_path(SCHEDULER_PROFILER_LOGGER_PATH), SCHEDULER_PROFILER_LOGGER_FILENAME, false)), - m_first_write(true) -#endif +void Tracer::init_monitor_handler() { -#ifndef __ANDROID__ - spdlog::sinks_init_list sink_list = { m_file_sink }; - m_profiler_logger = make_shared_nothrow(SCHEDULER_PROFILER_NAME, sink_list.begin(), sink_list.end()); - m_file_sink->set_level(spdlog::level::level_enum::info); - m_file_sink->set_pattern(SCHEDULER_PROFILER_LOGGER_PATTERN); - std::stringstream ss; - ss << "{\"ns_since_epoch_zero_time\": \"" << start_time << "\",\n\"scheduler_actions\": [\n"; - m_profiler_logger->info(ss.str()); -#else - (void)start_time; -#endif -} - -SchedulerProfilerHandler::~SchedulerProfilerHandler() -{ - m_profiler_logger->info("]\n}"); -} - -struct JSON -{ - std::unordered_map members; - JSON(const std::initializer_list> &dict) : members{dict} {} - JSON(const std::unordered_map &dict) { - for (auto &pair : dict) { - members.insert({pair.first, std::to_string(pair.second)}); - } - } -}; - -template -std::string json_to_string(const T &val) { - return std::to_string(val); -} - -template<> -std::string json_to_string(const std::string &val) { - std::ostringstream os; - os << std::quoted(val); - return os.str(); -} - -template<> -std::string json_to_string(const bool &bool_val) { - return bool_val ? 
"true" : "false"; -} - -template<> -std::string json_to_string(const JSON &json_val) { - std::ostringstream os; - os << "{\n"; - size_t i = 0; - for (const auto &kv : json_val.members) { - ++i; - os << std::quoted(kv.first) << " : "; - os << kv.second; - if (i != json_val.members.size()) { - os << ",\n"; - } + const char* env_var_name = SCHEDULER_MON_ENV_VAR; + m_should_monitor = is_env_variable_on(env_var_name); + if (m_should_monitor) { + m_handlers.push_back(std::make_unique()); } - os << "\n}"; - return os.str(); } -bool SchedulerProfilerHandler::comma() -{ - auto result = !m_first_write; - m_first_write = false; - return result; -} - -void SchedulerProfilerHandler::log(JSON json) -{ - m_profiler_logger->info("{}{}", comma() ? ",\n" : "", json_to_string(json)); -} - -void SchedulerProfilerHandler::handle_trace(const AddCoreOpTrace &trace) -{ - log(JSON({ - {"action", json_to_string(trace.name)}, - {"timestamp", json_to_string(trace.timestamp)}, - {"device_id", json_to_string(trace.device_id)}, - {"core_op_name", json_to_string(trace.core_op_name)}, - {"core_op_handle", json_to_string(trace.core_op_handle)}, - {"timeout", json_to_string((uint64_t)trace.timeout)}, - {"threshold", json_to_string((uint64_t)trace.threshold)} - })); -} - -void SchedulerProfilerHandler::handle_trace(const CreateCoreOpInputStreamsTrace &trace) -{ - log(JSON({ - {"action", json_to_string(trace.name)}, - {"timestamp", json_to_string(trace.timestamp)}, - {"device_id", json_to_string(trace.device_id)}, - {"core_op_name", json_to_string(trace.core_op_name)}, - {"stream_name", json_to_string(trace.stream_name)}, - {"queue_size", json_to_string(trace.queue_size)} - })); -} - -void SchedulerProfilerHandler::handle_trace(const CreateCoreOpOutputStreamsTrace &trace) -{ - log(JSON({ - {"action", json_to_string(trace.name)}, - {"timestamp", json_to_string(trace.timestamp)}, - {"device_id", json_to_string(trace.device_id)}, - {"core_op_name", json_to_string(trace.core_op_name)}, - {"stream_name", 
json_to_string(trace.stream_name)}, - {"queue_size", json_to_string(trace.queue_size)} - })); -} - -void SchedulerProfilerHandler::handle_trace(const WriteFrameTrace &trace) -{ - log(JSON({ - {"action", json_to_string(trace.name)}, - {"timestamp", json_to_string(trace.timestamp)}, - {"device_id", json_to_string(trace.device_id)}, - {"core_op_handle", json_to_string(trace.core_op_handle)}, - {"queue_name", json_to_string(trace.queue_name)} - })); -} - -void SchedulerProfilerHandler::handle_trace(const InputVdmaDequeueTrace &trace) -{ - log(JSON({ - {"action", json_to_string(trace.name)}, - {"timestamp", json_to_string(trace.timestamp)}, - {"device_id", json_to_string(trace.device_id)}, - {"core_op_handle", json_to_string(trace.core_op_handle)}, - {"queue_name", json_to_string(trace.queue_name)} - })); -} - -void SchedulerProfilerHandler::handle_trace(const ReadFrameTrace &trace) -{ - log(JSON({ - {"action", json_to_string(trace.name)}, - {"timestamp", json_to_string(trace.timestamp)}, - {"device_id", json_to_string(trace.device_id)}, - {"core_op_handle", json_to_string(trace.core_op_handle)}, - {"queue_name", json_to_string(trace.queue_name)} - })); -} - -void SchedulerProfilerHandler::handle_trace(const OutputVdmaEnqueueTrace &trace) -{ - log(JSON({ - {"action", json_to_string(trace.name)}, - {"timestamp", json_to_string(trace.timestamp)}, - {"device_id", json_to_string(trace.device_id)}, - {"core_op_handle", json_to_string(trace.core_op_handle)}, - {"queue_name", json_to_string(trace.queue_name)}, - {"frames", json_to_string(trace.frames)} - })); -} - -void SchedulerProfilerHandler::handle_trace(const ChooseCoreOpTrace &trace) -{ - log(JSON({ - {"action", json_to_string(trace.name)}, - {"timestamp", json_to_string(trace.timestamp)}, - {"device_id", json_to_string(trace.device_id)}, - {"chosen_core_op_handle", json_to_string(trace.core_op_handle)}, - {"threshold", json_to_string(trace.threshold)}, - {"timeout", json_to_string(trace.timeout)}, - {"priority", 
json_to_string(trace.priority)} - })); -} - -void SchedulerProfilerHandler::handle_trace(const SwitchCoreOpTrace &trace) -{ - log(JSON({ - {"action", json_to_string(trace.name)}, - {"timestamp", json_to_string(trace.timestamp)}, - {"device_id", json_to_string(trace.device_id)}, - {"core_op_handle", json_to_string(trace.core_op_handle)} - })); -} - - } diff --git a/hailort/libhailort/src/utils/profiler/tracer.hpp b/hailort/libhailort/src/utils/profiler/tracer.hpp index 369079f..35036aa 100644 --- a/hailort/libhailort/src/utils/profiler/tracer.hpp +++ b/hailort/libhailort/src/utils/profiler/tracer.hpp @@ -11,193 +11,11 @@ #define _HAILO_TRACER_HPP_ #include "hailo/hailort.h" -#include "common/logger_macros.hpp" - -#include "vdevice/scheduler/scheduler_base.hpp" - -#include -#include -#include -#include -#include -#include -#include -#include -#include - +#include "scheduler_profiler_handler.hpp" +#include "monitor_handler.hpp" namespace hailort { - -struct Trace -{ - Trace(const std::string &name) - : name(name) - {} - - virtual ~Trace() = default; - - uint64_t timestamp = 0; - std::string name; -}; - -struct InitTrace : Trace -{ - InitTrace() : Trace("init") {} -}; - -struct AddCoreOpTrace : Trace -{ - AddCoreOpTrace(const std::string &device_id, const std::string &core_op_name, uint64_t timeout, uint32_t threshold, scheduler_core_op_handle_t handle) - : Trace("add_core_op"), device_id(device_id), core_op_name(core_op_name), timeout(timeout), threshold(threshold), core_op_handle(handle) - {} - - std::string device_id; - std::string core_op_name; - uint64_t timeout = 0; - uint32_t threshold = 0; - scheduler_core_op_handle_t core_op_handle = INVALID_CORE_OP_HANDLE; -}; - -struct CreateCoreOpInputStreamsTrace : Trace -{ - CreateCoreOpInputStreamsTrace(const std::string &device_id, const std::string &core_op_name, const std::string &stream_name, uint32_t queue_size) - : Trace("create_input_stream"), device_id(device_id), core_op_name(core_op_name), 
stream_name(stream_name), queue_size(queue_size) - {} - - std::string device_id; - std::string core_op_name; - std::string stream_name; - uint32_t queue_size; -}; - -struct CreateCoreOpOutputStreamsTrace : Trace -{ - CreateCoreOpOutputStreamsTrace(const std::string &device_id, const std::string &core_op_name, const std::string &stream_name, uint32_t queue_size) - : Trace("create_output_stream"), device_id(device_id), core_op_name(core_op_name), stream_name(stream_name), queue_size(queue_size) - {} - - std::string device_id; - std::string core_op_name; - std::string stream_name; - uint32_t queue_size; -}; - -struct WriteFrameTrace : Trace -{ - WriteFrameTrace(const std::string &device_id, scheduler_core_op_handle_t core_op_handle, const std::string &queue_name) - : Trace("write_frame"), device_id(device_id), core_op_handle(core_op_handle), queue_name(queue_name) - {} - - std::string device_id; - scheduler_core_op_handle_t core_op_handle; - std::string queue_name; -}; - -struct InputVdmaDequeueTrace : Trace -{ - InputVdmaDequeueTrace(const std::string &device_id, scheduler_core_op_handle_t core_op_handle, const std::string &queue_name) - : Trace("input_vdma_dequeue"), device_id(device_id), core_op_handle(core_op_handle), queue_name(queue_name) - {} - - std::string device_id; - scheduler_core_op_handle_t core_op_handle; - std::string queue_name; -}; - -struct ReadFrameTrace : Trace -{ - ReadFrameTrace(const std::string &device_id, scheduler_core_op_handle_t core_op_handle, const std::string &queue_name) - : Trace("read_frame"), device_id(device_id), core_op_handle(core_op_handle), queue_name(queue_name) - {} - - std::string device_id; - scheduler_core_op_handle_t core_op_handle; - std::string queue_name; -}; - -struct OutputVdmaEnqueueTrace : Trace -{ - OutputVdmaEnqueueTrace(const std::string &device_id, scheduler_core_op_handle_t core_op_handle, const std::string &queue_name, uint32_t frames) - : Trace("output_vdma_enqueue"), device_id(device_id), 
core_op_handle(core_op_handle), queue_name(queue_name), frames(frames) - {} - - std::string device_id; - scheduler_core_op_handle_t core_op_handle; - std::string queue_name; - uint32_t frames = 0; -}; - -struct ChooseCoreOpTrace : Trace -{ - ChooseCoreOpTrace(const std::string &device_id, scheduler_core_op_handle_t handle, bool threshold, bool timeout, core_op_priority_t priority) - : Trace("choose_core_op"), device_id(device_id), core_op_handle(handle), threshold(threshold), timeout(timeout), priority(priority) - {} - - std::string device_id; - scheduler_core_op_handle_t core_op_handle; - bool threshold = false; - bool timeout = false; - core_op_priority_t priority; -}; - -struct SwitchCoreOpTrace : Trace -{ - SwitchCoreOpTrace(const std::string &device_id, scheduler_core_op_handle_t handle) - : Trace("switch_core_op"), device_id(device_id), core_op_handle(handle) - {} - - std::string device_id; - scheduler_core_op_handle_t core_op_handle; -}; - -class Handler -{ -public: - virtual ~Handler() = default; - - virtual void handle_trace(const InitTrace&) {}; - virtual void handle_trace(const AddCoreOpTrace&) {}; - virtual void handle_trace(const CreateCoreOpInputStreamsTrace&) {}; - virtual void handle_trace(const CreateCoreOpOutputStreamsTrace&) {}; - virtual void handle_trace(const WriteFrameTrace&) {}; - virtual void handle_trace(const InputVdmaDequeueTrace&) {}; - virtual void handle_trace(const ReadFrameTrace&) {}; - virtual void handle_trace(const OutputVdmaEnqueueTrace&) {}; - virtual void handle_trace(const ChooseCoreOpTrace&) {}; - virtual void handle_trace(const SwitchCoreOpTrace&) {}; -}; - -struct JSON; - -class SchedulerProfilerHandler : public Handler -{ -public: - SchedulerProfilerHandler(SchedulerProfilerHandler const&) = delete; - void operator=(SchedulerProfilerHandler const&) = delete; - - SchedulerProfilerHandler(int64_t &start_time); - ~SchedulerProfilerHandler(); - - virtual void handle_trace(const AddCoreOpTrace&) override; - virtual void 
handle_trace(const CreateCoreOpInputStreamsTrace&) override; - virtual void handle_trace(const CreateCoreOpOutputStreamsTrace&) override; - virtual void handle_trace(const WriteFrameTrace&) override; - virtual void handle_trace(const InputVdmaDequeueTrace&) override; - virtual void handle_trace(const ReadFrameTrace&) override; - virtual void handle_trace(const OutputVdmaEnqueueTrace&) override; - virtual void handle_trace(const ChooseCoreOpTrace&) override; - virtual void handle_trace(const SwitchCoreOpTrace&) override; - -private: - void log(JSON json); - bool comma(); - - std::shared_ptr m_file_sink; - std::shared_ptr m_profiler_logger; - std::atomic m_first_write; -}; - class Tracer { public: @@ -210,6 +28,8 @@ public: private: Tracer(); + void init_monitor_handler(); + void init_scheduler_profiler_handler(); static Tracer& get_instance() { @@ -220,7 +40,7 @@ private: template void execute_trace(Args... trace_args) { - if (!m_should_trace) { + if ((!m_should_trace) && (!m_should_monitor)) { return; } @@ -233,6 +53,7 @@ private: } bool m_should_trace = false; + bool m_should_monitor = false; std::chrono::high_resolution_clock::time_point m_start_time; std::vector> m_handlers; }; diff --git a/hailort/libhailort/src/utils/shared_resource_manager.hpp b/hailort/libhailort/src/utils/shared_resource_manager.hpp index 9dfdd30..687f66e 100644 --- a/hailort/libhailort/src/utils/shared_resource_manager.hpp +++ b/hailort/libhailort/src/utils/shared_resource_manager.hpp @@ -37,6 +37,7 @@ struct ResourceRef { std::shared_ptr resource; }; +// TODO: Merge ExportedResourceManager and SharedResourceManager (HRT-10317) template class SharedResourceManager { diff --git a/hailort/libhailort/src/utils/thread_safe_queue.hpp b/hailort/libhailort/src/utils/thread_safe_queue.hpp index 6d8646f..3be5f5c 100644 --- a/hailort/libhailort/src/utils/thread_safe_queue.hpp +++ b/hailort/libhailort/src/utils/thread_safe_queue.hpp @@ -15,8 +15,7 @@ #include "common/utils.hpp" #include 
"common/logger_macros.hpp" - -#include "utils/event_internal.hpp" +#include "common/event_internal.hpp" // Define __unix__ for inclusion of readerwriterqueue.h because readerwriterqueue is implemented over POSIX standards // but checks __unix__ - otherwise QNX returns unsupported platform (need HAILO_UNDEF_UNIX_FLAG in order to undefine diff --git a/hailort/libhailort/src/vdevice/CMakeLists.txt b/hailort/libhailort/src/vdevice/CMakeLists.txt index cacefd2..f9535d3 100644 --- a/hailort/libhailort/src/vdevice/CMakeLists.txt +++ b/hailort/libhailort/src/vdevice/CMakeLists.txt @@ -6,11 +6,14 @@ set(SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/pipeline_multiplexer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/vdevice_stream.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/vdevice_native_stream.cpp ${CMAKE_CURRENT_SOURCE_DIR}/vdevice_stream_multiplexer_wrapper.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/callback_reorder_queue.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/scheduler/network_group_scheduler.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/scheduler/scheduler.cpp ${CMAKE_CURRENT_SOURCE_DIR}/scheduler/scheduler_oracle.cpp ${CMAKE_CURRENT_SOURCE_DIR}/scheduler/scheduled_core_op_state.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/scheduler/scheduled_stream.cpp ${CMAKE_CURRENT_SOURCE_DIR}/scheduler/multi_device_scheduled_stream.cpp ) diff --git a/hailort/libhailort/src/vdevice/callback_reorder_queue.cpp b/hailort/libhailort/src/vdevice/callback_reorder_queue.cpp new file mode 100644 index 0000000..d2b1b42 --- /dev/null +++ b/hailort/libhailort/src/vdevice/callback_reorder_queue.cpp @@ -0,0 +1,74 @@ +/** + * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. 
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT) +**/ +/** + * @file callback_reorder_queue.cpp + **/ + +#include "callback_reorder_queue.hpp" + +namespace hailort +{ + +InternalTransferDoneCallback CallbackReorderQueue::wrap_callback(const InternalTransferDoneCallback &original) +{ + std::lock_guard lock_guard(m_queue_mutex); + const uint64_t current_callback_index = m_registered_callbacks++; + + return [this, original, current_callback_index](hailo_status status) { + // Push callback without calling it yet. + push_callback(std::make_pair(current_callback_index, [original, status]() { + return original(status); + })); + + // Then, call the queued callbacks in order (if there is ready callback). + call_queued_callbacks_in_order(); + }; +} + +void CallbackReorderQueue::cancel_last_callback() +{ + std::lock_guard lock_guard(m_queue_mutex); + assert(m_called_callbacks < m_registered_callbacks); + m_registered_callbacks--; +} + +void CallbackReorderQueue::push_callback(const Callback &callback) +{ + std::lock_guard lock_guard(m_queue_mutex); + assert(m_callbacks_queue.size() < m_max_size); + m_callbacks_queue.push(callback); +} + +void CallbackReorderQueue::call_queued_callbacks_in_order() +{ + // Allow only one thread to execute the callbacks. + std::lock_guard callbacks_lock(m_callbacks_mutex); + + while (auto callback = pop_ready_callback()) { + callback->second(); + } +} + +Expected CallbackReorderQueue::pop_ready_callback() +{ + std::lock_guard lock_guard(m_queue_mutex); + + if (m_callbacks_queue.empty()) { + return make_unexpected(HAILO_NOT_AVAILABLE); + } + + if (m_callbacks_queue.top().first != m_called_callbacks) { + // We need to wait until top() contains callback with index - m_called_callbacks. 
+ return make_unexpected(HAILO_NOT_AVAILABLE); + } + + auto next_callback = m_callbacks_queue.top(); + m_callbacks_queue.pop(); + + m_called_callbacks++; + return next_callback; +} + +} /* namespace hailort */ diff --git a/hailort/libhailort/src/vdevice/callback_reorder_queue.hpp b/hailort/libhailort/src/vdevice/callback_reorder_queue.hpp new file mode 100644 index 0000000..e5df53e --- /dev/null +++ b/hailort/libhailort/src/vdevice/callback_reorder_queue.hpp @@ -0,0 +1,91 @@ +/** + * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) +**/ +/** + * @file callback_reorder_queue.hpp + * @brief When using multiple devices with async API, we may get interrupt for some input/output stream out of order + * (For example - the second device may be faster than the first). + * To ensure the order of the callbacks, we put the callbacks in queue and call them in the same order inserted. + **/ + +#ifndef _HAILO_CALLBACK_REORDER_QUEUE_HPP_ +#define _HAILO_CALLBACK_REORDER_QUEUE_HPP_ + +#include "stream_common/async_common.hpp" + +#include +#include + +namespace hailort +{ + +class CallbackReorderQueue final { +public: + CallbackReorderQueue(size_t max_size) : + m_max_size(max_size), + m_callbacks_queue(compare_callbacks{}, make_queue_storage(m_max_size)) + {} + + // Wraps the given original callback so it will be called in the same wrap_callback order. + InternalTransferDoneCallback wrap_callback(const InternalTransferDoneCallback &original); + + // If some wrapped callback wasn't registered to some async API (for example because the queue is full), we need to + // remove the counters we added in `wrap_callback` (otherwise, next callback will wait forever). + // Note! + // * Call this function only after a `wrap_callback` was called. + // * Make sure the wrapped callback will never be called! (Otherwise counters will loss syncronization). 
+ void cancel_last_callback(); + +private: + // must be called with m_lock held + void call_queued_callbacks_in_order(); + + // Each callback has a function pointer and its index + using Callback = std::pair>; + + void push_callback(const Callback &callback); + + // Pop next callback ready to be called. Can return HAILO_NOT_AVAILABLE if there is no callback ready. + Expected pop_ready_callback(); + + // We don't want to have any memory allocations in runtime, so we init the priority queue with a reserved vector. + static std::vector make_queue_storage(size_t max_size) + { + std::vector storage; + storage.reserve(max_size); + return storage; + } + + const size_t m_max_size; + + // Guards access to m_callbacks_queue and the counters. + std::mutex m_queue_mutex; + + // Increasing counter for the index on next register callback. We don't worry about overflow (Even if we assume + // extreme value of 1,000,000 per second) + uint64_t m_registered_callbacks = 0; + + // Amount of callback that have called. Because the callbacks are called in order, this counter contains the index + // of the next callback expected to be executed. + uint64_t m_called_callbacks = 0; + + struct compare_callbacks { + bool operator()(const Callback &a, const Callback &b) + { + // We want to pop the lower index first + return a.first > b.first; + } + }; + + // Callbacks are stored inside a priority_queue data-structure. + // The queue is sorted by the callbacks index (so we pop the callbacks with the smallest index first). + std::priority_queue, compare_callbacks> m_callbacks_queue; + + // This lock guarantee that only one thread is executing the callbacks. 
+ std::mutex m_callbacks_mutex; +}; + +} /* namespace hailort */ + +#endif /* _HAILO_CALLBACK_REORDER_QUEUE_HPP_ */ diff --git a/hailort/libhailort/src/vdevice/pipeline_multiplexer.cpp b/hailort/libhailort/src/vdevice/pipeline_multiplexer.cpp index 3526662..c60476c 100644 --- a/hailort/libhailort/src/vdevice/pipeline_multiplexer.cpp +++ b/hailort/libhailort/src/vdevice/pipeline_multiplexer.cpp @@ -28,9 +28,11 @@ PipelineMultiplexer::PipelineMultiplexer() : m_written_streams_count(0), m_read_streams_count(0), m_next_to_read_after_drain(INVALID_CORE_OP_HANDLE) -{} +{ + assert(is_multiplexer_supported()); +} -bool PipelineMultiplexer::should_use_multiplexer() +bool PipelineMultiplexer::is_multiplexer_supported() { auto disable_multiplexer_env = std::getenv(DISABLE_MULTIPLEXER_ENV_VAR); if ((nullptr != disable_multiplexer_env) && (strnlen(disable_multiplexer_env, 2) == 1) && (strncmp(disable_multiplexer_env, "1", 1) == 0)) { @@ -120,7 +122,7 @@ hailo_status PipelineMultiplexer::wait_for_write(multiplexer_core_op_handle_t co m_is_waiting_to_write[core_op_handle] = true; hailo_status status = HAILO_SUCCESS; m_writing_cv.wait(lock, [this, core_op_handle, &status] { - if (!has_more_than_one_core_op_instance() || !should_use_multiplexer()) { + if (!has_more_than_one_core_op_instance()) { return true; } @@ -213,7 +215,7 @@ Expected PipelineMultiplexer::wait_for_read(multiplexer_core_op_handle hailo_status status = HAILO_SUCCESS; auto wait_res = m_reading_cv.wait_for(lock, timeout, [this, core_op_handle, stream_name, &drain_frames, &status] { - if (should_core_op_stop(core_op_handle)) { + if (m_should_core_op_stop[core_op_handle][stream_name]) { status = HAILO_STREAM_ABORTED_BY_USER; return true; // return true so that the wait will finish } diff --git a/hailort/libhailort/src/vdevice/pipeline_multiplexer.hpp b/hailort/libhailort/src/vdevice/pipeline_multiplexer.hpp index e781697..e9223aa 100644 --- a/hailort/libhailort/src/vdevice/pipeline_multiplexer.hpp +++ 
b/hailort/libhailort/src/vdevice/pipeline_multiplexer.hpp @@ -15,7 +15,7 @@ #include "common/barrier.hpp" -#include "vdevice/scheduler/network_group_scheduler.hpp" +#include "vdevice/scheduler/scheduler.hpp" #include #include @@ -24,7 +24,7 @@ namespace hailort { -#define DISABLE_MULTIPLEXER_ENV_VAR "HAILO_DISABLE_MULTIPLEXER" +#define DISABLE_MULTIPLEXER_ENV_VAR "HAILO_DISABLE_MULTIPLEXER_INTERNAL" using multiplexer_core_op_handle_t = uint32_t; using run_once_for_stream_handle_t = uint32_t; @@ -58,7 +58,7 @@ public: void set_can_output_vstream_read(multiplexer_core_op_handle_t core_op_handle, const std::string &vstream_name, bool can_read); - static bool should_use_multiplexer(); + static bool is_multiplexer_supported(); private: diff --git a/hailort/libhailort/src/vdevice/scheduler/multi_device_scheduled_stream.cpp b/hailort/libhailort/src/vdevice/scheduler/multi_device_scheduled_stream.cpp index 7ae77e9..d8d236c 100644 --- a/hailort/libhailort/src/vdevice/scheduler/multi_device_scheduled_stream.cpp +++ b/hailort/libhailort/src/vdevice/scheduler/multi_device_scheduled_stream.cpp @@ -14,7 +14,29 @@ namespace hailort { -hailo_status MultiDeviceScheduledInputStream::send_pending_buffer(size_t device_index) +Expected> MultiDeviceScheduledInputStream::create( + std::map> &&streams, + const scheduler_core_op_handle_t &core_op_handle, + EventPtr &&core_op_activated_event, + const LayerInfo &layer_info, + CoreOpsSchedulerWeakPtr core_ops_scheduler) +{ + auto buffer_frame_size = streams.begin()->second.get().get_buffer_frames_size(); + CHECK_EXPECTED(buffer_frame_size); + auto frame_size = streams.begin()->second.get().get_frame_size(); + auto buffers_queue_ptr = BuffersQueue::create_unique(frame_size, (streams.size() * buffer_frame_size.value())); + CHECK_EXPECTED(buffers_queue_ptr); + + auto status = HAILO_UNINITIALIZED; + auto stream = make_unique_nothrow(std::move(streams), + core_op_handle, std::move(core_op_activated_event), layer_info, + core_ops_scheduler, 
buffers_queue_ptr.release(), status); + CHECK_AS_EXPECTED((nullptr != stream), HAILO_OUT_OF_HOST_MEMORY); + CHECK_SUCCESS_AS_EXPECTED(status); + return stream; +} + +hailo_status MultiDeviceScheduledInputStream::send_pending_buffer(const device_id_t &device_id) { auto buffer = m_queue->front(get_timeout()); // Counting on scheduler to not allow paralle calls to this function if (HAILO_STREAM_ABORTED_BY_USER == buffer.status()) { @@ -22,7 +44,8 @@ hailo_status MultiDeviceScheduledInputStream::send_pending_buffer(size_t device_ return buffer.status(); } CHECK_EXPECTED_AS_STATUS(buffer); - auto status = m_streams[device_index].get().write_buffer_only(buffer.value()); + assert(contains(m_streams, device_id)); + auto status = m_streams.at(device_id).get().write_buffer_only(buffer.value()); if (HAILO_STREAM_ABORTED_BY_USER == status) { LOGGER__INFO("send_pending_buffer was aborted."); return status; @@ -30,38 +53,34 @@ hailo_status MultiDeviceScheduledInputStream::send_pending_buffer(size_t device_ CHECK_SUCCESS(status); m_queue->pop(); // Release buffer to free the queue for other dequeues - VdmaInputStream &vdma_input = static_cast(m_streams[device_index].get()); - return vdma_input.send_pending_buffer(); + auto &vdma_input = dynamic_cast(m_streams.at(device_id).get()); + return vdma_input.send_pending_buffer(device_id); } -Expected MultiDeviceScheduledInputStream::sync_write_raw_buffer(const MemoryView &buffer, +hailo_status MultiDeviceScheduledInputStream::write_impl(const MemoryView &buffer, const std::function &should_cancel) { - auto core_ops_scheduler = m_core_ops_scheduler.lock(); - CHECK_AS_EXPECTED(core_ops_scheduler, HAILO_INTERNAL_FAILURE); - - auto status = core_ops_scheduler->wait_for_write(m_core_op_handle, name(), get_timeout(), should_cancel); - if (HAILO_STREAM_ABORTED_BY_USER == status) { - LOGGER__INFO("Write to stream was aborted."); - return make_unexpected(status); + if (should_cancel()) { + return HAILO_STREAM_ABORTED_BY_USER; } - 
CHECK_SUCCESS_AS_EXPECTED(status); - status = m_queue->push(buffer, get_timeout()); + auto core_ops_scheduler = m_core_ops_scheduler.lock(); + CHECK(core_ops_scheduler, HAILO_INTERNAL_FAILURE); - auto write_finish_status = core_ops_scheduler->signal_write_finish(m_core_op_handle, name(), status != HAILO_SUCCESS); + auto status = m_queue->push(buffer, get_timeout()); if (HAILO_STREAM_ABORTED_BY_USER == status) { LOGGER__INFO("'push' was aborted."); - return make_unexpected(status); + return status; } - CHECK_SUCCESS_AS_EXPECTED(status); + CHECK_SUCCESS(status); + auto write_finish_status = core_ops_scheduler->signal_frame_pending_to_send(m_core_op_handle, name()); if (HAILO_STREAM_ABORTED_BY_USER == write_finish_status) { - return make_unexpected(write_finish_status); + return write_finish_status; } - CHECK_SUCCESS_AS_EXPECTED(write_finish_status); + CHECK_SUCCESS(write_finish_status); - return buffer.size(); + return HAILO_SUCCESS; } Expected MultiDeviceScheduledInputStream::get_pending_frames_count() const @@ -77,7 +96,8 @@ size_t MultiDeviceScheduledInputStream::get_queue_size() const hailo_status MultiDeviceScheduledInputStream::abort() { auto status = HAILO_SUCCESS; // Best effort - for (auto &stream : m_streams) { + for (const auto &pair : m_streams) { + auto &stream = pair.second; auto abort_status = stream.get().abort(); if (HAILO_SUCCESS != status) { LOGGER__ERROR("Failed to abort input stream. (status: {} device: {})", status, stream.get().get_dev_id()); @@ -101,7 +121,8 @@ hailo_status MultiDeviceScheduledInputStream::abort() hailo_status MultiDeviceScheduledInputStream::clear_abort() { auto status = HAILO_SUCCESS; // Best effort - for (auto &stream : m_streams) { + for (const auto &pair : m_streams) { + auto &stream = pair.second; auto clear_abort_status = stream.get().clear_abort(); if ((HAILO_SUCCESS != clear_abort_status) && (HAILO_STREAM_NOT_ACTIVATED != clear_abort_status)) { LOGGER__ERROR("Failed to clear abort input stream. 
(status: {} device: {})", clear_abort_status, stream.get().get_dev_id()); diff --git a/hailort/libhailort/src/vdevice/scheduler/multi_device_scheduled_stream.hpp b/hailort/libhailort/src/vdevice/scheduler/multi_device_scheduled_stream.hpp index ac8fe41..63eadf8 100644 --- a/hailort/libhailort/src/vdevice/scheduler/multi_device_scheduled_stream.hpp +++ b/hailort/libhailort/src/vdevice/scheduler/multi_device_scheduled_stream.hpp @@ -151,27 +151,35 @@ private: std::atomic_bool m_should_stop; }; -class MultiDeviceScheduledInputStream : public ScheduledInputStream { +// Stream used on scheduler input multiple device with SYNC api (On async api, the ScheduledAsyncInputStream handles +// both single and multiple devices). +class MultiDeviceScheduledInputStream : public ScheduledInputStreamBase { public: + static Expected> create( + std::map> &&streams, + const scheduler_core_op_handle_t &core_op_handle, + EventPtr &&core_op_activated_event, + const LayerInfo &layer_info, + CoreOpsSchedulerWeakPtr core_ops_scheduler); + MultiDeviceScheduledInputStream( - std::vector> &&streams, + std::map> &&streams, const scheduler_core_op_handle_t &core_op_handle, EventPtr &&core_op_activated_event, const LayerInfo &layer_info, CoreOpsSchedulerWeakPtr core_ops_scheduler, std::unique_ptr &&frames_queue, hailo_status &status) : - ScheduledInputStream(std::move(streams), core_op_handle, + ScheduledInputStreamBase(std::move(streams), core_op_handle, std::move(core_op_activated_event), layer_info, core_ops_scheduler, status), m_queue(std::move(frames_queue)) {} - virtual hailo_status send_pending_buffer(size_t device_index = 0) override; + virtual hailo_status send_pending_buffer(const device_id_t &device_id) override; virtual Expected get_pending_frames_count() const override; protected: - virtual Expected sync_write_raw_buffer(const MemoryView &buffer, - const std::function &should_cancel = []() { return false; }) override; + virtual hailo_status write_impl(const MemoryView &buffer, const 
std::function &should_cancel) override; virtual hailo_status abort() override; virtual hailo_status clear_abort() override; diff --git a/hailort/libhailort/src/vdevice/scheduler/network_group_scheduler.cpp b/hailort/libhailort/src/vdevice/scheduler/network_group_scheduler.cpp deleted file mode 100644 index cc7ace8..0000000 --- a/hailort/libhailort/src/vdevice/scheduler/network_group_scheduler.cpp +++ /dev/null @@ -1,1006 +0,0 @@ -/** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. - * Distributed under the MIT license (https://opensource.org/licenses/MIT) - **/ -/** - * TODO: Rename in a different PR - * @file network_group_scheduler.cpp - * @brief: Network scheduler - **/ - -#include "common/os_utils.hpp" - - -#include "vdevice/scheduler/network_group_scheduler.hpp" -#include "vdevice/vdevice_core_op.hpp" -#include "vdevice/scheduler/scheduler_oracle.hpp" -#include "vdevice/vdevice_stream_multiplexer_wrapper.hpp" -#include "hef/hef_internal.hpp" -#include "utils/profiler/tracer_macros.hpp" - -#include - - -namespace hailort -{ - -#define SINGLE_CONTEXT_BATCH_SIZE (1) -#define DEFAULT_BURST_SIZE (1) - -// TODO: use device handles instead device count -CoreOpsScheduler::CoreOpsScheduler(hailo_scheduling_algorithm_t algorithm, uint32_t device_count, std::vector &devices_bdf_id, - std::vector &devices_arch) : - SchedulerBase(algorithm, device_count, devices_bdf_id, devices_arch), - m_changing_current_batch_size(), - m_should_core_op_stop(), - m_before_read_write_mutex(), - m_core_ops_cvs(), - m_should_monitor(false) -#if defined(__GNUC__) - , m_mon_tmp_output() -#endif -{ - // TODO: HRT-7391 - Change scheduler monitor to work only when MON command is active - m_should_monitor = SchedulerMon::should_monitor(); - if (m_should_monitor) { - auto status = start_mon(); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("Failed to initiate hailo monitor of networks, with status {}", status); - } - } -} - -CoreOpsScheduler::~CoreOpsScheduler() -{ - for 
(auto device_info : m_devices) { - if (INVALID_CORE_OP_HANDLE != device_info->current_core_op_handle) { - auto current_core_op = m_scheduled_core_ops[device_info->current_core_op_handle]->get_core_op(); - auto current_core_op_bundle = std::dynamic_pointer_cast(current_core_op); - assert(nullptr != current_core_op_bundle); - auto vdma_core_op = current_core_op_bundle->get_core_op_by_device_index(device_info->device_id); - if (!vdma_core_op) { - LOGGER__ERROR("Error retrieving core-op in scheduler destructor"); - } else { - static const auto RESUME_PENDING_STREAM_TRANSFERS = true; - if (HAILO_SUCCESS != VdmaConfigManager::switch_core_op(vdma_core_op.value(), nullptr, 0, - RESUME_PENDING_STREAM_TRANSFERS)) { - LOGGER__ERROR("Error deactivating core-op when destroying scheduler"); - } - } - } - } - - if (m_should_monitor) { - m_should_monitor = false; - m_mon_shutdown_event->signal(); - if (m_mon_thread.joinable()) { - m_mon_thread.join(); - } - } -} - -Expected CoreOpsScheduler::create_round_robin(uint32_t device_count, std::vector &devices_bdf_id, std::vector &devices_arch) -{ - auto ptr = make_shared_nothrow(HAILO_SCHEDULING_ALGORITHM_ROUND_ROBIN, device_count, devices_bdf_id, devices_arch); - CHECK_AS_EXPECTED(nullptr != ptr, HAILO_OUT_OF_HOST_MEMORY); - - return ptr; -} - -std::string get_curr_pid_as_str() -{ - return std::to_string(OsUtils::get_curr_pid()); -} - -hailo_status CoreOpsScheduler::start_mon() -{ -#if defined(__GNUC__) - m_last_measured_timestamp = std::chrono::steady_clock::now(); - m_mon_shutdown_event = Event::create_shared(Event::State::not_signalled); - CHECK(nullptr != m_mon_shutdown_event, HAILO_OUT_OF_HOST_MEMORY); - auto device_count = get_device_count(); - for (uint32_t i = 0; i < device_count; i++) { - m_last_measured_utilization_timestamp[i] = {}; - m_device_has_drained_everything[i] = true; - m_device_utilization[i] = 0; - } - - auto tmp_file = open_temp_mon_file(); - CHECK_EXPECTED_AS_STATUS(tmp_file); - m_mon_tmp_output = 
tmp_file.release(); - - m_mon_thread = std::thread([this] () - { - while (m_should_monitor) { - auto status = m_mon_shutdown_event->wait(DEFAULT_SCHEDULER_MON_INTERVAL); - if (HAILO_TIMEOUT == status) { - dump_state(); - } else if (HAILO_SUCCESS != status) { - LOGGER__ERROR("Scheduler monitor failed with status {}", status); - return; - } - } - return; - }); - - return HAILO_SUCCESS; -#else - return HAILO_NOT_IMPLEMENTED; -#endif -} - -#if defined(__GNUC__) -Expected> CoreOpsScheduler::open_temp_mon_file() -{ - std::string file_name = get_curr_pid_as_str(); - auto tmp_file = TempFile::create(file_name, SCHEDULER_MON_TMP_DIR); - CHECK_EXPECTED(tmp_file); - - auto tmp_file_ptr = make_shared_nothrow(tmp_file.release()); - CHECK_AS_EXPECTED(nullptr != tmp_file_ptr, HAILO_OUT_OF_HOST_MEMORY); - - return tmp_file_ptr; -} - -void CoreOpsScheduler::dump_state() -{ - auto file = LockedFile::create(m_mon_tmp_output->name(), "w"); - if (HAILO_SUCCESS != file.status()) { - LOGGER__ERROR("Failed to open and lock file {}, with status: {}", m_mon_tmp_output->name(), file.status()); - return; - } - - ProtoMon mon; - mon.set_pid(get_curr_pid_as_str()); - time_dependent_events_cycle_calc(); - log_monitor_networks_infos(mon); - log_monitor_device_infos(mon); - log_monitor_frames_infos(mon); - - // Clear accumulators - for (auto &handle_core_op_utilization_pair : m_core_op_utilization) { - handle_core_op_utilization_pair.second = 0; - } - for (auto &handle_fps_pair : m_fps_accumulator) { - handle_fps_pair.second = 0; - } - for (auto &handle_device_utilization_pair: m_device_utilization) { - handle_device_utilization_pair.second = 0; - } - - if (!mon.SerializeToFileDescriptor(file->get_fd())) { - LOGGER__ERROR("Failed to SerializeToFileDescriptor(), with errno: {}", errno); - } -} -#endif - -std::string CoreOpsScheduler::get_core_op_name(const scheduler_core_op_handle_t &core_op_handle) -{ - assert(m_scheduled_core_ops.size() > core_op_handle); - return 
m_scheduled_core_ops[core_op_handle]->get_core_op_name(); -} - -// TODO: HRT-9804 - Change monitor to use the tracer design mechanism (curently this functions uses private members) -void CoreOpsScheduler::time_dependent_events_cycle_calc() -{ - auto curr_time = std::chrono::steady_clock::now(); - m_last_measured_time_duration = std::chrono::duration_cast>(curr_time - m_last_measured_timestamp).count(); - - for (auto device_info : m_devices) { - if (!m_device_has_drained_everything[device_info->device_id]) { - update_utilization_read_buffers_finished(device_info->device_id, device_info->current_core_op_handle, false); - } - } - - m_last_measured_timestamp = curr_time; -} - -void CoreOpsScheduler::log_monitor_device_infos(ProtoMon &mon) -{ - for (auto device_info : m_devices) { - assert(contains(m_device_utilization, device_info->device_id)); - auto curr_device_utilization = m_device_utilization[device_info->device_id]; - auto utilization_precentage = ((curr_device_utilization * 100) / m_last_measured_time_duration); - - auto device_infos = mon.add_device_infos(); - device_infos->set_device_id(device_info->device_bdf_id); - device_infos->set_utilization(utilization_precentage); - device_infos->set_device_arch(device_info->device_arch); - } -} - -void CoreOpsScheduler::log_monitor_networks_infos(ProtoMon &mon) -{ - for (uint32_t core_op_handle = 0; core_op_handle < m_core_op_utilization.size(); core_op_handle++) { - assert(contains(m_core_op_utilization, core_op_handle)); - auto curr_core_op_utilization = m_core_op_utilization[core_op_handle]; - auto utilization = ((curr_core_op_utilization * 100) / m_last_measured_time_duration); - auto outputs_count = static_cast(m_scheduled_core_ops[core_op_handle]->get_outputs_names().size()); - auto fps = static_cast((m_fps_accumulator[core_op_handle] / outputs_count) / m_last_measured_time_duration); - - auto net_info = mon.add_networks_infos(); - net_info->set_network_name(get_core_op_name(core_op_handle)); - 
net_info->set_utilization(utilization); - net_info->set_fps(fps); - } -} - -void CoreOpsScheduler::log_monitor_frames_infos(ProtoMon &mon) -{ - for (uint32_t core_op_handle = 0; core_op_handle < m_scheduled_core_ops.size(); core_op_handle++) { - auto net_frames_info = mon.add_net_frames_infos(); - net_frames_info->set_network_name(get_core_op_name(core_op_handle)); - - for (auto &stream_name : m_scheduled_core_ops[core_op_handle]->get_inputs_names()) { - auto stream_frames_info = net_frames_info->add_streams_frames_infos(); - stream_frames_info->set_stream_name(stream_name); - stream_frames_info->set_stream_direction(PROTO__STREAM_DIRECTION__HOST_TO_DEVICE); - auto status = set_h2d_frames_counters(core_op_handle, stream_name, *stream_frames_info); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("Failed to set stream's {} frames count, status = {}", stream_name, status); - continue; - } - } - - for (auto &stream_name : m_scheduled_core_ops[core_op_handle]->get_outputs_names()) { - auto stream_frames_info = net_frames_info->add_streams_frames_infos(); - stream_frames_info->set_stream_name(stream_name); - stream_frames_info->set_stream_direction(PROTO__STREAM_DIRECTION__DEVICE_TO_HOST); - auto status = set_d2h_frames_counters(core_op_handle, stream_name, *stream_frames_info); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("Failed to set stream's {} frames count, status = {}", stream_name, status); - continue; - } - } - } -} - -hailo_status CoreOpsScheduler::set_h2d_frames_counters(const scheduler_core_op_handle_t &core_op_handle, const std::string &stream_name, - ProtoMonStreamFramesInfo &stream_frames_info) -{ - assert(m_scheduled_core_ops.size() > core_op_handle); - auto current_cng = m_scheduled_core_ops[core_op_handle]->get_core_op(); - - auto input_stream = current_cng->get_input_stream_by_name(stream_name); - CHECK_EXPECTED_AS_STATUS(input_stream); - - InputStreamBase &vdevice_input = static_cast(input_stream->get()); - auto buffer_frames_size = 
vdevice_input.get_buffer_frames_size(); - if (HAILO_SUCCESS == buffer_frames_size.status()) { - stream_frames_info.set_buffer_frames_size(static_cast(buffer_frames_size.value())); - } else { - stream_frames_info.set_buffer_frames_size(SCHEDULER_MON_NAN_VAL); - } - - auto pending_frames_count = vdevice_input.get_pending_frames_count(); - if (HAILO_SUCCESS == pending_frames_count.status()) { - stream_frames_info.set_pending_frames_count(static_cast(pending_frames_count.value())); - } else { - stream_frames_info.set_pending_frames_count(SCHEDULER_MON_NAN_VAL); - } - - return HAILO_SUCCESS; -} - -hailo_status CoreOpsScheduler::set_d2h_frames_counters(const scheduler_core_op_handle_t &core_op_handle, const std::string &stream_name, - ProtoMonStreamFramesInfo &stream_frames_info) -{ - assert(m_scheduled_core_ops.size() > core_op_handle); - auto current_cng = m_scheduled_core_ops[core_op_handle]->get_core_op(); - - auto output_stream = current_cng->get_output_stream_by_name(stream_name); - CHECK_EXPECTED_AS_STATUS(output_stream); - - OutputStreamBase &vdevice_output = static_cast(output_stream->get()); - auto buffer_frames_size = vdevice_output.get_buffer_frames_size(); - if (HAILO_SUCCESS == buffer_frames_size.status()) { - stream_frames_info.set_buffer_frames_size(static_cast(buffer_frames_size.value())); - } else { - stream_frames_info.set_buffer_frames_size(SCHEDULER_MON_NAN_VAL); - } - - auto pending_frames_count = vdevice_output.get_pending_frames_count(); - if (HAILO_SUCCESS == pending_frames_count.status()) { - stream_frames_info.set_pending_frames_count(static_cast(pending_frames_count.value())); - } else { - stream_frames_info.set_pending_frames_count(SCHEDULER_MON_NAN_VAL); - } - - return HAILO_SUCCESS; -} - -Expected CoreOpsScheduler::add_core_op(std::shared_ptr added_cng) -{ - scheduler_core_op_handle_t core_op_handle = INVALID_CORE_OP_HANDLE; - { - std::unique_lock lock(m_before_read_write_mutex); - - core_op_handle = 
static_cast(m_scheduled_core_ops.size()); - TRACE(AddCoreOpTrace, "", added_cng->name(), DEFAULT_SCHEDULER_TIMEOUT.count(), DEFAULT_SCHEDULER_MIN_THRESHOLD, core_op_handle); - - auto stream_infos = added_cng->get_all_stream_infos(); - CHECK_EXPECTED(stream_infos); - - auto scheduled_core_op = ScheduledCoreOp::create(added_cng, stream_infos.value()); - CHECK_EXPECTED(scheduled_core_op); - - m_scheduled_core_ops.emplace_back(scheduled_core_op.release()); - - m_changing_current_batch_size[core_op_handle] = false; - - for (const auto &stream_info : stream_infos.value()) { - m_should_core_op_stop[core_op_handle][stream_info.name] = false; - } - - for (auto& device_info : m_devices) { - for (const auto &stream_info : stream_infos.value()) { - if (HAILO_H2D_STREAM == stream_info.direction) { - device_info->current_cycle_requested_transferred_frames_h2d[core_op_handle][stream_info.name] = 0; - } else { - device_info->current_cycle_finished_transferred_frames_d2h[core_op_handle][stream_info.name] = 0; - device_info->current_cycle_finished_read_frames_d2h[core_op_handle][stream_info.name] = 0; - } - } - } - - // Monitor members - m_core_op_utilization[core_op_handle] = 0; - m_fps_accumulator[core_op_handle] = 0; - - auto network_cvs = ScheduledCoreOpCV::create(added_cng); - CHECK_EXPECTED(network_cvs); - m_core_ops_cvs[core_op_handle] = network_cvs.release(); - m_core_op_priority[HAILO_SCHEDULER_PRIORITY_NORMAL].emplace_back(core_op_handle); - } - - return core_op_handle; -} - -bool CoreOpsScheduler::is_core_op_active(const scheduler_core_op_handle_t &core_op_handle) -{ - for (auto device_info : m_devices) { - if (core_op_handle == device_info->current_core_op_handle) { - return true; - } - } - - return false; -} - -bool CoreOpsScheduler::is_multi_device() -{ - return m_devices.size() > 1; -} - -hailo_status CoreOpsScheduler::wait_for_write(const scheduler_core_op_handle_t &core_op_handle, const std::string &stream_name, - const std::chrono::milliseconds &timeout, const 
std::function &should_cancel) -{ - { - std::unique_lock lock(m_before_read_write_mutex); - - hailo_status status = HAILO_SUCCESS; - auto wait_res = m_core_ops_cvs[core_op_handle]->wait_for(stream_name, lock, timeout, [this, core_op_handle, stream_name, &should_cancel, &status] { - - if (should_cancel()) { - status = HAILO_STREAM_ABORTED_BY_USER; - return true; // return true so that the wait will finish - } - - if (should_core_op_stop(core_op_handle)) { - status = HAILO_STREAM_ABORTED_BY_USER; - return true; // return true so that the wait will finish - } - - return m_scheduled_core_ops[core_op_handle]->can_stream_write(stream_name); - }); - CHECK(wait_res, HAILO_TIMEOUT, "{} (H2D) failed with status={}, timeout={}ms", stream_name, HAILO_TIMEOUT, timeout.count()); - if (HAILO_STREAM_ABORTED_BY_USER == status) { - return status; - } - CHECK_SUCCESS(status); - - m_scheduled_core_ops[core_op_handle]->mark_frame_sent(); - m_scheduled_core_ops[core_op_handle]->requested_write_frames().increase(stream_name); - } - - return HAILO_SUCCESS; -} - -hailo_status CoreOpsScheduler::signal_write_finish(const scheduler_core_op_handle_t &core_op_handle, const std::string &stream_name, - bool did_write_fail) -{ - { - std::unique_lock lock(m_before_read_write_mutex); - assert(m_scheduled_core_ops.size() > core_op_handle); - auto scheduled_core_op = m_scheduled_core_ops[core_op_handle]; - - if (did_write_fail) { - scheduled_core_op->requested_write_frames().decrease(stream_name); - return HAILO_SUCCESS; - } - - if (should_core_op_stop(core_op_handle)) { - return HAILO_STREAM_ABORTED_BY_USER; - } - - scheduled_core_op->finished_write_frames().increase(stream_name); - scheduled_core_op->requested_write_frames().decrease(stream_name); - - auto device_id = CoreOpsSchedulerOracle::get_avail_device(*this, core_op_handle); - if (INVALID_DEVICE_ID != device_id) { - auto status = switch_core_op(core_op_handle, device_id); - if (HAILO_STREAM_ABORTED_BY_USER == status) { - 
LOGGER__INFO("switch_core_op has failed with status=HAILO_STREAM_ABORTED_BY_USER"); - return status; - } - CHECK_SUCCESS(status); - } - - auto status = optimize_streaming_if_enabled(core_op_handle); - if (HAILO_STREAM_ABORTED_BY_USER == status) { - return status; - } - CHECK_SUCCESS(status); - } - - return HAILO_SUCCESS; -} - -hailo_status CoreOpsScheduler::switch_core_op(const scheduler_core_op_handle_t &core_op_handle, uint32_t device_id, bool /*keep_nn_config*/) -{ - auto scheduled_core_op = m_scheduled_core_ops[core_op_handle]; - auto curr_device_info = m_devices[device_id]; - - // initialize current cycle maps - for (const auto &name : scheduled_core_op->get_inputs_names()) { - curr_device_info->current_cycle_requested_transferred_frames_h2d[core_op_handle][name] = 0; - } - - for (const auto &name : scheduled_core_op->get_outputs_names()) { - curr_device_info->current_cycle_finished_transferred_frames_d2h[core_op_handle][name] = 0; - curr_device_info->current_cycle_finished_read_frames_d2h[core_op_handle][name] = 0; - } - - uint16_t batch_size = SINGLE_CONTEXT_BATCH_SIZE; - uint16_t burst_size = static_cast(scheduled_core_op->finished_write_frames_min_value()); - // In multi device finished write frame could be bigger then the vdma buffers we have, can be removed after dynamic desc binding. 
- if (is_multi_device()) { - burst_size = std::min(burst_size, get_min_avail_buffers_count(core_op_handle, device_id)); - // We limit the max burst size to (dev_count * max_batch) to keep former behavior (this was the buffer_pool size) - // TODO: remove this limitation and work with user-controlled max_burst_size - burst_size = std::min(burst_size, static_cast(scheduled_core_op->get_max_batch_size() * get_device_count())); - } - - if (scheduled_core_op->use_dynamic_batch_flow()) { - batch_size = std::min(static_cast(scheduled_core_op->finished_write_frames_min_value()), scheduled_core_op->get_max_batch_size()); - burst_size = batch_size; - } - - bool has_same_batch_size_as_previous = (curr_device_info->current_batch_size == batch_size); - curr_device_info->current_batch_size = batch_size; - - if (curr_device_info->current_core_op_handle != core_op_handle) { - curr_device_info->is_switching_core_op = false; - } - - if ((core_op_handle != curr_device_info->current_core_op_handle) || (!has_same_batch_size_as_previous)) { - assert(m_scheduled_core_ops.size() > core_op_handle); - auto next_active_cng = scheduled_core_op->get_core_op(); - auto next_active_cng_wrapper = std::dynamic_pointer_cast(next_active_cng); - assert(nullptr != next_active_cng_wrapper); - auto next_active_cng_expected = next_active_cng_wrapper->get_core_op_by_device_index(curr_device_info->device_id); - CHECK_EXPECTED_AS_STATUS(next_active_cng_expected); - - std::shared_ptr current_active_vdma_cng = nullptr; - if (curr_device_info->current_core_op_handle != INVALID_CORE_OP_HANDLE) { - auto current_active_cng = m_scheduled_core_ops[curr_device_info->current_core_op_handle]->get_core_op(); - auto current_active_cng_bundle = std::dynamic_pointer_cast(current_active_cng); - assert(nullptr != current_active_cng_bundle); - auto current_active_cng_expected = current_active_cng_bundle->get_core_op_by_device_index(curr_device_info->device_id); - CHECK_EXPECTED_AS_STATUS(current_active_cng_expected); - 
current_active_vdma_cng = current_active_cng_expected.release(); - } - - TRACE(SwitchCoreOpTrace, "", core_op_handle); - static const auto RESUME_PENDING_STREAM_TRANSFERS = true; - auto status = VdmaConfigManager::switch_core_op(current_active_vdma_cng, next_active_cng_expected.value(), batch_size, - - RESUME_PENDING_STREAM_TRANSFERS); - CHECK_SUCCESS(status, "Failed switching core-op"); - // Clear the ready_to_switch flag from old activation - scheduled_core_op->mark_unready_to_switch(); - - // Register to get interrupts - has to be after core-op is activated - for (auto &output_stream : next_active_cng_expected.value()->get_output_streams()) { - OutputStreamBase &vdevice_output = static_cast(output_stream.get()); - status = vdevice_output.register_interrupt_callback( - [this, name = output_stream.get().name(), format = vdevice_output.get_layer_info().format.order, scheduled_core_op, core_op_handle, device_id] - (uint32_t frames) { - auto should_notify_next = false; - { - std::unique_lock lock(m_before_read_write_mutex); - // In order to meet performance requirement we enable switch only after first frame is arrived. 
- // TODO: remove this hack / move it to oracle and add another scheduling algorithm for it - scheduled_core_op->mark_ready_to_switch(); - if (hailo_format_order_t::HAILO_FORMAT_ORDER_HAILO_NMS != format) { - TRACE(OutputVdmaEnqueueTrace, "", core_op_handle, name, frames); - // TODO: Remove d2h_finished_transferred_frames and use current_cycle_finished_transferred_frames_d2h instead - scheduled_core_op->d2h_finished_transferred_frames(name) += frames; - m_devices[device_id]->current_cycle_finished_transferred_frames_d2h[core_op_handle][name] += frames; - } - - auto has_drained_everything = has_core_op_drained_everything(core_op_handle, device_id); - - if (m_should_monitor) { - update_utilization_read_buffers_finished(device_id, core_op_handle, has_drained_everything); - } - - // If ng finished and we didnt choose next lets choose without checking threshold - if (!m_devices[device_id]->is_switching_core_op && has_drained_everything) { - auto was_chosen = choose_next_core_op(device_id, true); - if (!was_chosen) { - choose_next_core_op(device_id, false); - } - } - - if (m_devices[device_id]->is_switching_core_op && has_drained_everything) { - should_notify_next = true; - } - } - // Notify stream that new frame was accepted (wait_for_read) - m_core_ops_cvs[core_op_handle]->notify_one(name); - if (should_notify_next) { - auto next_core_op = m_devices[device_id]->next_core_op_handle; - // Notify all the threads of the next ng (wait_for_read) - m_core_ops_cvs[next_core_op]->notify_all(); - } - }); - CHECK_SUCCESS(status); - } - } - - scheduled_core_op->set_last_run_timestamp(std::chrono::steady_clock::now()); // Mark timestamp on activation - curr_device_info->current_core_op_handle = core_op_handle; - - // Finished switching batch size - m_changing_current_batch_size[core_op_handle] = false; - - auto status = send_all_pending_buffers(core_op_handle, device_id, burst_size); - if (HAILO_STREAM_ABORTED_BY_USER == status) { - LOGGER__INFO("send_all_pending_buffers has failed 
with status=HAILO_STREAM_ABORTED_BY_USER"); - return status; - } - CHECK_SUCCESS(status); - - return HAILO_SUCCESS; -} - -hailo_status CoreOpsScheduler::send_all_pending_buffers(const scheduler_core_op_handle_t &core_op_handle, uint32_t device_id, uint32_t burst_size) -{ - auto current_device_info = m_devices[device_id]; - if ((INVALID_CORE_OP_HANDLE == current_device_info->current_core_op_handle) || (current_device_info->current_core_op_handle != core_op_handle)) { - return HAILO_SUCCESS; - } - - auto scheduled_core_op = m_scheduled_core_ops[core_op_handle]; - - for (size_t i = 0; i < burst_size; i++) { - auto finished_send = false; - for (const auto &name : scheduled_core_op->get_inputs_names()) { - if (scheduled_core_op->finished_write_frames(name) == 0) { - finished_send = true; - break; - } - } - if (finished_send) { - break; - } - for (const auto &name : scheduled_core_op->get_inputs_names()) { - auto status = send_pending_buffer(core_op_handle, name, device_id); - if (HAILO_STREAM_ABORTED_BY_USER == status) { - LOGGER__INFO("send_pending_buffer has failed with status=HAILO_STREAM_ABORTED_BY_USER"); - return status; - } - CHECK_SUCCESS(status); - } - scheduled_core_op->push_device_index(device_id); - scheduled_core_op->set_last_device_index(device_id); - - if (m_should_monitor) { - update_utilization_send_started(device_id); - } - } - - return HAILO_SUCCESS; -} - -hailo_status CoreOpsScheduler::send_pending_buffer(const scheduler_core_op_handle_t &core_op_handle, const std::string &stream_name, - uint32_t device_id) -{ - assert(m_scheduled_core_ops.size() > core_op_handle); - auto scheduled_core_op = m_scheduled_core_ops[core_op_handle]; - - auto current_cng = scheduled_core_op->get_core_op(); - auto input_stream = current_cng->get_input_stream_by_name(stream_name); - CHECK_EXPECTED_AS_STATUS(input_stream); - - VDeviceInputStreamMultiplexerWrapper &vdevice_input = static_cast(input_stream->get()); - TRACE(InputVdmaDequeueTrace, "", core_op_handle, 
stream_name); - auto status = vdevice_input.send_pending_buffer(device_id); - if (HAILO_STREAM_ABORTED_BY_USER == status) { - LOGGER__INFO("send_pending_buffer has failed with status=HAILO_STREAM_ABORTED_BY_USER"); - return status; - } - CHECK_SUCCESS(status); - - m_devices[device_id]->current_cycle_requested_transferred_frames_h2d[core_op_handle][stream_name]++; - scheduled_core_op->finished_write_frames().decrease(stream_name); - - scheduled_core_op->h2d_finished_transferred_frames().increase(stream_name); - - if (should_core_op_stop(core_op_handle)) { - return HAILO_STREAM_ABORTED_BY_USER; - } - - return HAILO_SUCCESS; -} - -CoreOpsScheduler::ReadyInfo CoreOpsScheduler::is_core_op_ready(const scheduler_core_op_handle_t &core_op_handle, bool check_threshold) -{ - ReadyInfo result; - result.is_ready = false; - - if (should_core_op_stop(core_op_handle)) { - // Do not switch to an aborted core-op - return result; - } - - auto scheduled_core_op = m_scheduled_core_ops[core_op_handle]; - // Check if there arent any write requests - bool has_pending_writes = scheduled_core_op->finished_write_frames_min_value() > 0; - - // Check if there arent any read requests - bool has_pending_user_reads = false; - for (const auto &name : scheduled_core_op->get_outputs_names()) { - if (scheduled_core_op->requested_read_frames(name) > 0) { - has_pending_user_reads = true; - break; - } - } - - std::vector over_threshold; - over_threshold.reserve(scheduled_core_op->get_inputs_names().size()); - std::vector over_timeout; - over_timeout.reserve(scheduled_core_op->get_inputs_names().size()); - - if (check_threshold) { - for (const auto &name : scheduled_core_op->get_inputs_names()) { - auto threshold_exp = scheduled_core_op->get_threshold(name); - if (!threshold_exp) { - LOGGER__ERROR("Failed to get threshold for stream {}", name); - return result; - } - auto threshold = (DEFAULT_SCHEDULER_MIN_THRESHOLD == threshold_exp.value()) ? 
1 : threshold_exp.value(); - auto timeout_exp = scheduled_core_op->get_timeout(); - if (!timeout_exp) { - LOGGER__ERROR("Failed to get timeout for stream {}", name); - return result; - } - auto timeout = timeout_exp.release(); - - // Check if there arent enough write requests to reach threshold and timeout didnt passed - auto write_requests = scheduled_core_op->requested_write_frames(name) + scheduled_core_op->finished_write_frames(name); - auto stream_over_threshold = write_requests >= threshold; - auto stream_over_timeout = timeout <= (std::chrono::steady_clock::now() - scheduled_core_op->get_last_run_timestamp()); - over_threshold.push_back(stream_over_threshold); - over_timeout.push_back(stream_over_timeout); - if (stream_over_threshold || stream_over_timeout) { - continue; - } else { - result.is_ready = false; - return result; - } - } - } - - result.threshold = std::all_of(over_threshold.begin(), over_threshold.end(), [](auto over) { return over; }); - result.timeout = std::all_of(over_timeout.begin(), over_timeout.end(), [](auto over) { return over; }); - result.is_ready = has_pending_writes && has_pending_user_reads; - - return result; -} - -Expected CoreOpsScheduler::wait_for_read(const scheduler_core_op_handle_t &core_op_handle, const std::string &stream_name, - const std::chrono::milliseconds &timeout) -{ - std::unique_lock lock(m_before_read_write_mutex); - - auto scheduled_core_op = m_scheduled_core_ops[core_op_handle]; - scheduled_core_op->requested_read_frames().increase(stream_name); - - hailo_status status = HAILO_SUCCESS; - auto wait_res = m_core_ops_cvs[core_op_handle]->wait_for(stream_name, lock, timeout, [this, core_op_handle, scheduled_core_op, stream_name, &status] { - - if (should_core_op_stop(core_op_handle)) { - status = HAILO_STREAM_ABORTED_BY_USER; - return true; // return true so that the wait will finish - } - - auto device_id = CoreOpsSchedulerOracle::get_avail_device(*this, core_op_handle); - if (INVALID_DEVICE_ID != device_id) { - 
status = switch_core_op(core_op_handle, device_id); - if (HAILO_SUCCESS != status) { - return true; // return true so that the wait will finish - } - } - - return scheduled_core_op->can_stream_read(stream_name); - }); - CHECK_AS_EXPECTED(wait_res, HAILO_TIMEOUT, "{} (D2H) failed with status={}, timeout={}ms", stream_name, HAILO_TIMEOUT, timeout.count()); - if (HAILO_STREAM_ABORTED_BY_USER == status) { - return make_unexpected(status); - } - CHECK_SUCCESS_AS_EXPECTED(status); - - scheduled_core_op->requested_read_frames().decrease(stream_name); - - return scheduled_core_op->pop_device_index(stream_name); -} - -hailo_status CoreOpsScheduler::signal_read_finish(const scheduler_core_op_handle_t &core_op_handle, const std::string &stream_name, uint32_t device_id) -{ - auto should_notify_next = false; - { - std::unique_lock lock(m_before_read_write_mutex); - - auto scheduled_core_op = m_scheduled_core_ops[core_op_handle]; - - scheduled_core_op->finished_read_frames().increase(stream_name); - m_devices[device_id]->current_cycle_finished_read_frames_d2h[core_op_handle][stream_name]++; - scheduled_core_op->d2h_finished_transferred_frames().decrease(stream_name); - m_fps_accumulator[core_op_handle]++; - - decrease_core_op_counters(core_op_handle); - - auto has_drained_everything = has_core_op_drained_everything(core_op_handle, device_id); - if (scheduled_core_op->is_nms() && has_drained_everything) { - // In NMS networks there is possibility that next wasn't choosen yet - choose_next_core_op(device_id, true); - - // If we didnt choose with treshold or timeout lets choose without treshold - if (!m_devices[device_id]->is_switching_core_op) { - choose_next_core_op(device_id, false); - } - - if (m_devices[device_id]->is_switching_core_op) { - should_notify_next = true; - } - - if (m_should_monitor) { - update_utilization_read_buffers_finished(device_id, core_op_handle, has_drained_everything); - } - } - } - - // Notify stream that frame was read and we have a space in the read 
buffers (wait_for_write) - m_core_ops_cvs[core_op_handle]->notify_all(); - - if (should_notify_next) { - // Notify all the threads of the next ng, for nms networks this is the only place we know the network was finished (wait_for_read) - m_core_ops_cvs[m_devices[device_id]->next_core_op_handle]->notify_all(); - } - - return HAILO_SUCCESS; -} - -void CoreOpsScheduler::decrease_core_op_counters(const scheduler_core_op_handle_t &core_op_handle) -{ - return m_scheduled_core_ops[core_op_handle]->decrease_current_core_op_counters(); -} - -bool CoreOpsScheduler::has_core_op_drained_everything(const scheduler_core_op_handle_t &core_op_handle, uint32_t device_id) -{ - if (INVALID_CORE_OP_HANDLE == core_op_handle) { - // If no core-op is running, consider it as drained - return true; - } - - if (core_op_all_streams_aborted(core_op_handle)) { - // We treat core-op as drained only if all streams are aborted - to make sure there aren't any ongoing transfers - return true; - } - - if ((!m_scheduled_core_ops[core_op_handle]->is_nms()) && (is_multi_device() || m_scheduled_core_ops[core_op_handle]->use_dynamic_batch_flow())) { - auto current_device_info = m_devices[device_id]; - auto max_transferred_h2d = get_max_value_of_unordered_map(current_device_info->current_cycle_requested_transferred_frames_h2d[core_op_handle]); - auto min_transferred_d2h = get_min_value_of_unordered_map(current_device_info->current_cycle_finished_transferred_frames_d2h[core_op_handle]); - - return (max_transferred_h2d == min_transferred_d2h); - } - - return m_scheduled_core_ops[core_op_handle]->has_core_op_drained_everything(); -} - -hailo_status CoreOpsScheduler::enable_stream(const scheduler_core_op_handle_t &core_op_handle, const std::string &stream_name) -{ - { - std::unique_lock lock(m_before_read_write_mutex); - - if (!m_should_core_op_stop[core_op_handle][stream_name]) { - return HAILO_SUCCESS; - } - - m_should_core_op_stop[core_op_handle][stream_name] = false; - } - 
m_core_ops_cvs[core_op_handle]->notify_all(); - - return HAILO_SUCCESS; -} - -hailo_status CoreOpsScheduler::disable_stream(const scheduler_core_op_handle_t &core_op_handle, const std::string &stream_name) -{ - { - std::unique_lock lock(m_before_read_write_mutex); - - if (m_should_core_op_stop[core_op_handle][stream_name]) { - return HAILO_SUCCESS; - } - - m_should_core_op_stop[core_op_handle][stream_name] = true; - } - m_core_ops_cvs[core_op_handle]->notify_all(); - - return HAILO_SUCCESS; -} - -hailo_status CoreOpsScheduler::set_timeout(const scheduler_core_op_handle_t &core_op_handle, const std::chrono::milliseconds &timeout, const std::string &/*network_name*/) -{ - // TODO: call in loop for set_timeout with the relevant stream-names (of the given network) - return m_scheduled_core_ops[core_op_handle]->set_timeout(timeout); -} - -hailo_status CoreOpsScheduler::set_threshold(const scheduler_core_op_handle_t &core_op_handle, uint32_t threshold, const std::string &/*network_name*/) -{ - // TODO: call in loop for set_timeout with the relevant stream-names (of the given network) - return m_scheduled_core_ops[core_op_handle]->set_threshold(threshold); -} - -hailo_status CoreOpsScheduler::set_priority(const scheduler_core_op_handle_t &core_op_handle, core_op_priority_t priority, const std::string &/*network_name*/) -{ - CHECK(priority <= HAILO_SCHEDULER_PRIORITY_MAX, HAILO_INVALID_ARGUMENT); - std::unique_lock lock(m_before_read_write_mutex); - auto old_priority = m_scheduled_core_ops[core_op_handle]->get_priority(); - auto &priority_vector = m_core_op_priority[old_priority]; - auto it = std::find(priority_vector.begin(), priority_vector.end(), core_op_handle); - CHECK(it != priority_vector.end(), HAILO_INTERNAL_FAILURE); - - priority_vector.erase(it); - m_scheduled_core_ops[core_op_handle]->set_priority(priority); - m_core_op_priority[priority].push_back(core_op_handle); - - return HAILO_SUCCESS; -} - -bool CoreOpsScheduler::choose_next_core_op(size_t device_id, bool 
check_threshold) -{ - if (!m_devices[device_id]->is_switching_core_op) { - return CoreOpsSchedulerOracle::choose_next_model(*this, m_devices[device_id]->device_id, check_threshold); - } - return false; -} - -bool CoreOpsScheduler::should_core_op_stop(const scheduler_core_op_handle_t &core_op_handle) -{ - for (const auto &name_flag_pair : m_should_core_op_stop[core_op_handle]) { - if (name_flag_pair.second) { - return true; - } - } - - return false; -} - -bool CoreOpsScheduler::core_op_all_streams_aborted(const scheduler_core_op_handle_t &core_op_handle) -{ - for (const auto &name_flag_pair : m_should_core_op_stop[core_op_handle]) { - if (!name_flag_pair.second) { - return false; - } - } - return true; -} - -void CoreOpsScheduler::notify_all() -{ - { - // Acquire mutex to make sure the notify_all will wake the blocking threads on the cv - std::unique_lock lock(m_before_read_write_mutex); - } - // TODO: consider notify only the relevant ng or stream - for (auto &cng_cvs : m_core_ops_cvs) { - cng_cvs.second->notify_all(); - } -} - -hailo_status CoreOpsScheduler::optimize_streaming_if_enabled(const scheduler_core_op_handle_t &core_op_handle) -{ - auto scheduled_core_op = m_scheduled_core_ops[core_op_handle]; - - if ((!scheduled_core_op->use_dynamic_batch_flow()) && !(scheduled_core_op->is_ready_to_switch() && - CoreOpsSchedulerOracle::should_stop_streaming(*this, scheduled_core_op->get_priority()))) { - for (uint32_t i = 0; i < m_devices.size(); i++) { - uint32_t index = scheduled_core_op->get_last_device_index() + i + 1; - index %= static_cast(m_devices.size()); - auto device_info = m_devices[index]; - // If multi device check for space in the vdma buffers, the send pending buffer is waitable in the current implementation. 
- // can be removed after dynamic descriptor binding support - if (device_info->current_core_op_handle == core_op_handle && - (!is_multi_device() || (get_min_avail_buffers_count(core_op_handle, device_info->device_id) >= DEFAULT_BURST_SIZE))) { - auto status = send_all_pending_buffers(core_op_handle, device_info->device_id, DEFAULT_BURST_SIZE); - if (HAILO_STREAM_ABORTED_BY_USER == status) { - LOGGER__INFO("send_all_pending_buffers has failed with status=HAILO_STREAM_ABORTED_BY_USER"); - return status; - } - CHECK_SUCCESS(status); - } - } - } - - return HAILO_SUCCESS; -} - -uint16_t CoreOpsScheduler::get_min_avail_buffers_count(const scheduler_core_op_handle_t &core_op_handle, uint32_t device_id) -{ - auto device_info = m_devices[device_id]; - auto scheduled_core_op = m_scheduled_core_ops[core_op_handle]; - - auto max_transferred_h2d = get_max_value_of_unordered_map(device_info->current_cycle_requested_transferred_frames_h2d[core_op_handle]); - auto min_d2h_frames = scheduled_core_op->is_nms() ? 
get_min_value_of_unordered_map(device_info->current_cycle_finished_read_frames_d2h[core_op_handle]) : - get_min_value_of_unordered_map(device_info->current_cycle_finished_transferred_frames_d2h[core_op_handle]); - auto ongoing_frames = static_cast(max_transferred_h2d - min_d2h_frames); - - uint16_t avail_buffers = static_cast(scheduled_core_op->get_min_input_buffers_count(get_device_count()) - ongoing_frames); - - return avail_buffers; -} - -void CoreOpsScheduler::update_utilization_timers(scheduler_device_idx_t device_id, scheduler_core_op_handle_t core_op_handle) -{ - assert(contains(m_core_op_utilization, core_op_handle)); - - auto time_diff = std::chrono::duration_cast>( - std::chrono::steady_clock::now() - m_last_measured_utilization_timestamp[device_id]).count(); - - m_device_utilization[device_id] += time_diff; - m_core_op_utilization[core_op_handle] += time_diff; -} - -void CoreOpsScheduler::update_utilization_timestamp(scheduler_device_idx_t device_id) -{ - m_last_measured_utilization_timestamp[device_id] = std::chrono::steady_clock::now(); -} - -void CoreOpsScheduler::update_utilization_send_started(scheduler_device_idx_t device_id) -{ - if (m_device_has_drained_everything[device_id]) { - update_device_drained_state(device_id, false); - update_utilization_timestamp(device_id); - } -} - -void CoreOpsScheduler::update_device_drained_state(scheduler_device_idx_t device_id, bool state) -{ - m_device_has_drained_everything[device_id] = state; -} - -void CoreOpsScheduler::update_utilization_read_buffers_finished(scheduler_device_idx_t device_id, - scheduler_core_op_handle_t core_op_handle, bool is_drained_everything) -{ - update_utilization_timers(device_id, core_op_handle); - update_device_drained_state(device_id, is_drained_everything); - if (!is_drained_everything) { - update_utilization_timestamp(device_id); - } -} - -} /* namespace hailort */ \ No newline at end of file diff --git a/hailort/libhailort/src/vdevice/scheduler/network_group_scheduler.hpp 
b/hailort/libhailort/src/vdevice/scheduler/network_group_scheduler.hpp deleted file mode 100644 index 253e2b9..0000000 --- a/hailort/libhailort/src/vdevice/scheduler/network_group_scheduler.hpp +++ /dev/null @@ -1,144 +0,0 @@ -/** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. - * Distributed under the MIT license (https://opensource.org/licenses/MIT) - **/ -/** - * @file network_group_scheduler.hpp - * @brief Class declaration for CoreOpsScheduler that schedules core-ops to be active depending on the scheduling algorithm. - **/ - -#ifndef _HAILO_NETWORK_GROUP_SCHEDULER_HPP_ -#define _HAILO_NETWORK_GROUP_SCHEDULER_HPP_ - -#include "hailo/hailort.h" -#include "hailo/expected.hpp" - -#include "common/utils.hpp" -#include "common/filesystem.hpp" - -#include "vdevice/scheduler/scheduler_mon.hpp" -#include "vdevice/scheduler/scheduled_core_op_state.hpp" -#include "vdevice/scheduler/scheduled_core_op_cv.hpp" -#include "vdevice/scheduler/scheduler_base.hpp" - - -namespace hailort -{ - -#define INVALID_CORE_OP_HANDLE (UINT32_MAX) -#define INVALID_DEVICE_ID (UINT32_MAX) - -using scheduler_core_op_handle_t = uint32_t; -using core_op_priority_t = uint8_t; -using scheduler_device_idx_t = uint32_t; - -class CoreOpsScheduler; -using CoreOpsSchedulerPtr = std::shared_ptr; - -// We use mostly weak pointer for the scheduler to prevent circular dependency of the pointers -using CoreOpsSchedulerWeakPtr = std::weak_ptr; - -using stream_name_t = std::string; - -class CoreOpsScheduler : public SchedulerBase -{ -public: - static Expected create_round_robin(uint32_t device_count, std::vector &devices_bdf_id, - std::vector &devices_arch); - CoreOpsScheduler(hailo_scheduling_algorithm_t algorithm, uint32_t device_count, std::vector &devices_bdf_id, - std::vector &devices_arch); - - virtual ~CoreOpsScheduler(); - CoreOpsScheduler(const CoreOpsScheduler &other) = delete; - CoreOpsScheduler &operator=(const CoreOpsScheduler &other) = delete; - CoreOpsScheduler 
&operator=(CoreOpsScheduler &&other) = delete; - CoreOpsScheduler(CoreOpsScheduler &&other) noexcept = delete; - - Expected add_core_op(std::shared_ptr added_core_op); - - hailo_status wait_for_write(const scheduler_core_op_handle_t &core_op_handle, const std::string &stream_name, - const std::chrono::milliseconds &timeout, const std::function &should_cancel); - hailo_status signal_write_finish(const scheduler_core_op_handle_t &core_op_handle, const std::string &stream_name, bool did_write_fail); - Expected wait_for_read(const scheduler_core_op_handle_t &core_op_handle, const std::string &stream_name, - const std::chrono::milliseconds &timeout); - hailo_status signal_read_finish(const scheduler_core_op_handle_t &core_op_handle, const std::string &stream_name, uint32_t device_id); - - hailo_status enable_stream(const scheduler_core_op_handle_t &core_op_handle, const std::string &stream_name); - hailo_status disable_stream(const scheduler_core_op_handle_t &core_op_handle, const std::string &stream_name); - - hailo_status set_timeout(const scheduler_core_op_handle_t &core_op_handle, const std::chrono::milliseconds &timeout, const std::string &network_name); - hailo_status set_threshold(const scheduler_core_op_handle_t &core_op_handle, uint32_t threshold, const std::string &network_name); - hailo_status set_priority(const scheduler_core_op_handle_t &core_op_handle, core_op_priority_t priority, const std::string &network_name); - - virtual ReadyInfo is_core_op_ready(const scheduler_core_op_handle_t &core_op_handle, bool check_threshold) override; - virtual bool has_core_op_drained_everything(const scheduler_core_op_handle_t &core_op_handle, uint32_t device_id) override; - - void notify_all(); - -protected: - bool choose_next_core_op(size_t device_id, bool check_threshold); - - std::unordered_map m_changing_current_batch_size; - std::unordered_map> m_should_core_op_stop; - -private: - hailo_status switch_core_op(const scheduler_core_op_handle_t &core_op_handle, uint32_t 
device_id, - bool keep_nn_config = false); - void reset_current_core_op_timestamps(uint32_t device_id); - - hailo_status send_all_pending_buffers(const scheduler_core_op_handle_t &core_op_handle, uint32_t device_id, uint32_t burst_size); - hailo_status send_pending_buffer(const scheduler_core_op_handle_t &core_op_handle, const std::string &stream_name, uint32_t device_id); - - void decrease_core_op_counters(const scheduler_core_op_handle_t &core_op_handle); - bool should_core_op_stop(const scheduler_core_op_handle_t &core_op_handle); - bool core_op_all_streams_aborted(const scheduler_core_op_handle_t &core_op_handle); - - std::string get_core_op_name(const scheduler_core_op_handle_t &core_op_handle); - bool is_core_op_active(const scheduler_core_op_handle_t &core_op_handle); - bool is_multi_device(); - hailo_status optimize_streaming_if_enabled(const scheduler_core_op_handle_t &network_group_handle); - uint16_t get_min_avail_buffers_count(const scheduler_core_op_handle_t &network_group_handle, uint32_t device_id); - - hailo_status start_mon(); - void time_dependent_events_cycle_calc(); - void log_monitor_device_infos(ProtoMon &mon); - void log_monitor_networks_infos(ProtoMon &mon); - void log_monitor_frames_infos(ProtoMon &mon); - void update_utilization_timers(scheduler_device_idx_t device_id, scheduler_core_op_handle_t core_op_handle); - void update_utilization_timestamp(scheduler_device_idx_t device_id); - void update_utilization_send_started(scheduler_device_idx_t device_id); - void update_device_drained_state(scheduler_device_idx_t device_id, bool state); - void update_utilization_read_buffers_finished(scheduler_device_idx_t device_id, scheduler_core_op_handle_t core_op_hanle, bool is_drained_everything); - hailo_status set_h2d_frames_counters(const scheduler_core_op_handle_t &core_op_handle, const std::string &stream_name, - ProtoMonStreamFramesInfo &stream_frames_info); - hailo_status set_d2h_frames_counters(const scheduler_core_op_handle_t &core_op_handle, 
const std::string &stream_name, - ProtoMonStreamFramesInfo &stream_frames_info); -#if defined(__GNUC__) - Expected> open_temp_mon_file(); - void dump_state(); -#endif - - std::vector> m_scheduled_core_ops; - std::mutex m_before_read_write_mutex; - std::unordered_map> m_core_ops_cvs; - - // Params for the scheduler MON - std::atomic_bool m_should_monitor; - std::thread m_mon_thread; - EventPtr m_mon_shutdown_event; -#if defined(__GNUC__) - std::shared_ptr m_mon_tmp_output; -#endif - std::chrono::time_point m_last_measured_timestamp; - double m_last_measured_time_duration; - std::unordered_map m_device_utilization; - std::unordered_map m_device_has_drained_everything; - std::unordered_map> m_last_measured_utilization_timestamp; - // TODO: Consider adding Accumulator classes for more info (min, max, mean, etc..) - std::unordered_map m_core_op_utilization; - std::unordered_map m_fps_accumulator; -}; - -} /* namespace hailort */ - -#endif /* _HAILO_NETWORK_GROUP_SCHEDULER_HPP_ */ diff --git a/hailort/libhailort/src/vdevice/scheduler/scheduled_core_op_cv.hpp b/hailort/libhailort/src/vdevice/scheduler/scheduled_core_op_cv.hpp index 9b6f8af..ef314a0 100644 --- a/hailort/libhailort/src/vdevice/scheduler/scheduled_core_op_cv.hpp +++ b/hailort/libhailort/src/vdevice/scheduler/scheduled_core_op_cv.hpp @@ -15,8 +15,6 @@ #include "common/utils.hpp" -#include "vdevice/scheduler/scheduler_mon.hpp" - #include diff --git a/hailort/libhailort/src/vdevice/scheduler/scheduled_core_op_state.cpp b/hailort/libhailort/src/vdevice/scheduler/scheduled_core_op_state.cpp index 037fb57..bc4fa21 100644 --- a/hailort/libhailort/src/vdevice/scheduler/scheduled_core_op_state.cpp +++ b/hailort/libhailort/src/vdevice/scheduler/scheduled_core_op_state.cpp @@ -19,26 +19,25 @@ namespace hailort #define SINGLE_CONTEXT_BATCH_SIZE (1) ScheduledCoreOp::ScheduledCoreOp(std::shared_ptr core_op, std::chrono::milliseconds timeout, - uint16_t max_batch_size, StreamInfoVector &stream_infos, std::string 
core_op_name) : + uint16_t max_batch_size, bool use_dynamic_batch_flow, StreamInfoVector &stream_infos, std::string core_op_name) : m_core_op(core_op), m_last_run_time_stamp(std::chrono::steady_clock::now()), m_timeout(std::move(timeout)), m_frame_was_sent(false), m_max_batch_size(max_batch_size), + m_use_dynamic_batch_flow(use_dynamic_batch_flow), m_priority(HAILO_SCHEDULER_PRIORITY_NORMAL), - m_last_device_index(INVALID_DEVICE_ID), + m_last_device_id(INVALID_DEVICE_ID), m_core_op_name(core_op_name), m_inputs_names(), m_outputs_names(), - m_is_nms(false), - m_ready_to_switch(false) + m_is_nms(false) { // Prepare empty counters for the added core-op for (const auto &stream_info : stream_infos) { m_min_threshold_per_stream[stream_info.name] = DEFAULT_SCHEDULER_MIN_THRESHOLD; if (HAILO_H2D_STREAM == stream_info.direction) { - m_requested_write_frames.insert(stream_info.name); - m_finished_write_frames.insert(stream_info.name); + m_pending_to_send_frames.insert(stream_info.name); m_h2d_finished_transferred_frames.insert(stream_info.name); m_inputs_names.push_back(stream_info.name); } else { @@ -46,7 +45,7 @@ ScheduledCoreOp::ScheduledCoreOp(std::shared_ptr core_op, std::chrono::m m_finished_read_frames.insert(stream_info.name); m_d2h_finished_transferred_frames.insert(stream_info.name); m_outputs_names.push_back(stream_info.name); - m_output_streams_read_orders[stream_info.name] = std::queue(); + if (HAILO_FORMAT_ORDER_HAILO_NMS == stream_info.format.order) { m_is_nms = true; } @@ -58,93 +57,44 @@ Expected> ScheduledCoreOp::create(std::shared_p { auto timeout = DEFAULT_SCHEDULER_TIMEOUT; - uint16_t max_batch_size = CONTROL_PROTOCOL__IGNORE_DYNAMIC_BATCH_SIZE; - if (added_core_op->get_supported_features().multi_context) { - auto batch_size = added_core_op->get_stream_batch_size(stream_infos[0].name); - CHECK_EXPECTED(batch_size); - if (batch_size.value() > SINGLE_CONTEXT_BATCH_SIZE) { - max_batch_size = batch_size.release(); - } - } - - return 
make_shared_nothrow(added_core_op, timeout, max_batch_size, stream_infos, added_core_op->name()); -} + auto batch_size_expected = added_core_op->get_stream_batch_size(stream_infos[0].name); + CHECK_EXPECTED(batch_size_expected); + auto max_batch_size = batch_size_expected.release(); -bool ScheduledCoreOp::has_enough_space_in_read_buffers(uint32_t ongoing_frames) -{ - auto output_streams = m_core_op->get_output_streams(); - for (auto &output_stream : output_streams) { - OutputStreamBase &vdevice_output = static_cast(output_stream.get()); - if (auto pending_frames_size = vdevice_output.get_buffer_frames_size()) { - if (pending_frames_size.value() <= ongoing_frames) { - return false; - } - // If couldnt get pending frames size and count (e.g. NMS layer), assume we have space - scheduler switch will prevent deadlocks here - } - } - return true; + // DEFAULT_BATCH_SIZE and SINGLE_CONTEXT_BATCH_SIZE support streaming and therfore we are not using dynamic batch flow + auto use_dynamic_batch_flow = added_core_op->get_supported_features().multi_context && (max_batch_size > SINGLE_CONTEXT_BATCH_SIZE); + return make_shared_nothrow(added_core_op, timeout, max_batch_size, use_dynamic_batch_flow, stream_infos, added_core_op->name()); } -uint16_t ScheduledCoreOp::get_min_input_buffers_count(uint32_t device_count) +uint16_t ScheduledCoreOp::get_min_input_buffers_count() { auto input_streams = m_core_op->get_input_streams(); uint16_t buffers_count = UINT16_MAX; for (auto &input_stream : input_streams) { InputStreamBase &vdevice_input = static_cast(input_stream.get()); if (auto pending_frames_size = vdevice_input.get_buffer_frames_size()) { - buffers_count = std::min(buffers_count, static_cast(pending_frames_size.value() / device_count)); + buffers_count = std::min(buffers_count, static_cast(pending_frames_size.value())); } } return buffers_count; } -bool ScheduledCoreOp::has_input_written_most_frames(const std::string &stream_name) -{ - auto total_writes = 
total_written_frames_count(); - return total_writes[stream_name] == get_max_value_of_unordered_map(total_writes); -} - -// TODO: Use get_pre_transfer_h2d_frames_count + get_h2d_transferred_frames_count -// TODO: Avoid returning map (malloc) -std::unordered_map ScheduledCoreOp::total_written_frames_count() -{ - std::unordered_map write_sum; - for (const auto &name : get_inputs_names()) { - write_sum[name] = m_requested_write_frames[name] + m_finished_write_frames[name] - + m_h2d_finished_transferred_frames[name]; - } - return write_sum; -} - -// TODO: Use max(m_d2h_finished_transferred_frames) == 0 instead -bool ScheduledCoreOp::has_pending_frames() +uint16_t ScheduledCoreOp::get_min_output_buffers_count() { - auto h2d_transferred_frames_count = m_h2d_finished_transferred_frames.get_max_value(); - for (const auto &name : get_outputs_names()) { - if (m_finished_read_frames[name] < h2d_transferred_frames_count) { - return true; + auto output_streams = m_core_op->get_output_streams(); + uint16_t buffers_count = UINT16_MAX; + for (auto &output_stream : output_streams) { + OutputStreamBase &vdevice_input = static_cast(output_stream.get()); + if (auto pending_frames_size = vdevice_input.get_buffer_frames_size()) { + buffers_count = std::min(buffers_count, static_cast(pending_frames_size.value())); } } - return false; -} - -bool ScheduledCoreOp::can_stream_read(const std::string &stream_name) -{ - return !m_output_streams_read_orders[stream_name].empty(); -} - -bool ScheduledCoreOp::can_stream_write(const std::string &stream_name) -{ - auto total_written_frames = total_written_frames_count()[stream_name]; - auto min_finished_read = finished_read_frames_min_value(); - auto ongoing_frames = (min_finished_read < total_written_frames) ? 
(total_written_frames - min_finished_read) : 0; - return has_enough_space_in_read_buffers(ongoing_frames); + return buffers_count; } - bool ScheduledCoreOp::use_dynamic_batch_flow() { - return (CONTROL_PROTOCOL__IGNORE_DYNAMIC_BATCH_SIZE != m_max_batch_size); + return m_use_dynamic_batch_flow; } bool ScheduledCoreOp::has_core_op_drained_everything() @@ -160,11 +110,10 @@ bool ScheduledCoreOp::has_core_op_drained_everything() void ScheduledCoreOp::decrease_current_core_op_counters() { - // Decrease only if counter is 2 or bigger because reaching 0 can cause states to change - if (!m_h2d_finished_transferred_frames.all_values_bigger_or_equal(2)) { + if (!m_h2d_finished_transferred_frames.all_values_bigger_or_equal(1)) { return; } - if (!m_finished_read_frames.all_values_bigger_or_equal(2)) { + if (!m_finished_read_frames.all_values_bigger_or_equal(1)) { return; } @@ -176,15 +125,6 @@ void ScheduledCoreOp::decrease_current_core_op_counters() } } -uint32_t ScheduledCoreOp::get_pre_transfer_h2d_frames_count() -{ - std::unordered_map write_sum; - for (const auto &name : get_inputs_names()) { - write_sum[name] = m_requested_write_frames[name] + m_finished_write_frames[name]; - } - return get_max_value_of_unordered_map(write_sum); -} - hailo_status ScheduledCoreOp::set_timeout(const std::chrono::milliseconds &timeout, const stream_name_t &stream_name) { CHECK(!m_frame_was_sent, HAILO_INVALID_OPERATION, @@ -199,7 +139,7 @@ hailo_status ScheduledCoreOp::set_timeout(const std::chrono::milliseconds &timeo hailo_status ScheduledCoreOp::set_threshold(uint32_t threshold, const stream_name_t &stream_name) { - CHECK((CONTROL_PROTOCOL__IGNORE_DYNAMIC_BATCH_SIZE == m_max_batch_size) || + CHECK(!use_dynamic_batch_flow() || (threshold <= m_max_batch_size), HAILO_INVALID_ARGUMENT, "Threshold must be equal or lower than the maximum batch size!"); CHECK(!m_frame_was_sent, HAILO_INVALID_OPERATION, @@ -226,14 +166,14 @@ void ScheduledCoreOp::set_priority(core_op_priority_t priority) 
m_priority = priority; } -uint32_t ScheduledCoreOp::get_last_device_index() +device_id_t ScheduledCoreOp::get_last_device() { - return m_last_device_index; + return m_last_device_id; } -void ScheduledCoreOp::set_last_device_index(uint32_t device_index) +void ScheduledCoreOp::set_last_device(const device_id_t &device_id) { - m_last_device_index = device_index; + m_last_device_id = device_id; } std::string ScheduledCoreOp::get_core_op_name() @@ -276,35 +216,26 @@ Expected ScheduledCoreOp::get_threshold(const stream_name_t &stream_na uint16_t ScheduledCoreOp::get_max_batch_size() { - if (!use_dynamic_batch_flow()) { - return SINGLE_CONTEXT_BATCH_SIZE; + if (CONTROL_PROTOCOL__IGNORE_DYNAMIC_BATCH_SIZE == m_max_batch_size) { + // In nms networks we dont know the output buffers count and therfore we are using the input buffer count + return is_nms() ? get_min_input_buffers_count() : get_min_output_buffers_count(); } return m_max_batch_size; } -Counter &ScheduledCoreOp::requested_write_frames() -{ - return m_requested_write_frames; -} - -std::atomic_uint32_t &ScheduledCoreOp::requested_write_frames(const stream_name_t &stream_name) -{ - return m_requested_write_frames[stream_name]; -} - -Counter &ScheduledCoreOp::finished_write_frames() +Counter &ScheduledCoreOp::pending_to_send_frames() { - return m_finished_write_frames; + return m_pending_to_send_frames; } -std::atomic_uint32_t &ScheduledCoreOp::finished_write_frames(const stream_name_t &stream_name) +std::atomic_uint32_t &ScheduledCoreOp::pending_to_send_frames(const stream_name_t &stream_name) { - return m_finished_write_frames[stream_name]; + return m_pending_to_send_frames[stream_name]; } -uint32_t ScheduledCoreOp::finished_write_frames_min_value() +uint32_t ScheduledCoreOp::pending_to_send_frames_min_value() { - return m_finished_write_frames.get_min_value(); + return m_pending_to_send_frames.get_min_value(); } Counter &ScheduledCoreOp::h2d_finished_transferred_frames() @@ -317,6 +248,11 @@ std::atomic_uint32_t 
&ScheduledCoreOp::h2d_finished_transferred_frames(const str return m_h2d_finished_transferred_frames[stream_name]; } +uint32_t ScheduledCoreOp::h2d_finished_transferred_frames_max_value() +{ + return m_h2d_finished_transferred_frames.get_max_value(); +} + Counter &ScheduledCoreOp::requested_read_frames() { return m_requested_read_frames; @@ -362,36 +298,4 @@ const std::vector &ScheduledCoreOp::get_outputs_names() return m_outputs_names; } -void ScheduledCoreOp::push_device_index(uint32_t device_index) -{ - for (auto& stream_name : get_outputs_names()) { - m_output_streams_read_orders[stream_name].push(device_index); - } -} - -uint32_t ScheduledCoreOp::pop_device_index(const stream_name_t &stream_name) -{ - assert(contains(m_output_streams_read_orders, stream_name)); - assert(!m_output_streams_read_orders[stream_name].empty()); - auto device_index = m_output_streams_read_orders[stream_name].front(); - m_output_streams_read_orders[stream_name].pop(); - - return device_index; -} - -bool ScheduledCoreOp::is_ready_to_switch() -{ - return m_ready_to_switch; -} - -void ScheduledCoreOp::mark_ready_to_switch() -{ - m_ready_to_switch = true; -} - -void ScheduledCoreOp::mark_unready_to_switch() -{ - m_ready_to_switch = false; -} - } /* namespace hailort */ diff --git a/hailort/libhailort/src/vdevice/scheduler/scheduled_core_op_state.hpp b/hailort/libhailort/src/vdevice/scheduler/scheduled_core_op_state.hpp index 29b50ae..e18eefa 100644 --- a/hailort/libhailort/src/vdevice/scheduler/scheduled_core_op_state.hpp +++ b/hailort/libhailort/src/vdevice/scheduler/scheduled_core_op_state.hpp @@ -3,7 +3,7 @@ * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** - * @file network_group_scheduler.hpp + * @file scheduler.hpp * @brief Class declaration for CoreOpsScheduler that schedules core-ops to be active depending on the scheduling algorithm. 
**/ @@ -18,6 +18,8 @@ #include "core_op/core_op.hpp" +#include "scheduler_base.hpp" + #include #include @@ -27,7 +29,7 @@ namespace hailort #define DEFAULT_SCHEDULER_TIMEOUT (std::chrono::milliseconds(0)) #define DEFAULT_SCHEDULER_MIN_THRESHOLD (0) -#define INVALID_DEVICE_ID (UINT32_MAX) +#define INVALID_DEVICE_ID (std::to_string(UINT32_MAX)) using stream_name_t = std::string; using core_op_priority_t = uint8_t; @@ -111,87 +113,70 @@ public: ScheduledCoreOp &operator=(ScheduledCoreOp &&other) = delete; ScheduledCoreOp(ScheduledCoreOp &&other) noexcept = delete; - bool has_enough_space_in_read_buffers(uint32_t ongoing_frames); - uint16_t get_min_input_buffers_count(uint32_t device_count); - bool has_input_written_most_frames(const std::string &stream_name); - std::unordered_map total_written_frames_count(); - bool has_pending_frames(); - bool can_stream_read(const std::string &stream_name); - bool can_stream_write(const std::string &stream_name); - bool use_dynamic_batch_flow(); - bool has_core_op_drained_everything(); - void decrease_current_core_op_counters(); - uint32_t get_pre_transfer_h2d_frames_count(); - - bool is_ready_to_switch(); - void mark_ready_to_switch(); - void mark_unready_to_switch(); - std::string get_core_op_name(); - std::shared_ptr get_core_op(); + const std::vector &get_outputs_names(); + const std::vector &get_inputs_names(); - void mark_frame_sent(); + uint16_t get_min_input_buffers_count(); + uint16_t get_min_output_buffers_count(); - std::chrono::time_point get_last_run_timestamp(); - void set_last_run_timestamp(const std::chrono::time_point ×tamp); + uint16_t get_max_batch_size(); + bool use_dynamic_batch_flow(); + bool has_core_op_drained_everything(); + + device_id_t get_last_device(); + void set_last_device(const device_id_t &device_id); Expected get_timeout(const stream_name_t &stream_name = ""); hailo_status set_timeout(const std::chrono::milliseconds &timeout, const stream_name_t &stream_name = ""); Expected get_threshold(const 
stream_name_t &stream_name); hailo_status set_threshold(uint32_t threshold, const stream_name_t &stream_name = ""); - core_op_priority_t get_priority(); void set_priority(core_op_priority_t priority); - uint32_t get_last_device_index(); - void set_last_device_index(uint32_t device_index); + std::chrono::time_point get_last_run_timestamp(); + void set_last_run_timestamp(const std::chrono::time_point ×tamp); - uint16_t get_max_batch_size(); + void mark_frame_sent(); + void decrease_current_core_op_counters(); - Counter &requested_write_frames(); - std::atomic_uint32_t &requested_write_frames(const stream_name_t &stream_name); - Counter &finished_write_frames(); - std::atomic_uint32_t &finished_write_frames(const stream_name_t &stream_name); - uint32_t finished_write_frames_min_value(); + Counter &pending_to_send_frames(); + std::atomic_uint32_t &pending_to_send_frames(const stream_name_t &stream_name); + uint32_t pending_to_send_frames_min_value(); Counter &h2d_finished_transferred_frames(); std::atomic_uint32_t &h2d_finished_transferred_frames(const stream_name_t &stream_name); + uint32_t h2d_finished_transferred_frames_max_value(); Counter &requested_read_frames(); std::atomic_uint32_t &requested_read_frames(const stream_name_t &stream_name); Counter &d2h_finished_transferred_frames(); std::atomic_uint32_t &d2h_finished_transferred_frames(const stream_name_t &stream_name); + Counter &finished_read_frames(); std::atomic_uint32_t &finished_read_frames(const stream_name_t &stream_name); uint32_t finished_read_frames_min_value(); - const std::vector &get_outputs_names(); - const std::vector &get_inputs_names(); bool is_nms() { return m_is_nms; } - void push_device_index(uint32_t device_index); - uint32_t pop_device_index(const stream_name_t &stream_name); - ScheduledCoreOp(std::shared_ptr core_op, std::chrono::milliseconds timeout, - uint16_t max_batch_size, StreamInfoVector &stream_infos, std::string core_op_name); + uint16_t max_batch_size, bool 
use_dynamic_batch_flow, StreamInfoVector &stream_infos, std::string core_op_name); private: std::shared_ptr m_core_op; - std::chrono::time_point m_last_run_time_stamp; std::chrono::milliseconds m_timeout; - std::atomic_bool m_frame_was_sent; uint16_t m_max_batch_size; + bool m_use_dynamic_batch_flow; - Counter m_requested_write_frames; // 'wait_for_write()' has been called - Counter m_finished_write_frames; // 'signal_finished_write()' has been called - frame is written in buffer (writes are a-sync) + Counter m_pending_to_send_frames; // 'signal_frame_pending_to_send()' has been called - frame is written in buffer (writes are a-sync) Counter m_h2d_finished_transferred_frames; // Frame has been transferred to device (intrpt was raised) @@ -204,19 +189,14 @@ private: core_op_priority_t m_priority; - std::atomic_uint32_t m_last_device_index; + device_id_t m_last_device_id; std::string m_core_op_name; std::vector m_inputs_names; std::vector m_outputs_names; - std::unordered_map> m_output_streams_read_orders; - bool m_is_nms; - - // TODO: Remove this flag when the old scheduling mode will be deprecated - std::atomic_bool m_ready_to_switch; }; } /* namespace hailort */ diff --git a/hailort/libhailort/src/vdevice/scheduler/scheduled_stream.cpp b/hailort/libhailort/src/vdevice/scheduler/scheduled_stream.cpp new file mode 100644 index 0000000..f1b64f4 --- /dev/null +++ b/hailort/libhailort/src/vdevice/scheduler/scheduled_stream.cpp @@ -0,0 +1,361 @@ +/** + * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. 
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT) +**/ +/** + * @file scheduled_stream.cpp + * @brief Internal stream implementation for scheduled streams + * + **/ + +#include "scheduled_stream.hpp" + +#include "utils/profiler/tracer_macros.hpp" + +namespace hailort +{ + +/** Input stream **/ +Expected> ScheduledInputStream::create( + std::map> &&streams, + const scheduler_core_op_handle_t &core_op_handle, + EventPtr &&core_op_activated_event, + const LayerInfo &layer_info, + CoreOpsSchedulerWeakPtr core_ops_scheduler) +{ + auto status = HAILO_UNINITIALIZED; + auto local_vdevice_stream = make_unique_nothrow(std::move(streams), + core_op_handle, std::move(core_op_activated_event), layer_info, + core_ops_scheduler, status); + CHECK_NOT_NULL_AS_EXPECTED(local_vdevice_stream, HAILO_OUT_OF_HOST_MEMORY); + CHECK_SUCCESS_AS_EXPECTED(status); + + return local_vdevice_stream; +} + +hailo_status ScheduledInputStreamBase::abort() +{ + return abort_impl(m_core_op_handle); +} + +hailo_status ScheduledInputStreamBase::abort_impl(scheduler_core_op_handle_t core_op_handle) +{ + auto status = HAILO_SUCCESS; // Best effort + assert(1 == m_streams.size()); + auto abort_status = m_streams.begin()->second.get().abort(); + if (HAILO_SUCCESS != status) { + LOGGER__ERROR("Failed to abort input stream. (status: {} device: {})", status, m_streams.begin()->second.get().get_dev_id()); + status = abort_status; + } + + auto core_ops_scheduler = m_core_ops_scheduler.lock(); + CHECK(core_ops_scheduler, HAILO_INTERNAL_FAILURE); + + auto disable_status = core_ops_scheduler->disable_stream(core_op_handle, name()); + if (HAILO_SUCCESS != disable_status) { + LOGGER__ERROR("Failed to disable stream in the core-op scheduler. 
(status: {})", disable_status); + status = disable_status; + } + + return status; +} + +hailo_status ScheduledInputStreamBase::clear_abort() +{ + return clear_abort_impl(m_core_op_handle); +} + +hailo_status ScheduledInputStreamBase::flush() +{ + auto core_ops_scheduler = m_core_ops_scheduler.lock(); + CHECK(core_ops_scheduler, HAILO_INTERNAL_FAILURE); + + auto status = core_ops_scheduler->flush_pending_buffers(m_core_op_handle, name(), get_timeout()); + if (HAILO_STREAM_ABORTED_BY_USER == status) { + LOGGER__INFO("Got HAILO_STREAM_ABORTED_BY_USER in flush of stream {}", name()); + return status; + } + CHECK_SUCCESS(status); + + return VDeviceInputStreamBase::flush(); +} + +hailo_status ScheduledInputStreamBase::clear_abort_impl(scheduler_core_op_handle_t core_op_handle) +{ + auto status = HAILO_SUCCESS; // Best effort + assert(1 == m_streams.size()); + auto clear_abort_status = m_streams.begin()->second.get().clear_abort(); + if ((HAILO_SUCCESS != clear_abort_status) && (HAILO_STREAM_NOT_ACTIVATED != clear_abort_status)) { + LOGGER__ERROR("Failed to clear abort input stream. (status: {} device: {})", clear_abort_status, m_streams.begin()->second.get().get_dev_id()); + status = clear_abort_status; + } + + auto core_ops_scheduler = m_core_ops_scheduler.lock(); + CHECK(core_ops_scheduler, HAILO_INTERNAL_FAILURE); + + auto enable_status = core_ops_scheduler->enable_stream(core_op_handle, name()); + if (HAILO_SUCCESS != enable_status) { + LOGGER__ERROR("Failed to enable stream in the core-op scheduler. 
(status: {})", enable_status); + status = enable_status; + } + + return status; +} + +hailo_status ScheduledInputStream::write_impl(const MemoryView &buffer, const std::function &should_cancel) +{ + auto core_ops_scheduler = m_core_ops_scheduler.lock(); + CHECK(core_ops_scheduler, HAILO_INTERNAL_FAILURE); + + assert(1 == m_streams.size()); + auto status = m_streams.begin()->second.get().write_buffer_only(buffer, should_cancel); + if (HAILO_SUCCESS != status) { + LOGGER__INFO("Write to stream has failed! status = {}", status); + return status; + } + + auto write_finish_status = core_ops_scheduler->signal_frame_pending_to_send(m_core_op_handle, name()); + if (HAILO_STREAM_ABORTED_BY_USER == write_finish_status) { + return write_finish_status; + } + CHECK_SUCCESS(write_finish_status); + + return HAILO_SUCCESS; +} + +Expected> ScheduledAsyncInputStream::create( + std::map> &&streams, + const scheduler_core_op_handle_t &core_op_handle, + EventPtr &&core_op_activated_event, + const LayerInfo &layer_info, + CoreOpsSchedulerWeakPtr core_ops_scheduler) +{ + auto max_queue_size_per_stream = streams.begin()->second.get().get_buffer_frames_size(); + CHECK_EXPECTED(max_queue_size_per_stream); + const auto max_queue_size = max_queue_size_per_stream.value() * streams.size(); + + auto status = HAILO_UNINITIALIZED; + auto local_vdevice_stream = make_unique_nothrow(std::move(streams), + core_op_handle, std::move(core_op_activated_event), layer_info, + core_ops_scheduler, max_queue_size, status); + CHECK_NOT_NULL_AS_EXPECTED(local_vdevice_stream, HAILO_OUT_OF_HOST_MEMORY); + CHECK_SUCCESS_AS_EXPECTED(status); + + return local_vdevice_stream; +} + +hailo_status ScheduledAsyncInputStream::send_pending_buffer(const device_id_t &device_id) +{ + // TODO HRT-10583 - allow option to remove reorder queue + auto pending_buffer = m_pending_buffers.dequeue(); + CHECK_EXPECTED_AS_STATUS(pending_buffer); + + pending_buffer->callback = 
m_callback_reorder_queue.wrap_callback(pending_buffer->callback); + assert(contains(m_streams, device_id)); + auto status = m_streams.at(device_id).get().write_async(pending_buffer.release()); + if (HAILO_SUCCESS != status) { + m_callback_reorder_queue.cancel_last_callback(); + } + return status; +} + +hailo_status ScheduledAsyncInputStream::wait_for_async_ready(size_t transfer_size, std::chrono::milliseconds timeout) +{ + (void)transfer_size; + return m_pending_buffers.wait_for_room(timeout); +} + +hailo_status ScheduledAsyncInputStream::write_async(TransferRequest &&transfer_request) +{ + auto core_ops_scheduler = m_core_ops_scheduler.lock(); + CHECK(core_ops_scheduler, HAILO_INTERNAL_FAILURE); + + auto status = m_pending_buffers.enqueue(std::move(transfer_request)); + CHECK_SUCCESS(status); + + auto write_finish_status = core_ops_scheduler->signal_frame_pending_to_send(m_core_op_handle, name()); + if (HAILO_STREAM_ABORTED_BY_USER == write_finish_status) { + return write_finish_status; + } + CHECK_SUCCESS(write_finish_status); + + return HAILO_SUCCESS; +} + +Expected ScheduledAsyncInputStream::get_async_max_queue_size() const +{ + return m_pending_buffers.max_size(); +} + + +hailo_status ScheduledAsyncInputStream::abort() +{ + m_pending_buffers.abort(); + return ScheduledInputStreamBase::abort(); +} + +hailo_status ScheduledAsyncInputStream::clear_abort() +{ + m_pending_buffers.clear_abort(); + return ScheduledInputStreamBase::clear_abort(); +} + +hailo_status ScheduledAsyncInputStream::write_impl(const MemoryView &, const std::function &) +{ + LOGGER__ERROR("Sync write is not supported by async streams"); + return HAILO_NOT_SUPPORTED; +} + +/** Output stream **/ +Expected> ScheduledOutputStream::create( + std::map> &&streams, + const scheduler_core_op_handle_t &core_op_handle, + const LayerInfo &layer_info, + EventPtr &&core_op_activated_event, + CoreOpsSchedulerWeakPtr core_ops_scheduler) +{ + auto status = HAILO_UNINITIALIZED; + auto stream = 
make_unique_nothrow(std::move(streams), core_op_handle, + layer_info, std::move(core_op_activated_event), core_ops_scheduler, status); + CHECK_NOT_NULL_AS_EXPECTED(stream, HAILO_OUT_OF_HOST_MEMORY); + CHECK_SUCCESS_AS_EXPECTED(status); + return stream; +} + +ScheduledOutputStream::ScheduledOutputStream( + std::map> &&streams, + const scheduler_core_op_handle_t &core_op_handle, + const LayerInfo &layer_info, + EventPtr &&core_op_activated_event, + CoreOpsSchedulerWeakPtr core_ops_scheduler, + hailo_status &status) : ScheduledOutputStreamBase(std::move(streams), core_op_handle, layer_info, + std::move(core_op_activated_event), core_ops_scheduler, status) + { + for (auto &stream_pair : m_streams) { + stream_pair.second.get().register_interrupt_callback( + [scheduler_weak=m_core_ops_scheduler, core_op_handle=m_core_op_handle, name=name(), device_id=stream_pair.first]() { + auto scheduler = scheduler_weak.lock(); + assert(scheduler); + scheduler->signal_frame_transferred_d2h(core_op_handle, name, device_id); + } + ); + } + } + +hailo_status ScheduledOutputStream::set_next_device_to_read(const device_id_t &device_id) +{ + std::lock_guard lock(m_device_read_order_mutex); + m_device_read_order.push(device_id); + return HAILO_SUCCESS; +} + +hailo_status ScheduledOutputStreamBase::abort() +{ + return abort_impl(m_core_op_handle); +} + +hailo_status ScheduledOutputStreamBase::abort_impl(scheduler_core_op_handle_t core_op_handle) +{ + auto status = HAILO_SUCCESS; // Best effort + for (const auto &pair : m_streams) { + auto &stream = pair.second; + auto abort_status = stream.get().abort(); + if (HAILO_SUCCESS != status) { + LOGGER__ERROR("Failed to abort output stream. 
(status: {} device: {})", status, stream.get().get_dev_id()); + status = abort_status; + } + } + auto core_ops_scheduler = m_core_ops_scheduler.lock(); + CHECK(core_ops_scheduler, HAILO_INTERNAL_FAILURE); + + auto disable_status = core_ops_scheduler->disable_stream(core_op_handle, name()); + if (HAILO_SUCCESS != disable_status) { + LOGGER__ERROR("Failed to disable stream in the core-op scheduler. (status: {})", disable_status); + status = disable_status; + } + + return status; +} + +hailo_status ScheduledOutputStreamBase::clear_abort() +{ + return clear_abort_impl(m_core_op_handle); +} + +hailo_status ScheduledOutputStreamBase::clear_abort_impl(scheduler_core_op_handle_t core_op_handle) +{ + auto status = HAILO_SUCCESS; // Best effort + for (const auto &pair : m_streams) { + auto &stream = pair.second; + auto clear_abort_status = stream.get().clear_abort(); + if ((HAILO_SUCCESS != clear_abort_status) && (HAILO_STREAM_NOT_ACTIVATED != clear_abort_status)) { + LOGGER__ERROR("Failed to clear abort output stream. (status: {} device: {})", clear_abort_status, stream.get().get_dev_id()); + status = clear_abort_status; + } + } + + auto core_ops_scheduler = m_core_ops_scheduler.lock(); + CHECK(core_ops_scheduler, HAILO_INTERNAL_FAILURE); + + auto enable_status = core_ops_scheduler->enable_stream(core_op_handle, name()); + if (HAILO_SUCCESS != enable_status) { + LOGGER__ERROR("Failed to enable stream in the core-op scheduler. 
(status: {})", enable_status); + status = enable_status; + } + + return status; +} + +hailo_status ScheduledOutputStream::read(MemoryView buffer) +{ + auto core_ops_scheduler = m_core_ops_scheduler.lock(); + CHECK(core_ops_scheduler, HAILO_INTERNAL_FAILURE); + + auto status = core_ops_scheduler->signal_frame_pending_to_read(m_core_op_handle, name()); + CHECK_SUCCESS(status); + + auto device_id = wait_for_read(); + if (HAILO_STREAM_ABORTED_BY_USER == device_id.status()) { + LOGGER__INFO("Read from stream was aborted."); + return device_id.status(); + } + CHECK_EXPECTED_AS_STATUS(device_id); + + assert(contains(m_streams, device_id.value())); + status = m_streams.at(device_id.value()).get().read(buffer); + if (HAILO_SUCCESS != status) { + LOGGER__INFO("Read from stream has failed! status = {}", status); + return status; + } + + status = core_ops_scheduler->signal_read_finish(m_core_op_handle, name(), device_id.value()); + if (HAILO_STREAM_ABORTED_BY_USER == status) { + return status; + } + CHECK_SUCCESS(status); + + return HAILO_SUCCESS; +} + +Expected ScheduledOutputStream::wait_for_read() +{ + auto core_ops_scheduler = m_core_ops_scheduler.lock(); + CHECK_AS_EXPECTED(core_ops_scheduler, HAILO_INTERNAL_FAILURE); + + auto status = core_ops_scheduler->wait_for_read(m_core_op_handle, name(), get_timeout(), [this]() { + std::lock_guard lock(m_device_read_order_mutex); + return !m_device_read_order.empty(); + }); + if (HAILO_STREAM_ABORTED_BY_USER == status) { + LOGGER__INFO("Read from stream was aborted."); + return make_unexpected(status); + } + CHECK_SUCCESS_AS_EXPECTED(status); + + std::lock_guard lock(m_device_read_order_mutex); + auto device_id = m_device_read_order.front(); + m_device_read_order.pop(); + return device_id; +} + +} /* namespace hailort */ diff --git a/hailort/libhailort/src/vdevice/scheduler/scheduled_stream.hpp b/hailort/libhailort/src/vdevice/scheduler/scheduled_stream.hpp index fb89a62..5aa530c 100644 --- 
a/hailort/libhailort/src/vdevice/scheduler/scheduled_stream.hpp +++ b/hailort/libhailort/src/vdevice/scheduler/scheduled_stream.hpp @@ -17,29 +17,29 @@ #include "stream_common/stream_internal.hpp" #include "vdevice/vdevice_internal.hpp" #include "vdevice/vdevice_stream.hpp" +#include "vdevice/callback_reorder_queue.hpp" #include "vdma/vdma_device.hpp" namespace hailort { -class ScheduledInputStream : public InputVDeviceBaseStream { + +class ScheduledInputStreamBase : public VDeviceInputStreamBase { public: - ScheduledInputStream( - std::vector> &&streams, + ScheduledInputStreamBase( + std::map> &&streams, const scheduler_core_op_handle_t &core_op_handle, EventPtr &&core_op_activated_event, const LayerInfo &layer_info, CoreOpsSchedulerWeakPtr core_ops_scheduler, hailo_status &status) : - InputVDeviceBaseStream(std::move(streams), std::move(core_op_activated_event), layer_info, status), + VDeviceInputStreamBase(std::move(streams), std::move(core_op_activated_event), layer_info, status), m_core_op_handle(core_op_handle), m_core_ops_scheduler(core_ops_scheduler) {} - virtual hailo_status abort() override; - virtual hailo_status clear_abort() override; - virtual bool is_scheduled() override { return true; }; + virtual bool is_scheduled() override final { return true; }; virtual void notify_all() override { @@ -50,18 +50,17 @@ public: } scheduler->notify_all(); - for (auto &stream : m_streams) { + for (const auto &pair : m_streams) { + auto &stream = pair.second; stream.get().notify_all(); } } -protected: - virtual Expected sync_write_raw_buffer(const MemoryView &buffer, - const std::function &should_cancel = []() { return false; }); - - Expected sync_write_raw_buffer_impl(const MemoryView &buffer, scheduler_core_op_handle_t core_op_handle, - const std::function &should_cancel); + virtual hailo_status abort() override; + virtual hailo_status clear_abort() override; + virtual hailo_status flush() override; +protected: scheduler_core_op_handle_t m_core_op_handle; 
CoreOpsSchedulerWeakPtr m_core_ops_scheduler; @@ -70,27 +69,179 @@ private: hailo_status clear_abort_impl(scheduler_core_op_handle_t core_op_handle); }; -class ScheduledOutputStream : public OutputVDeviceBaseStream { +class ScheduledInputStream : public ScheduledInputStreamBase { public: - ScheduledOutputStream( - std::vector> &&streams, + static Expected> create( + std::map> &&streams, const scheduler_core_op_handle_t &core_op_handle, + EventPtr &&core_op_activated_event, const LayerInfo &layer_info, + CoreOpsSchedulerWeakPtr core_ops_scheduler); + + ScheduledInputStream( + std::map> &&streams, + const scheduler_core_op_handle_t &core_op_handle, EventPtr &&core_op_activated_event, + const LayerInfo &layer_info, CoreOpsSchedulerWeakPtr core_ops_scheduler, hailo_status &status) : - OutputVDeviceBaseStream(std::move(streams), layer_info, std::move(core_op_activated_event), status), + ScheduledInputStreamBase(std::move(streams), core_op_handle, std::move(core_op_activated_event), layer_info, + core_ops_scheduler, status) + {} + +protected: + virtual hailo_status write_impl(const MemoryView &buffer, const std::function &should_cancel) override; +}; + +class TransferRequestsQueue final { +public: + TransferRequestsQueue(size_t max_size) : + m_max_size(max_size) + {} + + ~TransferRequestsQueue() + { + while (!m_queue.empty()) { + auto &request = m_queue.front(); + request.callback(HAILO_STREAM_ABORTED_BY_USER); + m_queue.pop(); + } + } + + TransferRequestsQueue(const TransferRequestsQueue &) = delete; + TransferRequestsQueue &operator=(const TransferRequestsQueue &) = delete; + + hailo_status wait_for_room(std::chrono::milliseconds timeout) + { + std::unique_lock lock(m_mutex); + auto result = m_dequeue_cv.wait_for(lock, timeout, + [&] { + return m_is_aborted || (m_queue.size() < m_max_size); + }); + if (!result) { + return HAILO_TIMEOUT; + } + if (m_is_aborted) { + return HAILO_STREAM_ABORTED_BY_USER; + } + return HAILO_SUCCESS; + } + + hailo_status 
enqueue(TransferRequest &&transfer_request) + { + std::unique_lock lock(m_mutex); + if (m_is_aborted) { + return HAILO_STREAM_ABORTED_BY_USER; + } + CHECK(m_queue.size() < m_max_size, HAILO_QUEUE_IS_FULL, "No space left in stream queue"); + m_queue.emplace(std::move(transfer_request)); + return HAILO_SUCCESS; + } + + Expected dequeue() + { + TransferRequest transfer_request{}; + { + std::unique_lock lock(m_mutex); + if (m_is_aborted) { + return make_unexpected(HAILO_STREAM_ABORTED_BY_USER); + } + CHECK_AS_EXPECTED(!m_queue.empty(), HAILO_INTERNAL_FAILURE, "Queue should not be empty"); + transfer_request = m_queue.front(); + m_queue.pop(); + } + m_dequeue_cv.notify_one(); + return transfer_request; + } + + void abort() + { + { + std::unique_lock lock(m_mutex); + m_is_aborted = true; + } + + m_dequeue_cv.notify_all(); + } + + void clear_abort() + { + std::unique_lock lock(m_mutex); + m_is_aborted = false; + } + + size_t max_size() const { return m_max_size; } + +private: + // TODO: use SpscQueue (HRT-10554) + const size_t m_max_size; + std::mutex m_mutex; + bool m_is_aborted = false; + std::condition_variable m_dequeue_cv; + std::queue m_queue; +}; + +class ScheduledAsyncInputStream : public ScheduledInputStreamBase { +public: + + static Expected> create( + std::map> &&streams, + const scheduler_core_op_handle_t &core_op_handle, + EventPtr &&core_op_activated_event, + const LayerInfo &layer_info, + CoreOpsSchedulerWeakPtr core_ops_scheduler); + + ScheduledAsyncInputStream( + std::map> &&streams, + const scheduler_core_op_handle_t &core_op_handle, + EventPtr &&core_op_activated_event, + const LayerInfo &layer_info, + CoreOpsSchedulerWeakPtr core_ops_scheduler, + size_t max_queue_size, + hailo_status &status) : + ScheduledInputStreamBase(std::move(streams), core_op_handle, std::move(core_op_activated_event), layer_info, + core_ops_scheduler, status), + m_pending_buffers(max_queue_size), + m_callback_reorder_queue(max_queue_size) // TODO HRT-1058 - use reorder queue 
only when needed + {} + + virtual hailo_status send_pending_buffer(const device_id_t &device_id) override; + virtual hailo_status wait_for_async_ready(size_t transfer_size, std::chrono::milliseconds timeout) override; + virtual hailo_status write_async(TransferRequest &&transfer_request) override; + virtual Expected get_async_max_queue_size() const override; + virtual hailo_status abort() override; + virtual hailo_status clear_abort() override; + +protected: + virtual hailo_status write_impl(const MemoryView &, const std::function &) override; + + // All buffers written by the user using write_async are first stored in this queue. + // When the scheduler decides to activate the network on a specific device, send_pending_buffer is called, and + // the buffers are sent to the underlying stream. + TransferRequestsQueue m_pending_buffers; + CallbackReorderQueue m_callback_reorder_queue; +}; + +class ScheduledOutputStreamBase : public VDeviceOutputStreamBase { +public: + ScheduledOutputStreamBase( + std::map> &&streams, + const scheduler_core_op_handle_t &core_op_handle, + const LayerInfo &layer_info, + EventPtr &&core_op_activated_event, + CoreOpsSchedulerWeakPtr core_ops_scheduler, + hailo_status &status) : + VDeviceOutputStreamBase(std::move(streams), layer_info, std::move(core_op_activated_event), status), m_core_op_handle(core_op_handle), m_core_ops_scheduler(core_ops_scheduler) {} + virtual bool is_scheduled() override { return true; }; + virtual hailo_status abort() override; virtual hailo_status clear_abort() override; - virtual bool is_scheduled() override { return true; }; protected: - virtual hailo_status read(MemoryView buffer) override; - hailo_status read_impl(MemoryView buffer, scheduler_core_op_handle_t core_op_handle); scheduler_core_op_handle_t m_core_op_handle; CoreOpsSchedulerWeakPtr m_core_ops_scheduler; @@ -100,6 +251,38 @@ private: hailo_status clear_abort_impl(scheduler_core_op_handle_t core_op_handle); }; + +class ScheduledOutputStream : public 
ScheduledOutputStreamBase { +public: + static Expected> create( + std::map> &&streams, + const scheduler_core_op_handle_t &core_op_handle, + const LayerInfo &layer_info, + EventPtr &&core_op_activated_event, + CoreOpsSchedulerWeakPtr core_ops_scheduler); + + ScheduledOutputStream( + std::map> &&streams, + const scheduler_core_op_handle_t &core_op_handle, + const LayerInfo &layer_info, + EventPtr &&core_op_activated_event, + CoreOpsSchedulerWeakPtr core_ops_scheduler, + hailo_status &status); + + virtual hailo_status set_next_device_to_read(const device_id_t &device_id) override; + +protected: + virtual hailo_status read(MemoryView buffer) override; + +private: + + // Returns device id to read from + Expected wait_for_read(); + + std::queue m_device_read_order; + std::mutex m_device_read_order_mutex; +}; + } /* namespace hailort */ #endif /* HAILO_SCHEDULED_STREAM_HPP_ */ diff --git a/hailort/libhailort/src/vdevice/scheduler/scheduler.cpp b/hailort/libhailort/src/vdevice/scheduler/scheduler.cpp new file mode 100644 index 0000000..c14f49a --- /dev/null +++ b/hailort/libhailort/src/vdevice/scheduler/scheduler.cpp @@ -0,0 +1,778 @@ +/** + * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. 
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file scheduler.cpp + * @brief: Network scheduler + **/ + +#include "common/os_utils.hpp" + + +#include "vdevice/scheduler/scheduler.hpp" +#include "vdevice/vdevice_core_op.hpp" +#include "vdevice/scheduler/scheduler_oracle.hpp" +#include "vdevice/vdevice_stream_multiplexer_wrapper.hpp" +#include "hef/hef_internal.hpp" +#include "utils/profiler/tracer_macros.hpp" + +#include + + +namespace hailort +{ + +#define SINGLE_CONTEXT_BATCH_SIZE (1) +#define DEFAULT_BURST_SIZE (1) + +// TODO: use device handles instead device count +CoreOpsScheduler::CoreOpsScheduler(hailo_scheduling_algorithm_t algorithm, std::vector &devices_ids, + std::vector &devices_arch) : + SchedulerBase(algorithm, devices_ids, devices_arch), + m_should_core_op_stop(), + m_before_read_write_mutex(), + m_core_ops_cvs(), + m_scheduler_cv() +{ + TRACE(SchedulerStartTrace, get_device_count()); + for (const auto &pair : m_devices) { + auto &device_info = pair.second; + TRACE(AddDeviceTrace, device_info->device_id, device_info->device_arch); + } + + m_is_running = true; + m_scheduler_thread = std::thread(&CoreOpsScheduler::worker_thread_main, this); + m_execute_worker_thread = true; +} + +CoreOpsScheduler::~CoreOpsScheduler() +{ + for (const auto &pair : m_devices) { + auto &device_info = pair.second; + if (INVALID_CORE_OP_HANDLE != device_info->current_core_op_handle) { + auto current_core_op = m_scheduled_core_ops[device_info->current_core_op_handle]->get_core_op(); + auto current_core_op_bundle = std::dynamic_pointer_cast(current_core_op); + assert(nullptr != current_core_op_bundle); + auto vdma_core_op = current_core_op_bundle->get_core_op_by_device_id(device_info->device_id); + if (!vdma_core_op) { + LOGGER__ERROR("Error retrieving core-op in scheduler destructor"); + } else { + if (HAILO_SUCCESS != VdmaConfigManager::deactivate_core_op(vdma_core_op.value())) { + LOGGER__ERROR("Error deactivating core-op when 
destroying scheduler"); + } + } + } + } + + // signal scheduler thread to stop and join + { + std::unique_lock lock(m_before_read_write_mutex); + m_is_running = false; + m_execute_worker_thread = true; + } + m_scheduler_cv.notify_one(); + if (m_scheduler_thread.joinable()) { + m_scheduler_thread.join(); + } +} + +Expected CoreOpsScheduler::create_round_robin(std::vector &devices_bdf_id, std::vector &devices_arch) +{ + auto ptr = make_shared_nothrow(HAILO_SCHEDULING_ALGORITHM_ROUND_ROBIN, devices_bdf_id, devices_arch); + CHECK_AS_EXPECTED(nullptr != ptr, HAILO_OUT_OF_HOST_MEMORY); + + return ptr; +} + +std::string CoreOpsScheduler::get_core_op_name(const scheduler_core_op_handle_t &core_op_handle) +{ + assert(m_scheduled_core_ops.size() > core_op_handle); + return m_scheduled_core_ops[core_op_handle]->get_core_op_name(); +} + +Expected CoreOpsScheduler::add_core_op(std::shared_ptr added_cng) +{ + scheduler_core_op_handle_t core_op_handle = INVALID_CORE_OP_HANDLE; + { + std::unique_lock lock(m_before_read_write_mutex); + core_op_handle = static_cast(m_scheduled_core_ops.size()); + + auto stream_infos = added_cng->get_all_stream_infos(); + CHECK_EXPECTED(stream_infos); + + auto scheduled_core_op = ScheduledCoreOp::create(added_cng, stream_infos.value()); + CHECK_EXPECTED(scheduled_core_op); + + bool is_nms = scheduled_core_op->get()->is_nms(); + TRACE(AddCoreOpTrace, "", added_cng->name(), DEFAULT_SCHEDULER_TIMEOUT.count(), DEFAULT_SCHEDULER_MIN_THRESHOLD, + core_op_handle, is_nms); + + m_scheduled_core_ops.emplace_back(scheduled_core_op.release()); + + + for (const auto &stream_info : stream_infos.value()) { + m_should_core_op_stop[core_op_handle][stream_info.name] = false; + } + + for (const auto &pair : m_devices) { + auto &device_info = pair.second; + for (const auto &stream_info : stream_infos.value()) { + if (HAILO_H2D_STREAM == stream_info.direction) { + device_info->current_cycle_requested_transferred_frames_h2d[core_op_handle][stream_info.name] = 0; + } else 
{ + device_info->current_cycle_finished_transferred_frames_d2h[core_op_handle][stream_info.name] = 0; + device_info->pending_to_read_frames[core_op_handle][stream_info.name] = 0; + } + } + } + + auto network_cvs = ScheduledCoreOpCV::create(added_cng); + CHECK_EXPECTED(network_cvs); + m_core_ops_cvs[core_op_handle] = network_cvs.release(); + m_core_op_priority[HAILO_SCHEDULER_PRIORITY_NORMAL].emplace_back(core_op_handle); + } + + return core_op_handle; +} + +bool CoreOpsScheduler::is_core_op_active(const scheduler_core_op_handle_t &core_op_handle) +{ + for (const auto &pair : m_devices) { + auto &device_info = pair.second; + if (core_op_handle == device_info->current_core_op_handle) { + return true; + } + } + + return false; +} + +bool CoreOpsScheduler::is_multi_device() +{ + return m_devices.size() > 1; +} + +hailo_status CoreOpsScheduler::signal_frame_pending_to_send(const scheduler_core_op_handle_t &core_op_handle, const std::string &stream_name) +{ + { + std::unique_lock lock(m_before_read_write_mutex); + assert(m_scheduled_core_ops.size() > core_op_handle); + auto scheduled_core_op = m_scheduled_core_ops[core_op_handle]; + + if (should_core_op_stop(core_op_handle)) { + return HAILO_STREAM_ABORTED_BY_USER; + } + + TRACE(WriteFrameTrace, "", core_op_handle, stream_name); + + m_scheduled_core_ops[core_op_handle]->mark_frame_sent(); + scheduled_core_op->pending_to_send_frames().increase(stream_name); + m_execute_worker_thread = true; + } + m_scheduler_cv.notify_one(); + + return HAILO_SUCCESS; +} + +hailo_status CoreOpsScheduler::switch_core_op(const scheduler_core_op_handle_t &core_op_handle, const device_id_t &device_id, bool /*keep_nn_config*/) +{ + auto scheduled_core_op = m_scheduled_core_ops[core_op_handle]; + assert(contains(m_devices, device_id)); + auto curr_device_info = m_devices[device_id]; + curr_device_info->is_switching_core_op = false; + + // initialize current cycle maps + for (const auto &name : scheduled_core_op->get_inputs_names()) { + 
curr_device_info->current_cycle_requested_transferred_frames_h2d[core_op_handle][name] = 0; + } + + for (const auto &name : scheduled_core_op->get_outputs_names()) { + curr_device_info->current_cycle_finished_transferred_frames_d2h[core_op_handle][name] = 0; + } + + uint16_t batch_size = std::min(scheduled_core_op->get_max_batch_size(), get_min_avail_buffers_count(core_op_handle, device_id)); + uint16_t hw_batch_size = SINGLE_CONTEXT_BATCH_SIZE; + + if (scheduled_core_op->use_dynamic_batch_flow()) { + batch_size = std::min(static_cast(scheduled_core_op->pending_to_send_frames_min_value()), batch_size); + hw_batch_size = batch_size; + } + + if (batch_size == 0) { + return HAILO_SUCCESS; + } + + bool has_same_hw_batch_size_as_previous = scheduled_core_op->use_dynamic_batch_flow() ? (curr_device_info->current_batch_size == batch_size) : true; + curr_device_info->current_batch_size = batch_size; + + if ((core_op_handle != curr_device_info->current_core_op_handle) || (!has_same_hw_batch_size_as_previous)) { + assert(m_scheduled_core_ops.size() > core_op_handle); + auto next_active_cng = scheduled_core_op->get_core_op(); + auto next_active_cng_wrapper = std::dynamic_pointer_cast(next_active_cng); + assert(nullptr != next_active_cng_wrapper); + auto next_active_cng_expected = next_active_cng_wrapper->get_core_op_by_device_id(curr_device_info->device_id); + CHECK_EXPECTED_AS_STATUS(next_active_cng_expected); + + std::shared_ptr current_active_vdma_cng = nullptr; + if (curr_device_info->current_core_op_handle != INVALID_CORE_OP_HANDLE) { + auto current_active_cng = m_scheduled_core_ops[curr_device_info->current_core_op_handle]->get_core_op(); + auto current_active_cng_bundle = std::dynamic_pointer_cast(current_active_cng); + assert(nullptr != current_active_cng_bundle); + auto current_active_cng_expected = current_active_cng_bundle->get_core_op_by_device_id(curr_device_info->device_id); + CHECK_EXPECTED_AS_STATUS(current_active_cng_expected); + current_active_vdma_cng = 
current_active_cng_expected.release(); + + // Flushing h2d channel in order to make sure we got all interrupts before switching the network. + for (auto &stream : current_active_vdma_cng->get_input_streams()) { + auto status = stream.get().flush(); + if (HAILO_STREAM_ABORTED_BY_USER == status) { + continue; + } + CHECK_SUCCESS(status); + } + } + + TRACE(SwitchCoreOpTrace, device_id, core_op_handle); + static const auto RESUME_PENDING_STREAM_TRANSFERS = true; + auto status = VdmaConfigManager::switch_core_op(current_active_vdma_cng, next_active_cng_expected.value(), hw_batch_size, + RESUME_PENDING_STREAM_TRANSFERS); + CHECK_SUCCESS(status, "Failed switching core-op"); + } + + scheduled_core_op->set_last_run_timestamp(std::chrono::steady_clock::now()); // Mark timestamp on activation + curr_device_info->current_core_op_handle = core_op_handle; + + auto status = send_all_pending_buffers(core_op_handle, device_id, batch_size); + if (HAILO_STREAM_ABORTED_BY_USER == status) { + LOGGER__INFO("send_all_pending_buffers has failed with status=HAILO_STREAM_ABORTED_BY_USER"); + return status; + } + CHECK_SUCCESS(status); + + return HAILO_SUCCESS; +} + +void CoreOpsScheduler::signal_read_finish_impl(const scheduler_core_op_handle_t &core_op_handle, + const std::string &stream_name, const device_id_t &device_id) +{ + TRACE(ReadFrameTrace, "", core_op_handle, stream_name); + + auto scheduled_core_op = m_scheduled_core_ops[core_op_handle]; + scheduled_core_op->requested_read_frames().decrease(stream_name); + scheduled_core_op->finished_read_frames().increase(stream_name); + scheduled_core_op->d2h_finished_transferred_frames().decrease(stream_name); + + if (m_devices[device_id]->pending_to_read_frames[core_op_handle][stream_name] > 0) { + m_devices[device_id]->pending_to_read_frames[core_op_handle][stream_name]--; + } + + decrease_core_op_counters(core_op_handle); + + auto has_drained_everything = has_core_op_drained_everything(core_op_handle, device_id); + if 
(scheduled_core_op->is_nms() && has_drained_everything) { + // In NMS networks there is possibility that next wasn't choosen yet + choose_next_core_op(device_id, true); + + // If we didn't choose with threshold or timeout lets choose without threshold + if (!m_devices[device_id]->is_switching_core_op) { + choose_next_core_op(device_id, false); + } + + if (has_drained_everything) { + TRACE(CoreOpIdleTrace, device_id, core_op_handle); + } + } + + m_execute_worker_thread = true; +} + +hailo_status CoreOpsScheduler::send_all_pending_buffers(const scheduler_core_op_handle_t &core_op_handle, const device_id_t &device_id, uint32_t burst_size) +{ + auto current_device_info = m_devices[device_id]; + if ((INVALID_CORE_OP_HANDLE == current_device_info->current_core_op_handle) || (current_device_info->current_core_op_handle != core_op_handle)) { + return HAILO_SUCCESS; + } + + auto scheduled_core_op = m_scheduled_core_ops[core_op_handle]; + + for (size_t i = 0; i < burst_size; i++) { + auto finished_send = false; + for (const auto &name : scheduled_core_op->get_inputs_names()) { + if (scheduled_core_op->pending_to_send_frames(name) == 0) { + finished_send = true; + break; + } + } + if (finished_send) { + break; + } + + for (const auto &name : scheduled_core_op->get_outputs_names()) { + auto output_stream = scheduled_core_op->get_core_op()->get_output_stream_by_name(name); + CHECK_EXPECTED_AS_STATUS(output_stream); + + auto &output_stream_base = static_cast(output_stream->get()); + auto status = output_stream_base.set_next_device_to_read(device_id); + CHECK_SUCCESS(status); + } + + for (const auto &name : scheduled_core_op->get_inputs_names()) { + auto status = send_pending_buffer(core_op_handle, name, device_id); + if (HAILO_STREAM_ABORTED_BY_USER == status) { + LOGGER__INFO("send_pending_buffer has failed with status=HAILO_STREAM_ABORTED_BY_USER"); + return status; + } + CHECK_SUCCESS(status); + } + scheduled_core_op->set_last_device(device_id); + } + + return HAILO_SUCCESS; 
+} + +hailo_status CoreOpsScheduler::send_pending_buffer(const scheduler_core_op_handle_t &core_op_handle, const std::string &stream_name, + const device_id_t &device_id) +{ + assert(m_scheduled_core_ops.size() > core_op_handle); + auto scheduled_core_op = m_scheduled_core_ops[core_op_handle]; + + auto current_cng = scheduled_core_op->get_core_op(); + auto input_stream = current_cng->get_input_stream_by_name(stream_name); + CHECK_EXPECTED_AS_STATUS(input_stream); + + auto &input_stream_base = static_cast(input_stream->get()); + auto status = input_stream_base.send_pending_buffer(device_id); + if (HAILO_STREAM_ABORTED_BY_USER == status) { + LOGGER__INFO("send_pending_buffer has failed with status=HAILO_STREAM_ABORTED_BY_USER"); + return status; + } + CHECK_SUCCESS(status); + + TRACE(InputVdmaDequeueTrace, device_id, core_op_handle, stream_name); + + m_devices[device_id]->current_cycle_requested_transferred_frames_h2d[core_op_handle][stream_name]++; + scheduled_core_op->pending_to_send_frames().decrease(stream_name); + // Notifying for flush + m_core_ops_cvs[core_op_handle]->notify_one(stream_name); + + scheduled_core_op->h2d_finished_transferred_frames().increase(stream_name); + + if (should_core_op_stop(core_op_handle)) { + return HAILO_STREAM_ABORTED_BY_USER; + } + + return HAILO_SUCCESS; +} + +CoreOpsScheduler::ReadyInfo CoreOpsScheduler::is_core_op_ready(const scheduler_core_op_handle_t &core_op_handle, bool check_threshold) +{ + ReadyInfo result; + result.is_ready = false; + + if (should_core_op_stop(core_op_handle)) { + // Do not switch to an aborted core-op + return result; + } + + auto scheduled_core_op = m_scheduled_core_ops[core_op_handle]; + // Check if there arent any write requests + const bool has_pending_writes = scheduled_core_op->pending_to_send_frames_min_value() > 0; + + // Check for read request on all the output streams + const bool has_avail_pending_to_read_buffers = get_min_avail_output_buffers(core_op_handle) > 0; + + std::vector 
over_threshold; + over_threshold.reserve(scheduled_core_op->get_inputs_names().size()); + std::vector over_timeout; + over_timeout.reserve(scheduled_core_op->get_inputs_names().size()); + + if (check_threshold) { + for (const auto &name : scheduled_core_op->get_inputs_names()) { + auto threshold_exp = scheduled_core_op->get_threshold(name); + if (!threshold_exp) { + LOGGER__ERROR("Failed to get threshold for stream {}", name); + return result; + } + auto threshold = (DEFAULT_SCHEDULER_MIN_THRESHOLD == threshold_exp.value()) ? 1 : threshold_exp.value(); + auto timeout_exp = scheduled_core_op->get_timeout(); + if (!timeout_exp) { + LOGGER__ERROR("Failed to get timeout for stream {}", name); + return result; + } + auto timeout = timeout_exp.release(); + + // Check if there arent enough write requests to reach threshold and timeout didnt passed + uint32_t write_requests = scheduled_core_op->pending_to_send_frames(name); + auto stream_over_threshold = write_requests >= threshold; + auto stream_over_timeout = timeout <= (std::chrono::steady_clock::now() - scheduled_core_op->get_last_run_timestamp()); + over_threshold.push_back(stream_over_threshold); + over_timeout.push_back(stream_over_timeout); + if (stream_over_threshold || stream_over_timeout) { + continue; + } else { + result.is_ready = false; + return result; + } + } + result.over_threshold = std::all_of(over_threshold.begin(), over_threshold.end(), [](auto over) { return over; }); + result.over_timeout = std::all_of(over_timeout.begin(), over_timeout.end(), [](auto over) { return over; }); + } + + result.is_ready = has_pending_writes && has_avail_pending_to_read_buffers; + + return result; +} + +hailo_status CoreOpsScheduler::wait_for_read(const scheduler_core_op_handle_t &core_op_handle, + const std::string &stream_name, const std::chrono::milliseconds &timeout, const std::function &predicate) +{ + std::unique_lock lock(m_before_read_write_mutex); + + hailo_status status = HAILO_SUCCESS; + auto wait_res = 
m_core_ops_cvs[core_op_handle]->wait_for(stream_name, lock, timeout, + [this, core_op_handle, predicate, &stream_name, &status] { + if (m_should_core_op_stop[core_op_handle][stream_name]) { + status = HAILO_STREAM_ABORTED_BY_USER; + return true; // return true so that the wait will finish + } + + return predicate(); + }); + CHECK(wait_res, HAILO_TIMEOUT, "{} (D2H) failed with status={}, timeout={}ms", stream_name, HAILO_TIMEOUT, timeout.count()); + if (HAILO_STREAM_ABORTED_BY_USER == status) { + return status; + } + CHECK_SUCCESS(status); + + return HAILO_SUCCESS; +} + +hailo_status CoreOpsScheduler::signal_frame_pending_to_read(const scheduler_core_op_handle_t &core_op_handle, const std::string &stream_name) +{ + { + std::unique_lock lock(m_before_read_write_mutex); + + auto scheduled_core_op = m_scheduled_core_ops[core_op_handle]; + scheduled_core_op->requested_read_frames().increase(stream_name); + m_execute_worker_thread = true; + } + m_scheduler_cv.notify_one(); + + return HAILO_SUCCESS; +} + +void CoreOpsScheduler::signal_frame_transferred_d2h(const scheduler_core_op_handle_t &core_op_handle, + const std::string &stream_name, const device_id_t &device_id) +{ + { + std::unique_lock lock(m_before_read_write_mutex); + + auto scheduled_core_op = m_scheduled_core_ops[core_op_handle]; + if (!scheduled_core_op->is_nms()) { + TRACE(OutputVdmaEnqueueTrace, "", core_op_handle, stream_name, 1); + // TODO: Remove d2h_finished_transferred_frames and use current_cycle_finished_transferred_frames_d2h instead + scheduled_core_op->d2h_finished_transferred_frames().increase(stream_name); + m_devices[device_id]->pending_to_read_frames[core_op_handle][stream_name] += 1; + m_devices[device_id]->current_cycle_finished_transferred_frames_d2h[core_op_handle][stream_name] += 1; + } + + auto has_drained_everything = has_core_op_drained_everything(core_op_handle, device_id); + + if (has_drained_everything) { + TRACE(CoreOpIdleTrace, device_id, core_op_handle); + } + + // If ng finished 
and we didn't choose next lets choose without checking threshold + if (!m_devices[device_id]->is_switching_core_op && has_drained_everything) { + auto was_chosen = choose_next_core_op(device_id, true); + if (!was_chosen) { + choose_next_core_op(device_id, false); + } + } + + if (m_devices[device_id]->is_switching_core_op) { + m_execute_worker_thread = true; + } + } + + // Notify stream that new frame was accepted (wait_for read operation) + m_core_ops_cvs[core_op_handle]->notify_one(stream_name); + m_scheduler_cv.notify_one(); +} + +hailo_status CoreOpsScheduler::signal_read_finish(const scheduler_core_op_handle_t &core_op_handle, + const std::string &stream_name, const device_id_t &device_id) +{ + { + std::unique_lock lock(m_before_read_write_mutex); + signal_read_finish_impl(core_op_handle, stream_name, device_id); + } + m_scheduler_cv.notify_one(); + return HAILO_SUCCESS; +} + +void CoreOpsScheduler::decrease_core_op_counters(const scheduler_core_op_handle_t &core_op_handle) +{ + return m_scheduled_core_ops[core_op_handle]->decrease_current_core_op_counters(); +} + +bool CoreOpsScheduler::has_core_op_drained_everything(const scheduler_core_op_handle_t &core_op_handle, const device_id_t &device_id) +{ + if (core_op_all_streams_aborted(core_op_handle)) { + // We treat core-op as drained only if all streams are aborted - to make sure there aren't any ongoing transfers + return true; + } + + if (INVALID_CORE_OP_HANDLE == core_op_handle) { + // If no core-op is running, consider it as drained + return true; + } + + if ((!m_scheduled_core_ops[core_op_handle]->is_nms()) && (is_multi_device() || m_scheduled_core_ops[core_op_handle]->use_dynamic_batch_flow())) { + auto current_device_info = m_devices[device_id]; + auto max_transferred_h2d = get_max_value_of_unordered_map(current_device_info->current_cycle_requested_transferred_frames_h2d[core_op_handle]); + auto min_transferred_d2h = 
get_min_value_of_unordered_map(current_device_info->current_cycle_finished_transferred_frames_d2h[core_op_handle]); + + return (max_transferred_h2d == min_transferred_d2h); + } + + return m_scheduled_core_ops[core_op_handle]->has_core_op_drained_everything(); +} + +hailo_status CoreOpsScheduler::flush_pending_buffers(const scheduler_core_op_handle_t &core_op_handle, const std::string &stream_name, + const std::chrono::milliseconds &timeout) +{ + std::unique_lock lock(m_before_read_write_mutex); + + hailo_status status = HAILO_SUCCESS; + auto wait_res = m_core_ops_cvs[core_op_handle]->wait_for(stream_name, lock, timeout, + [this, core_op_handle, &stream_name, &status] { + if (should_core_op_stop(core_op_handle)) { + status = HAILO_STREAM_ABORTED_BY_USER; + return true; // return true so that the wait will finish + } + + assert(m_scheduled_core_ops.size() > core_op_handle); + auto scheduled_core_op = m_scheduled_core_ops[core_op_handle]; + auto pending = scheduled_core_op->pending_to_send_frames(stream_name).load(); + return (pending == 0); + }); + CHECK(wait_res, HAILO_TIMEOUT, "{} (H2D) failed with status={}, timeout={}ms", stream_name, HAILO_TIMEOUT, timeout.count()); + if (HAILO_STREAM_ABORTED_BY_USER == status) { + LOGGER__INFO("flush pending buffers was aborted in stream ={}", stream_name); + return status; + } + CHECK_SUCCESS(status); + + return HAILO_SUCCESS; +} + +hailo_status CoreOpsScheduler::enable_stream(const scheduler_core_op_handle_t &core_op_handle, const std::string &stream_name) +{ + { + std::unique_lock lock(m_before_read_write_mutex); + + if (!m_should_core_op_stop[core_op_handle][stream_name]) { + return HAILO_SUCCESS; + } + + m_should_core_op_stop[core_op_handle][stream_name] = false; + } + m_core_ops_cvs[core_op_handle]->notify_all(); + + return HAILO_SUCCESS; +} + +hailo_status CoreOpsScheduler::disable_stream(const scheduler_core_op_handle_t &core_op_handle, const std::string &stream_name) +{ + { + std::unique_lock 
lock(m_before_read_write_mutex); + + if (m_should_core_op_stop[core_op_handle][stream_name]) { + return HAILO_SUCCESS; + } + + m_should_core_op_stop[core_op_handle][stream_name] = true; + } + m_core_ops_cvs[core_op_handle]->notify_all(); + + return HAILO_SUCCESS; +} + +hailo_status CoreOpsScheduler::set_timeout(const scheduler_core_op_handle_t &core_op_handle, const std::chrono::milliseconds &timeout, const std::string &/*network_name*/) +{ + // TODO: call in loop for set_timeout with the relevant stream-names (of the given network) + return m_scheduled_core_ops[core_op_handle]->set_timeout(timeout); +} + +hailo_status CoreOpsScheduler::set_threshold(const scheduler_core_op_handle_t &core_op_handle, uint32_t threshold, const std::string &/*network_name*/) +{ + // TODO: call in loop for set_timeout with the relevant stream-names (of the given network) + return m_scheduled_core_ops[core_op_handle]->set_threshold(threshold); +} + +hailo_status CoreOpsScheduler::set_priority(const scheduler_core_op_handle_t &core_op_handle, core_op_priority_t priority, const std::string &/*network_name*/) +{ + CHECK(priority <= HAILO_SCHEDULER_PRIORITY_MAX, HAILO_INVALID_ARGUMENT); + std::unique_lock lock(m_before_read_write_mutex); + auto old_priority = m_scheduled_core_ops[core_op_handle]->get_priority(); + auto &priority_vector = m_core_op_priority[old_priority]; + auto it = std::find(priority_vector.begin(), priority_vector.end(), core_op_handle); + CHECK(it != priority_vector.end(), HAILO_INTERNAL_FAILURE); + + priority_vector.erase(it); + m_scheduled_core_ops[core_op_handle]->set_priority(priority); + m_core_op_priority[priority].push_back(core_op_handle); + + return HAILO_SUCCESS; +} + +bool CoreOpsScheduler::choose_next_core_op(const device_id_t &device_id, bool check_threshold) +{ + if (!m_devices[device_id]->is_switching_core_op) { + return CoreOpsSchedulerOracle::choose_next_model(*this, m_devices[device_id]->device_id, check_threshold) != INVALID_CORE_OP_HANDLE; + } + 
return false; +} + +bool CoreOpsScheduler::should_core_op_stop(const scheduler_core_op_handle_t &core_op_handle) +{ + for (const auto &name_flag_pair : m_should_core_op_stop[core_op_handle]) { + if (name_flag_pair.second) { + return true; + } + } + + return false; +} + +bool CoreOpsScheduler::core_op_all_streams_aborted(const scheduler_core_op_handle_t &core_op_handle) +{ + for (const auto &name_flag_pair : m_should_core_op_stop[core_op_handle]) { + if (!name_flag_pair.second) { + return false; + } + } + return true; +} + +void CoreOpsScheduler::notify_all() +{ + { + // Acquire mutex to make sure the notify_all will wake the blocking threads on the cv + std::unique_lock lock(m_before_read_write_mutex); + } + // TODO: consider notify only the relevant ng or stream + for (auto &cng_cvs : m_core_ops_cvs) { + cng_cvs.second->notify_all(); + } +} + +hailo_status CoreOpsScheduler::optimize_streaming_if_enabled(const scheduler_core_op_handle_t &core_op_handle) +{ + auto scheduled_core_op = m_scheduled_core_ops[core_op_handle]; + if (!scheduled_core_op->use_dynamic_batch_flow()) { + auto next_pair = m_devices.upper_bound(scheduled_core_op->get_last_device()); // Get last device and go to the next device in the map + if (m_devices.end() == next_pair){ // In case we reached to the end of the map - start from the beggining + next_pair = m_devices.begin(); + } + auto &device_info = next_pair->second; + if (device_info->current_core_op_handle == core_op_handle && !device_info->is_switching_core_op && + !CoreOpsSchedulerOracle::should_stop_streaming(*this, scheduled_core_op->get_priority(), device_info->device_id) && + (get_min_avail_buffers_count(core_op_handle, device_info->device_id) >= DEFAULT_BURST_SIZE)) { + auto status = send_all_pending_buffers(core_op_handle, device_info->device_id, DEFAULT_BURST_SIZE); + if (HAILO_STREAM_ABORTED_BY_USER == status) { + LOGGER__INFO("send_all_pending_buffers has failed with status=HAILO_STREAM_ABORTED_BY_USER"); + return status; + } + 
CHECK_SUCCESS(status); + } + } + return HAILO_SUCCESS; +} + +uint16_t CoreOpsScheduler::get_min_avail_buffers_count(const scheduler_core_op_handle_t &core_op_handle, const device_id_t &device_id) +{ + auto scheduled_core_op = m_scheduled_core_ops[core_op_handle]; + auto device_info = m_devices[device_id]; + + uint16_t avail_buffer_count = UINT16_MAX; + for (auto &output_stream : scheduled_core_op->get_core_op()->get_output_streams()) { + auto &vdevice_output = static_cast(output_stream.get()); + if (auto buffer_size_in_frames = vdevice_output.get_buffer_frames_size()) { + auto &pending_frames_in_buffer = device_info->pending_to_read_frames[core_op_handle][vdevice_output.name()]; + auto ongoing_frames = get_max_value_of_unordered_map(device_info->current_cycle_requested_transferred_frames_h2d[core_op_handle]) - + device_info->current_cycle_finished_transferred_frames_d2h[core_op_handle][vdevice_output.name()]; + assert(*buffer_size_in_frames >= (pending_frames_in_buffer + ongoing_frames)); + avail_buffer_count = std::min(avail_buffer_count, static_cast(*buffer_size_in_frames - pending_frames_in_buffer - ongoing_frames)); + } + } + + auto transferred_frames = get_max_value_of_unordered_map(device_info->current_cycle_requested_transferred_frames_h2d[core_op_handle]) - + get_min_value_of_unordered_map(device_info->current_cycle_finished_transferred_frames_d2h[core_op_handle]); + if (is_multi_device()) { + auto avail_input_buffer_count = static_cast((scheduled_core_op->get_min_input_buffers_count()) - transferred_frames); + avail_buffer_count = std::min(avail_input_buffer_count, avail_buffer_count); + } + + return avail_buffer_count; +} + +uint16_t CoreOpsScheduler::get_min_avail_output_buffers(const scheduler_core_op_handle_t &core_op_handle) +{ + auto scheduled_core_op = m_scheduled_core_ops[core_op_handle]; + auto sent_frames = scheduled_core_op->h2d_finished_transferred_frames_max_value() - + scheduled_core_op->finished_read_frames_min_value(); + + return 
static_cast((scheduled_core_op->get_min_output_buffers_count()) - sent_frames); +} + +void CoreOpsScheduler::worker_thread_main() +{ + OsUtils::set_current_thread_name("SCHEDULER"); + std::unique_lock lock(m_before_read_write_mutex); + while (m_is_running) { + + m_scheduler_cv.wait(lock, [this]() { + return m_execute_worker_thread.load(); + }); + m_execute_worker_thread = false; + + if (!m_is_running) { + break; + } + + for (uint32_t core_op_handle = 0; core_op_handle < m_scheduled_core_ops.size(); core_op_handle++) { + auto status = optimize_streaming_if_enabled(core_op_handle); + if (HAILO_STREAM_ABORTED_BY_USER == status) { + continue; + } + + if (HAILO_SUCCESS != status) { + if (m_is_running) { + LOGGER__ERROR("Scheduler thread failed with status={}", status); + } + break; + } + } + + auto oracle_decisions = CoreOpsSchedulerOracle::get_oracle_decisions(*this); + + for (const auto &run_params : oracle_decisions) { + auto status = switch_core_op(run_params.core_op_handle, run_params.device_id); + if (HAILO_STREAM_ABORTED_BY_USER == status) { + continue; + } + + if (HAILO_SUCCESS != status) { + if (m_is_running) { + LOGGER__ERROR("Scheduler thread failed with status={}", status); + } + break; + } + } + } +} + +} /* namespace hailort */ \ No newline at end of file diff --git a/hailort/libhailort/src/vdevice/scheduler/scheduler.hpp b/hailort/libhailort/src/vdevice/scheduler/scheduler.hpp new file mode 100644 index 0000000..c85c216 --- /dev/null +++ b/hailort/libhailort/src/vdevice/scheduler/scheduler.hpp @@ -0,0 +1,121 @@ +/** + * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file scheduler.hpp + * @brief Class declaration for CoreOpsScheduler that schedules core-ops to be active depending on the scheduling algorithm. 
+ **/ + +#ifndef _HAILO_SCHEDULER_HPP_ +#define _HAILO_SCHEDULER_HPP_ + +#include "hailo/hailort.h" +#include "hailo/expected.hpp" + +#include "common/utils.hpp" +#include "common/filesystem.hpp" + +#include "vdevice/scheduler/scheduled_core_op_state.hpp" +#include "vdevice/scheduler/scheduled_core_op_cv.hpp" +#include "vdevice/scheduler/scheduler_base.hpp" + + +namespace hailort +{ + +#define INVALID_CORE_OP_HANDLE (UINT32_MAX) + +using scheduler_core_op_handle_t = uint32_t; +using core_op_priority_t = uint8_t; + +class CoreOpsScheduler; +using CoreOpsSchedulerPtr = std::shared_ptr; + +// We use mostly weak pointer for the scheduler to prevent circular dependency of the pointers +using CoreOpsSchedulerWeakPtr = std::weak_ptr; + +using stream_name_t = std::string; + +class CoreOpsScheduler : public SchedulerBase +{ +public: + static Expected create_round_robin(std::vector &devices_ids, + std::vector &devices_arch); + CoreOpsScheduler(hailo_scheduling_algorithm_t algorithm, std::vector &devices_ids, + std::vector &devices_arch); + + virtual ~CoreOpsScheduler(); + CoreOpsScheduler(const CoreOpsScheduler &other) = delete; + CoreOpsScheduler &operator=(const CoreOpsScheduler &other) = delete; + CoreOpsScheduler &operator=(CoreOpsScheduler &&other) = delete; + CoreOpsScheduler(CoreOpsScheduler &&other) noexcept = delete; + + Expected add_core_op(std::shared_ptr added_core_op); + + hailo_status signal_frame_pending_to_send(const scheduler_core_op_handle_t &core_op_handle, const std::string &stream_name); + + hailo_status wait_for_read(const scheduler_core_op_handle_t &core_op_handle, const std::string &stream_name, + const std::chrono::milliseconds &timeout, const std::function &predicate); + + hailo_status signal_frame_pending_to_read(const scheduler_core_op_handle_t &core_op_handle, const std::string &stream_name); + + void signal_frame_transferred_d2h(const scheduler_core_op_handle_t &core_op_handle, + const std::string &stream_name, const device_id_t &device_id); + 
hailo_status signal_read_finish(const scheduler_core_op_handle_t &core_op_handle, const std::string &stream_name, + const device_id_t &device_id); + + hailo_status enable_stream(const scheduler_core_op_handle_t &core_op_handle, const std::string &stream_name); + hailo_status disable_stream(const scheduler_core_op_handle_t &core_op_handle, const std::string &stream_name); + + hailo_status set_timeout(const scheduler_core_op_handle_t &core_op_handle, const std::chrono::milliseconds &timeout, const std::string &network_name); + hailo_status set_threshold(const scheduler_core_op_handle_t &core_op_handle, uint32_t threshold, const std::string &network_name); + hailo_status set_priority(const scheduler_core_op_handle_t &core_op_handle, core_op_priority_t priority, const std::string &network_name); + + virtual ReadyInfo is_core_op_ready(const scheduler_core_op_handle_t &core_op_handle, bool check_threshold) override; + virtual bool has_core_op_drained_everything(const scheduler_core_op_handle_t &core_op_handle, const device_id_t &device_id) override; + hailo_status flush_pending_buffers(const scheduler_core_op_handle_t &core_op_handle, const std::string &stream_name, const std::chrono::milliseconds &timeout); + + void notify_all(); + +protected: + bool choose_next_core_op(const device_id_t &device_id, bool check_threshold); + + std::unordered_map> m_should_core_op_stop; + +private: + hailo_status switch_core_op(const scheduler_core_op_handle_t &core_op_handle, const device_id_t &device_id, + bool keep_nn_config = false); + // Needs to be called with m_before_read_write_mutex held. 
+ void signal_read_finish_impl(const scheduler_core_op_handle_t &core_op_handle, const std::string &stream_name, + const device_id_t &device_id); + + hailo_status send_all_pending_buffers(const scheduler_core_op_handle_t &core_op_handle, const device_id_t &device_id, uint32_t burst_size); + hailo_status send_pending_buffer(const scheduler_core_op_handle_t &core_op_handle, const std::string &stream_name, const device_id_t &device_id); + + void decrease_core_op_counters(const scheduler_core_op_handle_t &core_op_handle); + bool should_core_op_stop(const scheduler_core_op_handle_t &core_op_handle); + bool core_op_all_streams_aborted(const scheduler_core_op_handle_t &core_op_handle); + + std::string get_core_op_name(const scheduler_core_op_handle_t &core_op_handle); + bool is_core_op_active(const scheduler_core_op_handle_t &core_op_handle); + bool is_multi_device(); + + hailo_status optimize_streaming_if_enabled(const scheduler_core_op_handle_t &core_op_handle); + uint16_t get_min_avail_buffers_count(const scheduler_core_op_handle_t &core_op_handle, const device_id_t &device_id); + uint16_t get_min_avail_output_buffers(const scheduler_core_op_handle_t &core_op_handle); + + void worker_thread_main(); + + std::vector> m_scheduled_core_ops; + std::mutex m_before_read_write_mutex; + std::unordered_map> m_core_ops_cvs; + + std::atomic_bool m_is_running; + std::atomic_bool m_execute_worker_thread; + std::thread m_scheduler_thread; + std::condition_variable m_scheduler_cv; +}; +} /* namespace hailort */ + +#endif /* _HAILO_SCHEDULER_HPP_ */ diff --git a/hailort/libhailort/src/vdevice/scheduler/scheduler_base.hpp b/hailort/libhailort/src/vdevice/scheduler/scheduler_base.hpp index e9fc0a9..a8575f1 100644 --- a/hailort/libhailort/src/vdevice/scheduler/scheduler_base.hpp +++ b/hailort/libhailort/src/vdevice/scheduler/scheduler_base.hpp @@ -16,6 +16,8 @@ #include "common/utils.hpp" #include "common/filesystem.hpp" +#include "stream_common/stream_internal.hpp" + #include @@ -26,7 
+28,6 @@ namespace hailort #define DEFAULT_SCHEDULER_MIN_THRESHOLD (0) #define INVALID_CORE_OP_HANDLE (UINT32_MAX) -#define INVALID_DEVICE_ID (UINT32_MAX) using scheduler_core_op_handle_t = uint32_t; using core_op_priority_t = uint8_t; @@ -34,10 +35,10 @@ using core_op_priority_t = uint8_t; using stream_name_t = std::string; struct ActiveDeviceInfo { - ActiveDeviceInfo(uint32_t device_id, const std::string &device_bdf_id, const std::string &device_arch) : + ActiveDeviceInfo(const device_id_t &device_id, const std::string &device_arch) : current_core_op_handle(INVALID_CORE_OP_HANDLE), next_core_op_handle(INVALID_CORE_OP_HANDLE), is_switching_core_op(false), current_batch_size(0), current_cycle_requested_transferred_frames_h2d(), current_cycle_finished_transferred_frames_d2h(), - current_cycle_finished_read_frames_d2h(), device_id(device_id), device_bdf_id(device_bdf_id), device_arch(device_arch) + pending_to_read_frames(), device_id(device_id), device_arch(device_arch) {} scheduler_core_op_handle_t current_core_op_handle; scheduler_core_op_handle_t next_core_op_handle; @@ -45,9 +46,8 @@ struct ActiveDeviceInfo { std::atomic_uint32_t current_batch_size; std::unordered_map> current_cycle_requested_transferred_frames_h2d; std::unordered_map> current_cycle_finished_transferred_frames_d2h; - std::unordered_map> current_cycle_finished_read_frames_d2h; - uint32_t device_id; - std::string device_bdf_id; + std::unordered_map> pending_to_read_frames; + device_id_t device_id; std::string device_arch; }; @@ -61,45 +61,53 @@ public: } struct ReadyInfo { - bool threshold = false; - bool timeout = false; + bool over_threshold = false; + bool over_timeout = false; bool is_ready = false; }; virtual ReadyInfo is_core_op_ready(const scheduler_core_op_handle_t &core_op_handle, bool check_threshold) = 0; - virtual bool has_core_op_drained_everything(const scheduler_core_op_handle_t &core_op_handle, uint32_t device_id) = 0; + virtual bool has_core_op_drained_everything(const 
scheduler_core_op_handle_t &core_op_handle, const device_id_t &device_id) = 0; virtual uint32_t get_device_count() const { return static_cast(m_devices.size()); } - virtual std::shared_ptr get_devices_info(uint32_t device_id) + virtual std::shared_ptr get_device_info(const device_id_t &device_id) { return m_devices[device_id]; } + + virtual std::map> &get_device_infos() + { + return m_devices; + } virtual std::map> get_core_op_priority_map() { return m_core_op_priority; } - virtual scheduler_core_op_handle_t get_last_choosen_core_op(core_op_priority_t priority) + virtual scheduler_core_op_handle_t get_next_core_op(core_op_priority_t priority) { - return m_last_choosen_core_op[priority]; + if (!contains(m_next_core_op, priority)) { + m_next_core_op[priority] = 0; + } + return m_next_core_op[priority]; } - virtual void set_last_choosen_core_op(const core_op_priority_t priority, const scheduler_core_op_handle_t &core_op_handle) + virtual void set_next_core_op(const core_op_priority_t priority, const scheduler_core_op_handle_t &core_op_handle) { - m_last_choosen_core_op[priority] = core_op_handle; + m_next_core_op[priority] = core_op_handle; } protected: - SchedulerBase(hailo_scheduling_algorithm_t algorithm, uint32_t device_count, std::vector &devices_bdf_id, + SchedulerBase(hailo_scheduling_algorithm_t algorithm, std::vector &devices_ids, std::vector &devices_arch) : m_algorithm(algorithm) { - for (uint32_t i = 0; i < device_count; i++) { - m_devices.push_back(make_shared_nothrow(i, devices_bdf_id[i], devices_arch[i])); + for (uint32_t i = 0; i < devices_ids.size(); i++) { + m_devices[devices_ids.at(i)] = make_shared_nothrow(devices_ids[i], devices_arch[i]); } }; @@ -109,11 +117,12 @@ protected: SchedulerBase &operator=(SchedulerBase &&other) = delete; SchedulerBase(SchedulerBase &&other) noexcept = delete; - std::vector> m_devices; + std::map> m_devices; + std::map> m_core_op_priority; hailo_scheduling_algorithm_t m_algorithm; - std::unordered_map 
m_last_choosen_core_op; + std::unordered_map m_next_core_op; }; } /* namespace hailort */ diff --git a/hailort/libhailort/src/vdevice/scheduler/scheduler_mon.hpp b/hailort/libhailort/src/vdevice/scheduler/scheduler_mon.hpp deleted file mode 100644 index 64fa99b..0000000 --- a/hailort/libhailort/src/vdevice/scheduler/scheduler_mon.hpp +++ /dev/null @@ -1,61 +0,0 @@ -/** - * Copyright (c) 2022 Hailo Technologies Ltd. All rights reserved. - * Distributed under the MIT license (https://opensource.org/licenses/MIT) -**/ -/** - * @file scheduler_mon.hpp - * @brief Defines for scheduler monitor of networks. - **/ - -#ifndef _HAILO_SCHEDULER_MON_HPP_ -#define _HAILO_SCHEDULER_MON_HPP_ - -#include "hailo/hailort.h" - -#include "common/filesystem.hpp" - -#if defined(_MSC_VER) -#pragma warning(push) -#pragma warning(disable: 4244 4267 4127) -#else -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wconversion" -#endif -#include "scheduler_mon.pb.h" -#if defined(_MSC_VER) -#pragma warning( pop ) -#else -#pragma GCC diagnostic pop -#endif - -#include -#include - - -namespace hailort -{ - -#define SCHEDULER_MON_TMP_DIR ("/tmp/hmon_files/") -#define SCHEDULER_MON_ENV_VAR ("HAILO_MONITOR") -#define DEFAULT_SCHEDULER_MON_INTERVAL (std::chrono::seconds(1)) -#define SCHEDULER_MON_NAN_VAL (-1) - -class SchedulerMon -{ -public: - - static bool should_monitor() - { - #if defined(__GNUC__) - auto mon_var = std::getenv(SCHEDULER_MON_ENV_VAR); - return (mon_var != nullptr) && strncmp(mon_var, "1", 1) == 0; - #else - // TODO: HRT-7304 - Add support for windows - return false; - #endif - } -}; - -} /* namespace hailort */ - -#endif /* _HAILO_SCHEDULER_MON_HPP_ */ diff --git a/hailort/libhailort/src/vdevice/scheduler/scheduler_oracle.cpp b/hailort/libhailort/src/vdevice/scheduler/scheduler_oracle.cpp index b39ea3d..1d97c58 100644 --- a/hailort/libhailort/src/vdevice/scheduler/scheduler_oracle.cpp +++ b/hailort/libhailort/src/vdevice/scheduler/scheduler_oracle.cpp @@ -14,77 +14,46 
@@ namespace hailort { -bool CoreOpsSchedulerOracle::choose_next_model(SchedulerBase &scheduler, uint32_t device_id, bool check_threshold) +scheduler_core_op_handle_t CoreOpsSchedulerOracle::choose_next_model(SchedulerBase &scheduler, const device_id_t &device_id, bool check_threshold) { - auto device_info = scheduler.get_devices_info(device_id); + auto device_info = scheduler.get_device_info(device_id); auto priority_map = scheduler.get_core_op_priority_map(); for (auto iter = priority_map.rbegin(); iter != priority_map.rend(); ++iter) { auto priority_group_size = iter->second.size(); for (uint32_t i = 0; i < priority_group_size; i++) { - uint32_t index = scheduler.get_last_choosen_core_op(iter->first) + i + 1; + uint32_t index = scheduler.get_next_core_op(iter->first) + i; index %= static_cast(priority_group_size); auto core_op_handle = iter->second[index]; - if (!is_core_op_active(scheduler, core_op_handle)) { - auto ready_info = scheduler.is_core_op_ready(core_op_handle, check_threshold); - if (ready_info.is_ready) { - TRACE(ChooseCoreOpTrace, "", core_op_handle, ready_info.threshold, ready_info.timeout, iter->first); - device_info->is_switching_core_op = true; - device_info->next_core_op_handle = core_op_handle; - scheduler.set_last_choosen_core_op(iter->first, index); - - return true; - } + auto ready_info = scheduler.is_core_op_ready(core_op_handle, check_threshold); + if (ready_info.is_ready) { + TRACE(ChooseCoreOpTrace, "", core_op_handle, ready_info.over_threshold, ready_info.over_timeout, iter->first); + device_info->is_switching_core_op = true; + device_info->next_core_op_handle = core_op_handle; + // Set next to run as next in round-robin + index = ((index + 1) % static_cast(priority_group_size)); + scheduler.set_next_core_op(iter->first, index); + return core_op_handle; } } } - return false; + return INVALID_CORE_OP_HANDLE; } -// TODO: return device handle instead index -uint32_t CoreOpsSchedulerOracle::get_avail_device(SchedulerBase &scheduler, 
scheduler_core_op_handle_t core_op_handle) -{ - const bool check_threshold = false; - auto device_count = scheduler.get_device_count(); - - // Check if should be next - /* Checking (INVALID_CORE_OP_HANDLE == m_current_core_op) for activating the first time the scheduler is running. - In this case we don't want to check threshold. */ - for (uint32_t device_index = 0; device_index < device_count; device_index++) { - auto active_device_info = scheduler.get_devices_info(device_index); - if (active_device_info->is_switching_core_op && scheduler.has_core_op_drained_everything(active_device_info->current_core_op_handle, active_device_info->device_id) && - (((INVALID_CORE_OP_HANDLE == active_device_info->current_core_op_handle) && - scheduler.is_core_op_ready(core_op_handle, check_threshold).is_ready) || - (active_device_info->next_core_op_handle == core_op_handle))) { - return active_device_info->device_id; - } - } - - // Check if device Idle - // We dont need to check if the core op is ready, because the device is idle and if we arrive here frame is already sent and as a space in the output buffer. 
- for (uint32_t device_index = 0; device_index < device_count; device_index++) { - auto active_device_info = scheduler.get_devices_info(device_index); - if (!active_device_info->is_switching_core_op && scheduler.has_core_op_drained_everything(active_device_info->current_core_op_handle, active_device_info->device_id)) { - return active_device_info->device_id; - } - } - - return INVALID_DEVICE_ID; -} - -bool CoreOpsSchedulerOracle::should_stop_streaming(SchedulerBase &scheduler, core_op_priority_t core_op_priority) +bool CoreOpsSchedulerOracle::should_stop_streaming(SchedulerBase &scheduler, core_op_priority_t core_op_priority, const device_id_t &device_id) { auto priority_map = scheduler.get_core_op_priority_map(); for (auto iter = priority_map.rbegin(); (iter != priority_map.rend()) && (iter->first >= core_op_priority); ++iter) { auto priority_group_size = iter->second.size(); for (uint32_t i = 0; i < priority_group_size; i++) { - uint32_t index = scheduler.get_last_choosen_core_op(iter->first) + i + 1; + uint32_t index = scheduler.get_next_core_op(iter->first) + i; index %= static_cast(priority_group_size); auto core_op_handle = iter->second[index]; // We dont want to stay with the same network group if there is a other qualified network group - if ((!is_core_op_active(scheduler, core_op_handle)) && scheduler.is_core_op_ready(core_op_handle, true).is_ready) { + if ((!is_core_op_active(scheduler, core_op_handle)) && scheduler.is_core_op_ready(core_op_handle, true).is_ready && + is_core_op_finished_batch(scheduler, device_id)) { return true; } } @@ -95,9 +64,9 @@ bool CoreOpsSchedulerOracle::should_stop_streaming(SchedulerBase &scheduler, cor bool CoreOpsSchedulerOracle::is_core_op_active(SchedulerBase &scheduler, scheduler_core_op_handle_t core_op_handle) { - auto device_count = scheduler.get_device_count(); - for (uint32_t device_index = 0; device_index < device_count; device_index++) { - auto active_device_info = scheduler.get_devices_info(device_index); + auto 
&devices = scheduler.get_device_infos(); + for (const auto &pair : devices) { + auto &active_device_info = pair.second; if (core_op_handle == active_device_info->current_core_op_handle) { return true; } @@ -106,4 +75,38 @@ bool CoreOpsSchedulerOracle::is_core_op_active(SchedulerBase &scheduler, schedul return false; } +bool CoreOpsSchedulerOracle::is_core_op_finished_batch(SchedulerBase &scheduler, const device_id_t &device_id) +{ + auto device_info = scheduler.get_device_info(device_id); + auto max_transferred_h2d = get_max_value_of_unordered_map(device_info->current_cycle_requested_transferred_frames_h2d[device_info->current_core_op_handle]); + + return device_info->current_batch_size <= max_transferred_h2d; +} + +std::vector CoreOpsSchedulerOracle::get_oracle_decisions(SchedulerBase &scheduler) +{ + auto &devices = scheduler.get_device_infos(); + std::vector oracle_decision; + + for (const auto &pair : devices) { + auto &active_device_info = pair.second; + + // Check if device is switching ng + if (active_device_info->is_switching_core_op) { + oracle_decision.push_back({active_device_info->next_core_op_handle, active_device_info->device_id}); + } + + // Check if device is idle + if (!active_device_info->is_switching_core_op && + scheduler.has_core_op_drained_everything(active_device_info->current_core_op_handle, active_device_info->device_id)) { + auto core_op_handle = choose_next_model(scheduler, active_device_info->device_id, false); + if (core_op_handle != INVALID_CORE_OP_HANDLE) { + oracle_decision.push_back({core_op_handle, active_device_info->device_id}); + } + } + } + + return oracle_decision; +} + } /* namespace hailort */ diff --git a/hailort/libhailort/src/vdevice/scheduler/scheduler_oracle.hpp b/hailort/libhailort/src/vdevice/scheduler/scheduler_oracle.hpp index 766bf45..fd09944 100644 --- a/hailort/libhailort/src/vdevice/scheduler/scheduler_oracle.hpp +++ b/hailort/libhailort/src/vdevice/scheduler/scheduler_oracle.hpp @@ -21,17 +21,23 @@ namespace 
hailort { +struct RunParams { + scheduler_core_op_handle_t core_op_handle; + device_id_t device_id; +}; + class CoreOpsSchedulerOracle { public: - static bool choose_next_model(SchedulerBase &scheduler, uint32_t device_id, bool check_threshold); - static uint32_t get_avail_device(SchedulerBase &scheduler, scheduler_core_op_handle_t core_op_handle); - static bool should_stop_streaming(SchedulerBase &scheduler, core_op_priority_t core_op_priority); + static scheduler_core_op_handle_t choose_next_model(SchedulerBase &scheduler, const device_id_t &device_id, bool check_threshold); + static std::vector get_oracle_decisions(SchedulerBase &scheduler); + static bool should_stop_streaming(SchedulerBase &scheduler, core_op_priority_t core_op_priority, const device_id_t &device_id); private: CoreOpsSchedulerOracle() {} // TODO: Consider returning a vector of devices (we can use this function in other places) static bool is_core_op_active(SchedulerBase &scheduler, scheduler_core_op_handle_t core_op_handle); + static bool is_core_op_finished_batch(SchedulerBase &scheduler, const device_id_t &device_id); }; } /* namespace hailort */ diff --git a/hailort/libhailort/src/vdevice/vdevice.cpp b/hailort/libhailort/src/vdevice/vdevice.cpp index 7030013..310892b 100644 --- a/hailort/libhailort/src/vdevice/vdevice.cpp +++ b/hailort/libhailort/src/vdevice/vdevice.cpp @@ -40,17 +40,23 @@ std::string SharedResourceManager::unique_key() static hailo_status validate_device_ids_match(const hailo_vdevice_params_t ¶ms, const std::set &old_ids) { - std::set new_ids; + const auto group_id_name = (nullptr == params.group_id ? "NULL" : params.group_id); + CHECK(old_ids.size() == static_cast(params.device_count), HAILO_INVALID_OPERATION, + "VDevice invalid device count for group_id {}", group_id_name); + for (uint32_t i = 0; i < params.device_count; i++) { - // TODO: maybe needs to normalize domain? 
- new_ids.insert(params.device_ids[i].id); + auto device_id_found = std::find_if(old_ids.begin(), old_ids.end(), + [&](const std::string &device_id) { + return Device::device_ids_equal(params.device_ids[i].id, device_id); + }); + CHECK(device_id_found != old_ids.end(), HAILO_INVALID_OPERATION, + "Device {} not used by group_id {}", params.device_ids[i].id, group_id_name); } - CHECK(old_ids == new_ids, HAILO_INVALID_OPERATION, "Different VDevice ids used by group_id {}", (nullptr == params.group_id ? "NULL" : params.group_id)); return HAILO_SUCCESS; } -hailo_status validate_same_vdevice(const hailo_vdevice_params_t ¶ms, const VDevice &vdevice) +static hailo_status validate_same_vdevice(const hailo_vdevice_params_t ¶ms, const VDevice &vdevice) { // Validate device ids if (params.device_ids != nullptr) { @@ -102,9 +108,6 @@ VDeviceHandle::~VDeviceHandle() Expected> VDeviceHandle::create(const hailo_vdevice_params_t ¶ms) { - auto status = VDeviceBase::validate_params(params); - CHECK_SUCCESS_AS_EXPECTED(status); - auto &manager = SharedResourceManager::get_instance(); auto create = [¶ms]() { return VDeviceBase::create(params); @@ -164,9 +167,10 @@ Expected VDeviceHandle::get_default_streams_interface( #ifdef HAILO_SUPPORT_MULTI_PROCESS -VDeviceClient::VDeviceClient(std::unique_ptr client, uint32_t handle) +VDeviceClient::VDeviceClient(std::unique_ptr client, uint32_t handle, std::vector> &&devices) : m_client(std::move(client)) , m_handle(handle) + , m_devices(std::move(devices)) {} VDeviceClient::~VDeviceClient() @@ -177,7 +181,7 @@ VDeviceClient::~VDeviceClient() // The vdevice in the service will destruct the ConfiguredNetworkGroupBase, // and then the ConfiguredNetworkGroupClient destructor will be called - causing double destruction on ConfiguredNetworkGroupBase. 
m_network_groups.clear(); - auto reply = m_client->VDevice_release(m_handle); + auto reply = m_client->VDevice_release(m_handle, OsUtils::get_curr_pid()); if (reply != HAILO_SUCCESS) { LOGGER__CRITICAL("VDevice_release failed!"); } @@ -233,7 +237,11 @@ Expected> VDeviceClient::create(const hailo_vdevice_par auto reply = client->VDevice_create(params, OsUtils::get_curr_pid()); CHECK_EXPECTED(reply); - auto client_vdevice = std::unique_ptr(new VDeviceClient(std::move(client), reply.value())); + auto handle = reply.value(); + auto devices = client->VDevice_get_physical_devices(handle); + CHECK_EXPECTED(devices); + + auto client_vdevice = std::unique_ptr(new VDeviceClient(std::move(client), handle, devices.release())); CHECK_AS_EXPECTED(client_vdevice != nullptr, HAILO_OUT_OF_HOST_MEMORY); return std::unique_ptr(std::move(client_vdevice)); @@ -263,8 +271,13 @@ Expected VDeviceClient::configure(Hef &hef, Expected>> VDeviceClient::get_physical_devices() const { - LOGGER__ERROR("ConfiguredNetworkGroup::get_physical_devices function is not supported when using multi-process service"); - return make_unexpected(HAILO_INVALID_OPERATION); + std::vector> devices_refs; + + for (auto &device : m_devices) { + devices_refs.push_back(*device); + } + + return devices_refs; } Expected> VDeviceClient::get_physical_devices_ids() const @@ -282,9 +295,15 @@ Expected VDeviceClient::get_default_streams_interface( Expected> VDevice::create(const hailo_vdevice_params_t ¶ms) { + auto status = VDeviceBase::validate_params(params); + CHECK_SUCCESS_AS_EXPECTED(status); + std::unique_ptr vdevice; + if (params.multi_process_service) { #ifdef HAILO_SUPPORT_MULTI_PROCESS + CHECK_AS_EXPECTED(params.scheduling_algorithm != HAILO_SCHEDULING_ALGORITHM_NONE, HAILO_INVALID_ARGUMENT, + "Multi-process service is supported only with HailoRT scheduler, please choose scheduling algorithm"); auto expected_vdevice = VDeviceClient::create(params); CHECK_EXPECTED(expected_vdevice); vdevice = 
expected_vdevice.release(); @@ -351,7 +370,8 @@ Expected> VDeviceBase::create(const hailo_vdevice_p device_archs.reserve(params.device_count); std::string vdevice_ids = "VDevice Infos:"; - for (const auto &device : devices) { + for (const auto &pair : devices) { + auto &device = pair.second; auto id_info_str = device->get_dev_id(); device_ids.emplace_back(id_info_str); auto device_arch = device->get_architecture(); @@ -366,7 +386,7 @@ Expected> VDeviceBase::create(const hailo_vdevice_p CoreOpsSchedulerPtr scheduler_ptr; if (HAILO_SCHEDULING_ALGORITHM_NONE != params.scheduling_algorithm) { if (HAILO_SCHEDULING_ALGORITHM_ROUND_ROBIN == params.scheduling_algorithm) { - auto core_ops_scheduler = CoreOpsScheduler::create_round_robin(params.device_count, device_ids, device_archs); + auto core_ops_scheduler = CoreOpsScheduler::create_round_robin(device_ids, device_archs); CHECK_EXPECTED(core_ops_scheduler); scheduler_ptr = core_ops_scheduler.release(); } else { @@ -395,34 +415,36 @@ Expected VDeviceBase::configure(Hef &hef, for (const auto &network_params_pair : local_config_params.value()) { std::vector> core_ops; + const bool use_multiplexer = should_use_multiplexer(network_params_pair.second); + std::shared_ptr identical_core_op = nullptr; - if (m_core_ops_scheduler && PipelineMultiplexer::should_use_multiplexer()) { + if (use_multiplexer) { for (auto &network_group : m_vdevice_core_ops) { - if ((network_group->equals(hef, network_params_pair)) && (1 == network_group->get_input_streams().size())) { - // TODO (HRT-8634): Support multi-inputs NGs (multi networks) + if (network_group->multiplexer_supported() && network_group->equals(hef, network_params_pair)) { identical_core_op = network_group; break; } } } - std::shared_ptr vdevice_netwrok_group = nullptr; + std::shared_ptr vdevice_network_group = nullptr; if (identical_core_op) { - auto vdevice_netwrok_group_exp = VDeviceCoreOp::duplicate(identical_core_op); - CHECK_EXPECTED(vdevice_netwrok_group_exp); + auto 
vdevice_network_group_exp = VDeviceCoreOp::duplicate(identical_core_op); + CHECK_EXPECTED(vdevice_network_group_exp); - vdevice_netwrok_group = vdevice_netwrok_group_exp.release(); - vdevice_netwrok_group->set_core_op_handle(identical_core_op->core_op_handle()); - vdevice_netwrok_group->create_vdevice_streams_from_duplicate(identical_core_op); + vdevice_network_group = vdevice_network_group_exp.release(); + vdevice_network_group->set_core_op_handle(identical_core_op->core_op_handle()); + auto status = vdevice_network_group->create_vdevice_streams_from_duplicate(identical_core_op); + CHECK_SUCCESS_AS_EXPECTED(status); } else { - auto vdevice_netwrok_group_expected = create_vdevice_network_group(hef, network_params_pair); - CHECK_EXPECTED(vdevice_netwrok_group_expected); - vdevice_netwrok_group = vdevice_netwrok_group_expected.release(); - m_vdevice_core_ops.push_back(vdevice_netwrok_group); + auto vdevice_network_group_expected = create_vdevice_network_group(hef, network_params_pair, use_multiplexer); + CHECK_EXPECTED(vdevice_network_group_expected); + vdevice_network_group = vdevice_network_group_expected.release(); + m_vdevice_core_ops.push_back(vdevice_network_group); } - core_ops.push_back(vdevice_netwrok_group); - auto net_flow_ops = hef.pimpl->post_process_ops(vdevice_netwrok_group->name()); - auto net_group_expected = ConfiguredNetworkGroupBase::create(network_params_pair.second, std::move(core_ops), std::move(net_flow_ops)); + core_ops.push_back(vdevice_network_group); + auto metadata = hef.pimpl->network_group_metadata(vdevice_network_group->name()); + auto net_group_expected = ConfiguredNetworkGroupBase::create(network_params_pair.second, std::move(core_ops), std::move(metadata)); CHECK_EXPECTED(net_group_expected); auto network_group_ptr = net_group_expected.release(); @@ -438,9 +460,10 @@ Expected VDeviceBase::configure(Hef &hef, Expected VDeviceBase::get_default_streams_interface() const { - auto stream_interface = 
m_devices[0]->get_default_streams_interface(); + auto stream_interface = m_devices.begin()->second.get()->get_default_streams_interface(); CHECK_EXPECTED(stream_interface); - for (auto &dev : m_devices) { + for (const auto &pair : m_devices) { + auto &dev = pair.second; auto current_stream_interface = dev->get_default_streams_interface(); CHECK_EXPECTED(current_stream_interface); CHECK_AS_EXPECTED(*current_stream_interface == *stream_interface, HAILO_INTERNAL_FAILURE, @@ -449,10 +472,9 @@ Expected VDeviceBase::get_default_streams_interface() return stream_interface.release(); } -Expected>> VDeviceBase::create_devices(const hailo_vdevice_params_t ¶ms) +Expected>> VDeviceBase::create_devices(const hailo_vdevice_params_t ¶ms) { - std::vector> devices; - devices.reserve(params.device_count); + std::map> devices; const bool user_specific_devices = (params.device_ids != nullptr); @@ -484,7 +506,7 @@ Expected>> VDeviceBase::create_devices(const } CHECK_SUCCESS_AS_EXPECTED(status); } - devices.emplace_back(device.release()); + devices[device_id] = device.release(); } CHECK_AS_EXPECTED(params.device_count == devices.size(), HAILO_OUT_OF_PHYSICAL_DEVICES, "Failed to create vdevice. there are not enough free devices. 
requested: {}, found: {}", @@ -513,7 +535,8 @@ Expected> VDeviceBase::get_device_ids(const hailo_vdevi Expected VDeviceBase::create_local_config_params(Hef &hef, const NetworkGroupsParamsMap &configure_params) { - for (auto &device : m_devices) { + for (const auto &pair : m_devices) { + auto &device = pair.second; auto status = dynamic_cast(*device).check_hef_is_compatible(hef); CHECK_SUCCESS_AS_EXPECTED(status); } @@ -521,7 +544,7 @@ Expected VDeviceBase::create_local_config_params(Hef &he auto local_config_params = configure_params; if (local_config_params.empty()) { // All stream iface should be the same - auto config_params_exp = m_devices[0]->create_configure_params(hef); + auto config_params_exp = m_devices.begin()->second->create_configure_params(hef); CHECK_EXPECTED(config_params_exp); local_config_params = config_params_exp.release(); } @@ -544,39 +567,77 @@ Expected VDeviceBase::create_local_config_params(Hef &he return local_config_params; } -Expected> VDeviceBase::create_vdevice_network_group(Hef &hef, const std::pair ¶ms) +Expected> VDeviceBase::create_vdevice_network_group(Hef &hef, + const std::pair ¶ms, bool use_multiplexer) { - std::vector> core_ops_bundle; // bundle of the same CoreOps for all devices - core_ops_bundle.reserve(m_devices.size()); + std::map>> core_ops_bundle; // configure all the devices to this ng and then push the core ops to bundle vector - for (auto &device : m_devices) { + for (const auto &pair : m_devices) { + auto &device = pair.second; auto ng_vector = device->configure(hef, { std::make_pair(params.first, params.second) }); CHECK_EXPECTED(ng_vector); assert(1 == ng_vector->size()); auto network_group_base = std::dynamic_pointer_cast(ng_vector.value()[0]); + + auto networks_info = network_group_base->get_network_infos(); + CHECK_EXPECTED(networks_info); + if (m_core_ops_scheduler && 1 < networks_info->size()) { + LOGGER__WARNING("Configuring '{}' which is a multi-networks model with scheduler enabled." 
+ " The model will be scheduled only when all inputs and outputs of the network group will be ready", + network_group_base->name()); + } + auto ng_core_ops = network_group_base->get_core_ops(); + auto &core_ops_vector = core_ops_bundle.emplace(device->get_dev_id(), std::vector>{}).first->second; - core_ops_bundle.insert(core_ops_bundle.begin(), ng_core_ops.begin(), ng_core_ops.end()); + core_ops_vector.insert(core_ops_vector.begin(), ng_core_ops.begin(), ng_core_ops.end()); } - auto vdevice_netwrok_group_exp = VDeviceCoreOp::create(core_ops_bundle, m_core_ops_scheduler, hef.hash()); - CHECK_EXPECTED(vdevice_netwrok_group_exp); - auto vdevice_netwrok_group = vdevice_netwrok_group_exp.release(); + + auto vdevice_network_group_exp = VDeviceCoreOp::create(core_ops_bundle, m_core_ops_scheduler, hef.hash()); + CHECK_EXPECTED(vdevice_network_group_exp); + auto vdevice_network_group = vdevice_network_group_exp.release(); auto ng_handle = INVALID_CORE_OP_HANDLE; if (m_core_ops_scheduler) { - auto core_op_handle_exp = m_core_ops_scheduler->add_core_op(vdevice_netwrok_group); + auto core_op_handle_exp = m_core_ops_scheduler->add_core_op(vdevice_network_group); CHECK_EXPECTED(core_op_handle_exp); ng_handle = core_op_handle_exp.release(); } - vdevice_netwrok_group->set_core_op_handle(ng_handle); - auto status = vdevice_netwrok_group->create_vdevice_streams_from_config_params(make_shared_nothrow(), ng_handle); + vdevice_network_group->set_core_op_handle(ng_handle); + + std::shared_ptr multiplexer = nullptr; + if (use_multiplexer) { + multiplexer = make_shared_nothrow(); + CHECK_NOT_NULL_AS_EXPECTED(multiplexer, HAILO_OUT_OF_HOST_MEMORY); + } + + auto status = vdevice_network_group->create_vdevice_streams_from_config_params(multiplexer, ng_handle); CHECK_SUCCESS_AS_EXPECTED(status); - return vdevice_netwrok_group; + return vdevice_network_group; } +bool VDeviceBase::should_use_multiplexer(const ConfigureNetworkParams &network_params) +{ + const auto &stream_params_by_name = 
network_params.stream_params_by_name; + const auto input_counts = std::count_if(stream_params_by_name.begin(), stream_params_by_name.end(), + [](const std::pair &stream_params) { + return HAILO_H2D_STREAM == stream_params.second.direction; + }); + + const bool has_async_stream = std::any_of(stream_params_by_name.begin(), stream_params_by_name.end(), + [](const std::pair &stream_params) { + return 0 != (stream_params.second.flags & HAILO_STREAM_FLAGS_ASYNC); + }); + + return + PipelineMultiplexer::is_multiplexer_supported() && + m_core_ops_scheduler && + input_counts == 1 && // TODO (HRT-8634): Support multi-inputs NGs (multi networks) + !has_async_stream; // TODO (HRT-10557): Support async multiplexer +} } /* namespace hailort */ diff --git a/hailort/libhailort/src/vdevice/vdevice_core_op.cpp b/hailort/libhailort/src/vdevice/vdevice_core_op.cpp index a37c01d..18e3715 100644 --- a/hailort/libhailort/src/vdevice/vdevice_core_op.cpp +++ b/hailort/libhailort/src/vdevice/vdevice_core_op.cpp @@ -18,7 +18,7 @@ namespace hailort { Expected> VDeviceActivatedCoreOp::create( - std::vector> &core_ops, + std::map>> &core_ops, std::map> &input_streams, std::map> &output_streams, const hailo_activate_network_group_params_t &network_group_params, @@ -29,11 +29,14 @@ Expected> VDeviceActivatedCoreOp::create( auto status = HAILO_UNINITIALIZED; std::vector> activated_network_groups; activated_network_groups.reserve(core_ops.size()); - for (auto core_op : core_ops) { - auto ang = core_op->create_activated_network_group(network_group_params, dynamic_batch_size, - resume_pending_stream_transfers); - CHECK_EXPECTED(ang); - activated_network_groups.emplace_back(ang.release()); + for (const auto &pair : core_ops) { + auto &core_op_vector = pair.second; + for (auto &core_op : core_op_vector) { + auto ang = core_op->create_activated_network_group(network_group_params, dynamic_batch_size, + resume_pending_stream_transfers); + CHECK_EXPECTED(ang); + 
activated_network_groups.emplace_back(ang.release()); + } } auto ang = VDeviceActivatedCoreOp(std::move(activated_network_groups), input_streams, output_streams, network_group_params, core_op_activated_event, deactivation_time_accumulator, status); @@ -87,7 +90,7 @@ VDeviceActivatedCoreOp::VDeviceActivatedCoreOp(VDeviceActivatedCoreOp &&other) n } -Expected> VDeviceCoreOp::create(std::vector> core_ops, +Expected> VDeviceCoreOp::create(const std::map>> &core_ops, CoreOpsSchedulerWeakPtr core_ops_scheduler, const std::string &hef_hash) { auto status = HAILO_UNINITIALIZED; @@ -116,9 +119,9 @@ Expected> VDeviceCoreOp::duplicate(std::shared_pt } -VDeviceCoreOp::VDeviceCoreOp(std::vector> core_ops, +VDeviceCoreOp::VDeviceCoreOp(const std::map>> &core_ops, CoreOpsSchedulerWeakPtr core_ops_scheduler, const std::string &hef_hash, hailo_status &status) : - CoreOp(core_ops[0]->m_config_params, core_ops[0]->m_metadata, status), + CoreOp((core_ops.begin()->second)[0]->m_config_params, (core_ops.begin()->second)[0]->m_metadata, status), m_core_ops(std::move(core_ops)), m_core_ops_scheduler(core_ops_scheduler), m_scheduler_handle(INVALID_CORE_OP_HANDLE), @@ -129,21 +132,25 @@ VDeviceCoreOp::VDeviceCoreOp(std::vector> core_ops, Expected VDeviceCoreOp::get_default_streams_interface() { - auto first_streams_interface = m_core_ops[0]->get_default_streams_interface(); + auto first_streams_interface = (m_core_ops.begin()->second)[0]->get_default_streams_interface(); CHECK_EXPECTED(first_streams_interface); #ifndef NDEBUG // Check that all physical devices has the same interface - for (auto &core_op : m_core_ops) { - auto iface = core_op->get_default_streams_interface(); - CHECK_EXPECTED(iface); - CHECK_AS_EXPECTED(iface.value() == first_streams_interface.value(), HAILO_INTERNAL_FAILURE, - "Not all default stream interfaces are the same"); + for (const auto &pair : m_core_ops) { + auto &core_op_vector = pair.second; + for (auto &core_op : core_op_vector) { + auto iface = 
core_op->get_default_streams_interface(); + CHECK_EXPECTED(iface); + CHECK_AS_EXPECTED(iface.value() == first_streams_interface.value(), HAILO_INTERNAL_FAILURE, + "Not all default stream interfaces are the same"); + } } #endif return first_streams_interface; } -hailo_status VDeviceCoreOp::create_vdevice_streams_from_config_params(std::shared_ptr multiplexer, scheduler_core_op_handle_t scheduler_handle) +hailo_status VDeviceCoreOp::create_vdevice_streams_from_config_params(std::shared_ptr multiplexer, + scheduler_core_op_handle_t scheduler_handle) { // TODO - HRT-6931 - raise error on this case if (((m_config_params.latency & HAILO_LATENCY_MEASURE) == HAILO_LATENCY_MEASURE) && (1 < m_core_ops.size())) { @@ -183,8 +190,11 @@ hailo_status VDeviceCoreOp::create_vdevice_streams_from_config_params(std::share TRACE(CreateCoreOpInputStreamsTrace, "", name(), input_stream.first, (uint32_t)expected_queue_size.value()); } for (const auto &output_stream : m_output_streams) { - if ((hailo_format_order_t::HAILO_FORMAT_ORDER_HAILO_NMS == (static_cast(*output_stream.second).get_layer_info().format.order)) || - (HAILO_STREAM_INTERFACE_ETH == static_cast(*output_stream.second).get_interface())) { + if (hailo_format_order_t::HAILO_FORMAT_ORDER_HAILO_NMS == (static_cast(*output_stream.second).get_layer_info().format.order)) { + TRACE(CreateCoreOpOutputStreamsTrace, "", name(), output_stream.first, SCHEDULER_MON_NAN_VAL); + continue; + } + if (HAILO_STREAM_INTERFACE_ETH == static_cast(*output_stream.second).get_interface()) { continue; } auto expected_queue_size = static_cast(*output_stream.second).get_buffer_frames_size(); @@ -192,8 +202,10 @@ hailo_status VDeviceCoreOp::create_vdevice_streams_from_config_params(std::share TRACE(CreateCoreOpOutputStreamsTrace, "", name(), output_stream.first, (uint32_t)expected_queue_size.value()); } - auto status = m_multiplexer->add_core_op_instance(m_multiplexer_handle, *this); - CHECK_SUCCESS(status); + if (m_multiplexer) { + auto status = 
m_multiplexer->add_core_op_instance(m_multiplexer_handle, *this); + CHECK_SUCCESS(status); + } return HAILO_SUCCESS; } @@ -204,27 +216,36 @@ hailo_status VDeviceCoreOp::create_input_vdevice_stream_from_config_params(const auto edge_layer = get_layer_info(stream_name); CHECK_EXPECTED_AS_STATUS(edge_layer); - if (HailoRTCommon::is_vdma_stream_interface(stream_params.stream_interface)){ - std::vector> low_level_streams; - low_level_streams.reserve(m_core_ops.size()); - for (auto &core_op : m_core_ops) { - auto stream = core_op->get_input_stream_by_name(stream_name); - CHECK(stream, HAILO_INTERNAL_FAILURE); - low_level_streams.emplace_back(dynamic_cast(stream.release().get())); + if (HailoRTCommon::is_vdma_stream_interface(stream_params.stream_interface)) { + std::map> low_level_streams; + for (const auto &pair : m_core_ops) { + auto &device_id = pair.first; + auto &core_op_vector = pair.second; + for (auto &core_op : core_op_vector) { + auto stream = core_op->get_input_stream_by_name(stream_name); + CHECK(stream, HAILO_INTERNAL_FAILURE); + low_level_streams.emplace(device_id, dynamic_cast(stream.release().get())); + } } - auto input_stream = InputVDeviceBaseStream::create(std::move(low_level_streams), edge_layer.value(), - scheduler_handle, m_core_op_activated_event, m_core_ops_scheduler); + auto input_stream = VDeviceInputStreamBase::create(std::move(low_level_streams), stream_params, + edge_layer.value(), scheduler_handle, m_core_op_activated_event, m_core_ops_scheduler); CHECK_EXPECTED_AS_STATUS(input_stream); - auto input_stream_wrapper = VDeviceInputStreamMultiplexerWrapper::create(input_stream.release(), edge_layer->network_name, multiplexer, scheduler_handle); - CHECK_EXPECTED_AS_STATUS(input_stream_wrapper); - m_input_streams.insert(make_pair(stream_name, input_stream_wrapper.release())); + + if (multiplexer) { + auto input_stream_wrapper = VDeviceInputStreamMultiplexerWrapper::create(input_stream.release(), edge_layer->network_name, multiplexer, 
scheduler_handle); + CHECK_EXPECTED_AS_STATUS(input_stream_wrapper); + m_input_streams.insert(make_pair(stream_name, input_stream_wrapper.release())); + } else { + m_input_streams.insert(make_pair(stream_name, input_stream.release())); + } + } else { assert(1 == m_core_ops.size()); - auto stream = m_core_ops[0]->get_input_stream_by_name(stream_name); + auto stream = (m_core_ops.begin()->second)[0]->get_input_stream_by_name(stream_name); CHECK(stream, HAILO_INTERNAL_FAILURE); assert(1 == m_core_ops.size()); - assert(contains(m_core_ops[0]->m_input_streams, stream_name)); - m_input_streams.insert(make_pair(stream_name, m_core_ops[0]->m_input_streams.at(stream_name))); + assert(contains((m_core_ops.begin()->second)[0]->m_input_streams, stream_name)); + m_input_streams.insert(make_pair(stream_name, m_core_ops.begin()->second[0]->m_input_streams.at(stream_name))); } return HAILO_SUCCESS; @@ -237,23 +258,32 @@ hailo_status VDeviceCoreOp::create_output_vdevice_stream_from_config_params(cons CHECK_EXPECTED_AS_STATUS(edge_layer); if (HailoRTCommon::is_vdma_stream_interface(stream_params.stream_interface)) { - std::vector> low_level_streams; - low_level_streams.reserve(m_core_ops.size()); - for (auto &core_op : m_core_ops) { - auto stream = core_op->get_output_stream_by_name(stream_name); - CHECK(stream, HAILO_INTERNAL_FAILURE); - low_level_streams.emplace_back(dynamic_cast(stream.release().get())); + std::map> low_level_streams; + for (const auto &pair : m_core_ops) { + auto &device_id = pair.first; + auto &core_op_vector = pair.second; + for (auto &core_op : core_op_vector) { + auto stream = core_op->get_output_stream_by_name(stream_name); + CHECK(stream, HAILO_INTERNAL_FAILURE); + low_level_streams.emplace(device_id, dynamic_cast(stream.release().get())); + } } - auto output_stream = OutputVDeviceBaseStream::create(std::move(low_level_streams), edge_layer.value(), - scheduler_handle, m_core_op_activated_event, m_core_ops_scheduler); + auto output_stream = 
VDeviceOutputStreamBase::create(std::move(low_level_streams), stream_params, + edge_layer.value(), scheduler_handle, m_core_op_activated_event, m_core_ops_scheduler); CHECK_EXPECTED_AS_STATUS(output_stream); - auto output_stream_wrapper = VDeviceOutputStreamMultiplexerWrapper::create(output_stream.release(), edge_layer->network_name, multiplexer, scheduler_handle); - CHECK_EXPECTED_AS_STATUS(output_stream_wrapper); - m_output_streams.insert(make_pair(stream_name, output_stream_wrapper.release())); + + if (multiplexer) { + // We allow multiplexer only on scheduled streams. + auto output_stream_wrapper = VDeviceOutputStreamMultiplexerWrapper::create(output_stream.release(), edge_layer->network_name, multiplexer, scheduler_handle); + CHECK_EXPECTED_AS_STATUS(output_stream_wrapper); + m_output_streams.insert(make_pair(stream_name, output_stream_wrapper.release())); + } else { + m_output_streams.insert(make_pair(stream_name, output_stream.release())); + } } else { assert(1 == m_core_ops.size()); - assert(contains(m_core_ops[0]->m_output_streams, stream_name)); - m_output_streams.insert(make_pair(stream_name, m_core_ops[0]->m_output_streams.at(stream_name))); + assert(contains((m_core_ops.begin()->second)[0]->m_output_streams, stream_name)); + m_output_streams.insert(make_pair(stream_name, (m_core_ops.begin()->second)[0]->m_output_streams.at(stream_name))); } return HAILO_SUCCESS; @@ -266,6 +296,7 @@ hailo_status VDeviceCoreOp::create_vdevice_streams_from_duplicate(std::shared_pt LOGGER__WARNING("Latency measurement is not supported on more than 1 physical device."); } + assert(other->m_multiplexer != nullptr); m_multiplexer = other->m_multiplexer; m_multiplexer_handle = other->multiplexer_duplicates_count() + 1; @@ -347,7 +378,7 @@ hailo_status VDeviceCoreOp::set_scheduler_priority(uint8_t priority, const std:: Expected> VDeviceCoreOp::get_latency_meters() { - return m_core_ops[0]->get_latency_meters(); + return m_core_ops.begin()->second[0]->get_latency_meters(); } 
Expected VDeviceCoreOp::get_boundary_vdma_channel_by_stream_name(const std::string &stream_name) @@ -355,7 +386,7 @@ Expected VDeviceCoreOp::get_boundary_vdma_channel_by_s CHECK_AS_EXPECTED(1 == m_core_ops.size(), HAILO_INVALID_OPERATION, "get_boundary_vdma_channel_by_stream_name function is not supported on more than 1 physical device."); - return m_core_ops[0]->get_boundary_vdma_channel_by_stream_name(stream_name); + return m_core_ops.begin()->second[0]->get_boundary_vdma_channel_by_stream_name(stream_name); } void VDeviceCoreOp::set_vstreams_multiplexer_callbacks(std::vector &output_vstreams) @@ -376,10 +407,10 @@ void VDeviceCoreOp::set_vstreams_multiplexer_callbacks(std::vector> VDeviceCoreOp::get_core_op_by_device_index(uint32_t device_index) +Expected> VDeviceCoreOp::get_core_op_by_device_id(const device_id_t &device_id) { - CHECK_AS_EXPECTED(device_index < m_core_ops.size(), HAILO_INVALID_ARGUMENT); - auto core_op = std::dynamic_pointer_cast(m_core_ops[device_index]); + CHECK_AS_EXPECTED(m_core_ops.count(device_id), HAILO_INVALID_ARGUMENT); + auto core_op = std::dynamic_pointer_cast(m_core_ops[device_id][0]); CHECK_NOT_NULL_AS_EXPECTED(core_op, HAILO_INTERNAL_FAILURE); return core_op; } @@ -407,4 +438,11 @@ Expected> VDeviceCoreOp::create_activated return res; } +Expected VDeviceCoreOp::run_hw_infer_estimator() +{ + CHECK_AS_EXPECTED(1 == m_core_ops.size(), HAILO_INVALID_OPERATION, + "run_hw_infer_estimator function is not supported on more than 1 physical device."); + return m_core_ops.begin()->second[0]->run_hw_infer_estimator(); +} + } /* namespace hailort */ diff --git a/hailort/libhailort/src/vdevice/vdevice_core_op.hpp b/hailort/libhailort/src/vdevice/vdevice_core_op.hpp index 6fb1837..e93c5e8 100644 --- a/hailort/libhailort/src/vdevice/vdevice_core_op.hpp +++ b/hailort/libhailort/src/vdevice/vdevice_core_op.hpp @@ -16,7 +16,7 @@ #include "hailo/network_group.hpp" #include "hailo/vstream.hpp" -#include "vdevice/scheduler/network_group_scheduler.hpp" 
+#include "vdevice/scheduler/scheduler.hpp" #include "vdevice/pipeline_multiplexer.hpp" #include @@ -28,7 +28,7 @@ namespace hailort class VDeviceActivatedCoreOp : public ActivatedCoreOp { public: - static Expected> create(std::vector> &core_ops, + static Expected> create(std::map>> &core_ops, std::map> &input_streams, std::map> &output_streams, const hailo_activate_network_group_params_t &network_group_params, EventPtr core_op_activated_event, @@ -76,7 +76,7 @@ private: class VDeviceCoreOp : public CoreOp { public: - static Expected> create(std::vector> core_ops, + static Expected> create(const std::map>> &core_ops, CoreOpsSchedulerWeakPtr core_ops_scheduler, const std::string &hef_hash); static Expected> duplicate(std::shared_ptr other); @@ -112,10 +112,19 @@ public: return false; } - uint32_t multiplexer_duplicates_count() + uint32_t multiplexer_duplicates_count() const { - assert(m_multiplexer->instances_count() > 0); - return static_cast(m_multiplexer->instances_count() - 1); + if (m_multiplexer) { + assert(m_multiplexer->instances_count() > 0); + return static_cast(m_multiplexer->instances_count() - 1); + } else { + return 0; + } + } + + bool multiplexer_supported() const + { + return nullptr != m_multiplexer; } virtual Expected get_default_streams_interface() override; @@ -155,13 +164,15 @@ public: const hailo_activate_network_group_params_t &network_group_params, uint16_t dynamic_batch_size, bool resume_pending_stream_transfers) override; - Expected> get_core_op_by_device_index(uint32_t device_index); + Expected> get_core_op_by_device_id(const device_id_t &device_bdf_id); + + virtual Expected run_hw_infer_estimator() override; private: - VDeviceCoreOp(std::vector> core_ops, CoreOpsSchedulerWeakPtr core_ops_scheduler, + VDeviceCoreOp(const std::map>> &core_ops, CoreOpsSchedulerWeakPtr core_ops_scheduler, const std::string &hef_hash, hailo_status &status); - std::vector> m_core_ops; + std::map>> m_core_ops; CoreOpsSchedulerWeakPtr m_core_ops_scheduler; 
scheduler_core_op_handle_t m_scheduler_handle; multiplexer_core_op_handle_t m_multiplexer_handle; diff --git a/hailort/libhailort/src/vdevice/vdevice_internal.hpp b/hailort/libhailort/src/vdevice/vdevice_internal.hpp index 22c2948..a2ba698 100644 --- a/hailort/libhailort/src/vdevice/vdevice_internal.hpp +++ b/hailort/libhailort/src/vdevice/vdevice_internal.hpp @@ -27,7 +27,7 @@ #include "vdma/vdma_device.hpp" #include "vdma/vdma_config_manager.hpp" #include "vdevice/vdevice_core_op.hpp" -#include "vdevice/scheduler/network_group_scheduler.hpp" +#include "vdevice/scheduler/scheduler.hpp" #ifdef HAILO_SUPPORT_MULTI_PROCESS #include "service/hailort_rpc_client.hpp" @@ -55,7 +55,8 @@ public: { // Return Expected for future functionality std::vector> devices_refs; - for (auto &device : m_devices) { + for (const auto &pair : m_devices) { + auto &device = pair.second; devices_refs.push_back(*device); } return devices_refs; @@ -65,8 +66,9 @@ public: { std::vector device_ids; device_ids.reserve(m_devices.size()); - for (auto &device : m_devices) { - device_ids.push_back(device.get()->get_dev_id()); + for (const auto &pair : m_devices) { + auto &id = pair.first; + device_ids.push_back(id); } return device_ids; } @@ -82,16 +84,18 @@ public: static hailo_status validate_params(const hailo_vdevice_params_t ¶ms); private: - VDeviceBase(std::vector> &&devices, CoreOpsSchedulerPtr core_ops_scheduler) : + VDeviceBase(std::map> &&devices, CoreOpsSchedulerPtr core_ops_scheduler) : m_devices(std::move(devices)), m_core_ops_scheduler(core_ops_scheduler) {} - static Expected>> create_devices(const hailo_vdevice_params_t ¶ms); + static Expected>> create_devices(const hailo_vdevice_params_t ¶ms); static Expected> get_device_ids(const hailo_vdevice_params_t ¶ms); Expected create_local_config_params(Hef &hef, const NetworkGroupsParamsMap &configure_params); - Expected> create_vdevice_network_group(Hef &hef, const std::pair ¶ms); + Expected> create_vdevice_network_group(Hef &hef, + const 
std::pair ¶ms, bool use_multiplexer); + bool should_use_multiplexer(const ConfigureNetworkParams ¶ms); - std::vector> m_devices; + std::map> m_devices; CoreOpsSchedulerPtr m_core_ops_scheduler; std::vector> m_vdevice_core_ops; std::vector> m_network_groups; // TODO: HRT-9547 - Remove when ConfiguredNetworkGroup will be kept in global context @@ -124,12 +128,13 @@ public: virtual hailo_status after_fork_in_child() override; private: - VDeviceClient(std::unique_ptr client, uint32_t handle); + VDeviceClient(std::unique_ptr client, uint32_t handle, std::vector> &&devices); hailo_status create_client(); std::unique_ptr m_client; uint32_t m_handle; + std::vector> m_devices; std::vector> m_network_groups; }; diff --git a/hailort/libhailort/src/vdevice/vdevice_native_stream.cpp b/hailort/libhailort/src/vdevice/vdevice_native_stream.cpp new file mode 100644 index 0000000..2022c18 --- /dev/null +++ b/hailort/libhailort/src/vdevice/vdevice_native_stream.cpp @@ -0,0 +1,259 @@ +/** + * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file vdevice_native_stream.cpp + * @brief Internal stream implementation for native streams + * + **/ + +#include "vdevice_native_stream.hpp" + +namespace hailort { + +/** Input stream **/ +hailo_status VDeviceNativeInputStreamBase::abort() +{ + auto status = HAILO_SUCCESS; // Best effort + for (auto &pair: m_streams){ + auto &stream = pair.second; + auto abort_status = stream.get().abort(); + if (HAILO_SUCCESS != status) { + LOGGER__ERROR("Failed to abort input stream. 
(status: {} device: {})", status, stream.get().get_dev_id()); + status = abort_status; + } + } + return status; +} + +hailo_status VDeviceNativeInputStreamBase::clear_abort() +{ + auto status = HAILO_SUCCESS; // Best effort + for (auto &pair: m_streams){ + auto &stream = pair.second; + auto clear_abort_status = stream.get().clear_abort(); + if ((HAILO_SUCCESS != clear_abort_status) && (HAILO_STREAM_NOT_ACTIVATED != clear_abort_status)) { + LOGGER__ERROR("Failed to clear abort input stream. (status: {} device: {})", clear_abort_status, stream.get().get_dev_id()); + status = clear_abort_status; + } + } + + return status; +} + +Expected> VDeviceNativeInputStream::create( + std::map> &&streams, + EventPtr &&core_op_activated_event, + const LayerInfo &layer_info) +{ + auto status = HAILO_UNINITIALIZED; + auto stream = make_unique_nothrow(std::move(streams), + std::move(core_op_activated_event), layer_info, status); + CHECK_AS_EXPECTED((nullptr != stream), HAILO_OUT_OF_HOST_MEMORY); + CHECK_SUCCESS_AS_EXPECTED(status); + return stream; +} + +hailo_status VDeviceNativeInputStream::write_impl(const MemoryView &buffer, const std::function &should_cancel) +{ + if (should_cancel()) { + return HAILO_STREAM_ABORTED_BY_USER; + } + auto status = m_streams.at(m_next_transfer_stream).get().write_impl(buffer); + if (HAILO_SUCCESS != status) { + LOGGER__INFO("Write to stream has failed! 
status = {}", status); + return status; + } + + // Update m_next_transfer_stream only if 'batch' frames has been transferred + if (0 == (++m_acc_frames % m_streams.begin()->second.get().get_dynamic_batch_size())) { + auto it = m_streams.upper_bound(m_next_transfer_stream); + if (m_streams.end() == it) { + it = m_streams.begin(); + } + m_next_transfer_stream = it->first; + m_acc_frames = 0; + } + return HAILO_SUCCESS; +} + +Expected> VDeviceNativeAsyncInputStream::create( + std::map> &&streams, + EventPtr &&core_op_activated_event, + const LayerInfo &layer_info) +{ + auto max_queue_size_per_stream = streams.begin()->second.get().get_buffer_frames_size(); + CHECK_EXPECTED(max_queue_size_per_stream); + const auto max_queue_size = max_queue_size_per_stream.value() * streams.size(); + + auto status = HAILO_UNINITIALIZED; + auto stream = make_unique_nothrow(std::move(streams), + std::move(core_op_activated_event), layer_info, max_queue_size, status); + CHECK_AS_EXPECTED((nullptr != stream), HAILO_OUT_OF_HOST_MEMORY); + CHECK_SUCCESS_AS_EXPECTED(status); + return stream; +} + +hailo_status VDeviceNativeAsyncInputStream::wait_for_async_ready(size_t transfer_size, std::chrono::milliseconds timeout) +{ + return m_streams.at(m_next_transfer_stream).get().wait_for_async_ready(transfer_size, timeout); +} + +Expected VDeviceNativeAsyncInputStream::get_async_max_queue_size() const +{ + return Expected(m_max_queue_size); +} + +hailo_status VDeviceNativeAsyncInputStream::write_async(TransferRequest &&transfer_request) +{ + // TODO HRT-10583 - allow option to remove reorder queue + transfer_request.callback = m_callback_reorder_queue.wrap_callback(transfer_request.callback); + + auto status = m_streams.at(m_next_transfer_stream).get().write_async(std::move(transfer_request)); + if (HAILO_SUCCESS != status) { + m_callback_reorder_queue.cancel_last_callback(); + return status; + } + + // Update m_next_transfer_stream_index only if 'batch' frames has been transferred + if (0 == 
(++m_acc_frames % m_streams.begin()->second.get().get_dynamic_batch_size())) { + auto it = m_streams.upper_bound(m_next_transfer_stream); + if (m_streams.end() == it) { + it = m_streams.begin(); + } + m_next_transfer_stream = it->first; + m_acc_frames = 0; + } + return HAILO_SUCCESS; +} + +hailo_status VDeviceNativeAsyncInputStream::write_impl(const MemoryView &, const std::function &) +{ + LOGGER__ERROR("Sync write is not supported by async streams"); + return HAILO_INVALID_OPERATION; +} + +/** Output stream **/ +hailo_status VDeviceNativeOutputStreamBase::abort() +{ + auto status = HAILO_SUCCESS; // Best effort + for (const auto &pair : m_streams) { + auto &stream = pair.second; + auto abort_status = stream.get().abort(); + if (HAILO_SUCCESS != status) { + LOGGER__ERROR("Failed to abort output stream. (status: {} device: {})", status, stream.get().get_dev_id()); + status = abort_status; + } + } + + return status; +} + +hailo_status VDeviceNativeOutputStreamBase::clear_abort() +{ + auto status = HAILO_SUCCESS; // Best effort + for (const auto &pair : m_streams) { + auto &stream = pair.second; + auto clear_abort_status = stream.get().clear_abort(); + if ((HAILO_SUCCESS != clear_abort_status) && (HAILO_STREAM_NOT_ACTIVATED != clear_abort_status)) { + LOGGER__ERROR("Failed to clear abort output stream. 
(status: {} device: {})", clear_abort_status, stream.get().get_dev_id()); + status = clear_abort_status; + } + } + + return status; +} + +Expected> VDeviceNativeOutputStream::create( + std::map> &&streams, + EventPtr &&core_op_activated_event, + const LayerInfo &layer_info) +{ + auto status = HAILO_UNINITIALIZED; + auto stream = make_unique_nothrow(std::move(streams), + std::move(core_op_activated_event), layer_info, status); + CHECK_AS_EXPECTED((nullptr != stream), HAILO_OUT_OF_HOST_MEMORY); + CHECK_SUCCESS_AS_EXPECTED(status); + return stream; +} + +hailo_status VDeviceNativeOutputStream::read(MemoryView buffer) +{ + auto status = m_streams.at(m_next_transfer_stream).get().read(buffer); + if (HAILO_STREAM_ABORTED_BY_USER == status) { + // In case of aborted by user, don't show it as infer error + LOGGER__INFO("Stream aborted by user (device: {})", m_streams.at(m_next_transfer_stream).get().get_dev_id()); + return status; + } + CHECK_SUCCESS(status, "Read from stream has failed! status = {}", status); + + // Update m_next_transfer_stream_index only if 'batch' frames has been transferred + if (0 == (++m_acc_frames % m_streams.begin()->second.get().get_dynamic_batch_size())) { + auto it = m_streams.upper_bound(m_next_transfer_stream); + if (m_streams.end() == it) { + it = m_streams.begin(); + } + m_next_transfer_stream = it->first; + m_acc_frames = 0; + } + + return HAILO_SUCCESS; +} + +Expected> VDeviceNativeAsyncOutputStream::create( + std::map> &&streams, + EventPtr &&core_op_activated_event, + const LayerInfo &layer_info) +{ + auto max_queue_size_per_stream = streams.begin()->second.get().get_buffer_frames_size(); + CHECK_EXPECTED(max_queue_size_per_stream); + const auto max_queue_size = max_queue_size_per_stream.value() * streams.size(); + + auto status = HAILO_UNINITIALIZED; + auto stream = make_unique_nothrow(std::move(streams), + std::move(core_op_activated_event), layer_info, max_queue_size, status); + CHECK_AS_EXPECTED((nullptr != stream), 
HAILO_OUT_OF_HOST_MEMORY); + CHECK_SUCCESS_AS_EXPECTED(status); + return stream; +} + +hailo_status VDeviceNativeAsyncOutputStream::wait_for_async_ready(size_t transfer_size, std::chrono::milliseconds timeout) +{ + return m_streams.at(m_next_transfer_stream).get().wait_for_async_ready(transfer_size, timeout); +} + +Expected VDeviceNativeAsyncOutputStream::get_async_max_queue_size() const +{ + return Expected(m_max_queue_size); +} + +hailo_status VDeviceNativeAsyncOutputStream::read_async(TransferRequest &&transfer_request) +{ + // TODO HRT-10583 - allow option to remove reorder queue + transfer_request.callback = m_callback_reorder_queue.wrap_callback(transfer_request.callback); + auto status = m_streams.at(m_next_transfer_stream).get().read_async(std::move(transfer_request)); + if (HAILO_SUCCESS != status) { + m_callback_reorder_queue.cancel_last_callback(); + return status; + } + // Update m_next_transfer_stream_index only if 'batch' frames has been transferred + if (0 == (++m_acc_frames % m_streams.begin()->second.get().get_dynamic_batch_size())) { + auto it = m_streams.upper_bound(m_next_transfer_stream); + if (m_streams.end() == it) { + it = m_streams.begin(); + } + m_next_transfer_stream = it->first; + m_acc_frames = 0; + } + + return HAILO_SUCCESS; +} + +hailo_status VDeviceNativeAsyncOutputStream::read(MemoryView) +{ + LOGGER__ERROR("The read function is not supported by async streams"); + return HAILO_INVALID_OPERATION; +} + +} /* namespace hailort */ \ No newline at end of file diff --git a/hailort/libhailort/src/vdevice/vdevice_native_stream.hpp b/hailort/libhailort/src/vdevice/vdevice_native_stream.hpp index 5ddfe8c..61ce42c 100644 --- a/hailort/libhailort/src/vdevice/vdevice_native_stream.hpp +++ b/hailort/libhailort/src/vdevice/vdevice_native_stream.hpp @@ -16,48 +16,128 @@ #include "stream_common/stream_internal.hpp" #include "vdevice_stream.hpp" +#include "vdevice/callback_reorder_queue.hpp" namespace hailort { -class InputVDeviceNativeStream : 
public InputVDeviceBaseStream { + +class VDeviceNativeInputStreamBase : public VDeviceInputStreamBase { public: - InputVDeviceNativeStream( - std::vector> &&streams, + static Expected> create( + std::map> &&streams, + EventPtr &&core_op_activated_event, + const LayerInfo &layer_info); + + VDeviceNativeInputStreamBase( + std::map> &&streams, EventPtr &&core_op_activated_event, const LayerInfo &layer_info, hailo_status &status) : - InputVDeviceBaseStream(std::move(streams), std::move(core_op_activated_event), layer_info, status) + VDeviceInputStreamBase(std::move(streams), std::move(core_op_activated_event), layer_info, status) {} virtual hailo_status abort() override; virtual hailo_status clear_abort() override; virtual bool is_scheduled() override { return false; }; +}; + +class VDeviceNativeInputStream : public VDeviceNativeInputStreamBase { +public: + static Expected> create( + std::map> &&streams, + EventPtr &&core_op_activated_event, + const LayerInfo &layer_info); + + using VDeviceNativeInputStreamBase::VDeviceNativeInputStreamBase; protected: - virtual Expected sync_write_raw_buffer(const MemoryView &buffer, - const std::function &should_cancel = []() { return false; }) override; + virtual hailo_status write_impl(const MemoryView &buffer, const std::function &should_cancel) override;\ }; -class OutputVDeviceNativeStream : public OutputVDeviceBaseStream { +class VDeviceNativeAsyncInputStream : public VDeviceNativeInputStreamBase { public: - OutputVDeviceNativeStream( - std::vector> &&streams, + static Expected> create( + std::map> &&streams, + EventPtr &&core_op_activated_event, + const LayerInfo &layer_info); + + VDeviceNativeAsyncInputStream( + std::map> &&streams, + EventPtr &&core_op_activated_event, const LayerInfo &layer_info, + size_t max_queue_size, + hailo_status &status) : + VDeviceNativeInputStreamBase(std::move(streams), std::move(core_op_activated_event), layer_info, status), + m_callback_reorder_queue(max_queue_size), // TODO HRT-1058 - use 
reorder queue only when needed + m_max_queue_size(max_queue_size) + {} + + virtual hailo_status wait_for_async_ready(size_t transfer_size, std::chrono::milliseconds timeout) override; + virtual hailo_status write_async(TransferRequest &&transfer_request) override; + virtual Expected get_async_max_queue_size() const override; + +protected: + virtual hailo_status write_impl(const MemoryView &buffer, const std::function &should_cancel) override; + +private: + CallbackReorderQueue m_callback_reorder_queue; + const size_t m_max_queue_size; +}; + +class VDeviceNativeOutputStreamBase : public VDeviceOutputStreamBase { +public: + VDeviceNativeOutputStreamBase( + std::map> &&streams, EventPtr &&core_op_activated_event, + const LayerInfo &layer_info, hailo_status &status) : - OutputVDeviceBaseStream(std::move(streams), layer_info, std::move(core_op_activated_event), status) + VDeviceOutputStreamBase(std::move(streams), layer_info, std::move(core_op_activated_event), status) {} virtual hailo_status abort() override; virtual hailo_status clear_abort() override; virtual bool is_scheduled() override { return false; }; +}; -protected: - virtual hailo_status read(MemoryView buffer) override;; +class VDeviceNativeOutputStream : public VDeviceNativeOutputStreamBase { +public: + static Expected> create( + std::map> &&streams, + EventPtr &&core_op_activated_event, const LayerInfo &layer_info); + + using VDeviceNativeOutputStreamBase::VDeviceNativeOutputStreamBase; + virtual hailo_status read(MemoryView buffer) override; }; +class VDeviceNativeAsyncOutputStream : public VDeviceNativeOutputStreamBase { +public: + static Expected> create( + std::map> &&streams, + EventPtr &&core_op_activated_event, const LayerInfo &layer_info); + + VDeviceNativeAsyncOutputStream( + std::map> &&streams, + EventPtr &&core_op_activated_event, + const LayerInfo &layer_info, + size_t max_queue_size, + hailo_status &status) : + VDeviceNativeOutputStreamBase(std::move(streams), 
std::move(core_op_activated_event), layer_info, status), + m_callback_reorder_queue(max_queue_size), // TODO HRT-1058 - use reorder queue only when needed + m_max_queue_size(max_queue_size) + {} + + virtual hailo_status wait_for_async_ready(size_t transfer_size, std::chrono::milliseconds timeout) override; + virtual hailo_status read_async(TransferRequest &&transfer_request) override; + virtual Expected get_async_max_queue_size() const override; + virtual hailo_status read(MemoryView buffer) override; + +private: + CallbackReorderQueue m_callback_reorder_queue; + const size_t m_max_queue_size; + }; + } /* namespace hailort */ #endif /* HAILO_VDEVICE_NATIVE_STREAM_HPP_ */ diff --git a/hailort/libhailort/src/vdevice/vdevice_stream.cpp b/hailort/libhailort/src/vdevice/vdevice_stream.cpp index f50ec24..6123597 100644 --- a/hailort/libhailort/src/vdevice/vdevice_stream.cpp +++ b/hailort/libhailort/src/vdevice/vdevice_stream.cpp @@ -16,7 +16,6 @@ #include "common/utils.hpp" -#include "utils/profiler/tracer_macros.hpp" #include "vdevice/vdevice_stream.hpp" #include "vdevice/vdevice_native_stream.hpp" #include "vdevice/scheduler/multi_device_scheduled_stream.hpp" @@ -29,22 +28,8 @@ namespace hailort { -hailo_status InputVDeviceBaseStream::deactivate_stream() -{ - auto status = HAILO_SUCCESS; // Best effort - for (auto &stream : m_streams) { - auto deactivate_status = stream.get().deactivate_stream(); - if (HAILO_SUCCESS != deactivate_status) { - LOGGER__ERROR("Failed to deactivate input stream. 
(status: {} device: {})", deactivate_status, stream.get().get_dev_id()); - status = deactivate_status; - } - } - m_is_stream_activated = false; - return status; -} - /** Input stream **/ -InputVDeviceBaseStream::~InputVDeviceBaseStream() +VDeviceInputStreamBase::~VDeviceInputStreamBase() { // We want to stop the vdma channel before closing the stream in the firmware // because sending data to a closed stream may terminate the dma engine @@ -53,9 +38,10 @@ InputVDeviceBaseStream::~InputVDeviceBaseStream() } } -hailo_status InputVDeviceBaseStream::activate_stream(uint16_t dynamic_batch_size, bool resume_pending_stream_transfers) +hailo_status VDeviceInputStreamBase::activate_stream(uint16_t dynamic_batch_size, bool resume_pending_stream_transfers) { - for (auto &stream : m_streams) { + for (const auto &pair : m_streams) { + auto &stream = pair.second; auto status = stream.get().activate_stream(dynamic_batch_size, resume_pending_stream_transfers); if (HAILO_SUCCESS != status) { LOGGER__ERROR("Failed to activate input stream. (device: {})", stream.get().get_dev_id()); @@ -67,106 +53,118 @@ hailo_status InputVDeviceBaseStream::activate_stream(uint16_t dynamic_batch_size return HAILO_SUCCESS; } -hailo_status InputVDeviceBaseStream::sync_write_all_raw_buffer_no_transform_impl(void *buffer, size_t offset, size_t size) +hailo_status VDeviceInputStreamBase::deactivate_stream() { - ASSERT(NULL != buffer); - - return sync_write_raw_buffer(MemoryView(static_cast(buffer) + offset, size)).status(); + auto status = HAILO_SUCCESS; // Best effort + for (const auto &pair : m_streams) { + auto &stream = pair.second; + auto deactivate_status = stream.get().deactivate_stream(); + if (HAILO_SUCCESS != deactivate_status) { + LOGGER__ERROR("Failed to deactivate input stream. 
(status: {} device: {})", deactivate_status, stream.get().get_dev_id()); + status = deactivate_status; + } + } + m_is_stream_activated = false; + return status; } -hailo_status InputVDeviceBaseStream::send_pending_buffer(size_t device_index) +hailo_status VDeviceInputStreamBase::send_pending_buffer(const device_id_t &device_id) { assert(1 == m_streams.size()); - CHECK(0 == device_index, HAILO_INVALID_OPERATION); - VdmaInputStream &vdma_input = static_cast(m_streams[m_next_transfer_stream_index].get()); - return vdma_input.send_pending_buffer(); + auto &vdma_input = dynamic_cast(m_streams.at(m_next_transfer_stream).get()); + return vdma_input.send_pending_buffer(device_id); } -Expected InputVDeviceBaseStream::get_buffer_frames_size() const +Expected VDeviceInputStreamBase::get_buffer_frames_size() const { - size_t total_buffers_size = 0; - for (auto &stream : m_streams) { - auto stream_buffer_size = stream.get().get_buffer_frames_size(); - CHECK_EXPECTED(stream_buffer_size); - total_buffers_size += stream_buffer_size.value(); - } - - return total_buffers_size; + return m_streams.begin()->second.get().get_buffer_frames_size(); } -Expected InputVDeviceBaseStream::get_pending_frames_count() const +Expected VDeviceInputStreamBase::get_pending_frames_count() const { size_t total_pending_frames_count = 0; - for (auto &stream : m_streams) { + for (const auto &pair : m_streams) { + auto &stream = pair.second; auto stream_pending_frames_count = stream.get().get_pending_frames_count(); CHECK_EXPECTED(stream_pending_frames_count); total_pending_frames_count += stream_pending_frames_count.value(); } - return total_pending_frames_count; } -Expected> InputVDeviceBaseStream::create(std::vector> &&low_level_streams, - const LayerInfo &edge_layer, const scheduler_core_op_handle_t &core_op_handle, - EventPtr core_op_activated_event, CoreOpsSchedulerWeakPtr core_ops_scheduler) +Expected> VDeviceInputStreamBase::create( + std::map> &&low_level_streams, + const hailo_stream_parameters_t 
&stream_params, const LayerInfo &edge_layer, + const scheduler_core_op_handle_t &core_op_handle, EventPtr core_op_activated_event, + CoreOpsSchedulerWeakPtr core_ops_scheduler) { assert(0 < low_level_streams.size()); - auto status = HAILO_UNINITIALIZED; - - std::unique_ptr local_vdevice_stream; if (core_ops_scheduler.lock()) { - if (1 < low_level_streams.size()) { - auto buffer_frame_size = low_level_streams[0].get().get_buffer_frames_size(); - CHECK_EXPECTED(buffer_frame_size); - auto frame_size = low_level_streams[0].get().get_frame_size(); - auto buffers_queue_ptr = BuffersQueue::create_unique(frame_size, (low_level_streams.size() * buffer_frame_size.value())); - CHECK_EXPECTED(buffers_queue_ptr); - - local_vdevice_stream = make_unique_nothrow(std::move(low_level_streams), + if ((stream_params.flags & HAILO_STREAM_FLAGS_ASYNC) != 0) { + auto stream = ScheduledAsyncInputStream::create(std::move(low_level_streams), core_op_handle, std::move(core_op_activated_event), edge_layer, - core_ops_scheduler, buffers_queue_ptr.release(), status); + core_ops_scheduler); + CHECK_EXPECTED(stream); + return std::unique_ptr(stream.release()); } else { - local_vdevice_stream = make_unique_nothrow(std::move(low_level_streams), - core_op_handle, std::move(core_op_activated_event), edge_layer, - core_ops_scheduler, status); + if (1 < low_level_streams.size()) { + auto stream = MultiDeviceScheduledInputStream::create(std::move(low_level_streams), + core_op_handle, std::move(core_op_activated_event), edge_layer, + core_ops_scheduler); + CHECK_EXPECTED(stream); + return std::unique_ptr(stream.release()); + } else { + auto stream = ScheduledInputStream::create(std::move(low_level_streams), + core_op_handle, std::move(core_op_activated_event), edge_layer, + core_ops_scheduler); + CHECK_EXPECTED(stream); + return std::unique_ptr(stream.release()); + } } } else { - local_vdevice_stream = make_unique_nothrow(std::move(low_level_streams), - std::move(core_op_activated_event), 
edge_layer,status); - } - - CHECK_AS_EXPECTED((nullptr != local_vdevice_stream), HAILO_OUT_OF_HOST_MEMORY); - CHECK_SUCCESS_AS_EXPECTED(status); + if ((stream_params.flags & HAILO_STREAM_FLAGS_ASYNC) != 0) { + auto stream = VDeviceNativeAsyncInputStream::create(std::move(low_level_streams), + std::move(core_op_activated_event), edge_layer); + CHECK_EXPECTED(stream); + return std::unique_ptr(stream.release()); + } else { + auto stream = VDeviceNativeInputStream::create(std::move(low_level_streams), + std::move(core_op_activated_event), edge_layer); + CHECK_EXPECTED(stream); + return std::unique_ptr(stream.release()); + } - return local_vdevice_stream; + } } -hailo_status InputVDeviceBaseStream::set_timeout(std::chrono::milliseconds timeout) +hailo_status VDeviceInputStreamBase::set_timeout(std::chrono::milliseconds timeout) { - for (auto &stream : m_streams) { + for (const auto &pair : m_streams) { + auto &stream = pair.second; auto status = stream.get().set_timeout(timeout); CHECK_SUCCESS(status, "Failed to set timeout to input stream. 
(device: {})", stream.get().get_dev_id()); } return HAILO_SUCCESS; } -std::chrono::milliseconds InputVDeviceBaseStream::get_timeout() const +std::chrono::milliseconds VDeviceInputStreamBase::get_timeout() const { // All timeout values of m_streams should be the same - return m_streams[0].get().get_timeout(); + return m_streams.begin()->second.get().get_timeout(); } -hailo_stream_interface_t InputVDeviceBaseStream::get_interface() const +hailo_stream_interface_t VDeviceInputStreamBase::get_interface() const { // All interface values of m_streams should be the same - return m_streams[0].get().get_interface(); + return m_streams.begin()->second.get().get_interface(); } -hailo_status InputVDeviceBaseStream::flush() +hailo_status VDeviceInputStreamBase::flush() { auto status = HAILO_SUCCESS; // Best effort - for (auto &stream : m_streams) { + for (const auto &pair : m_streams) { + auto &stream = pair.second; auto flush_status = stream.get().flush(); if (HAILO_SUCCESS != status) { LOGGER__ERROR("Failed to flush input stream. (status: {} device: {})", status, stream.get().get_dev_id()); @@ -176,152 +174,17 @@ hailo_status InputVDeviceBaseStream::flush() return status; } -Expected ScheduledInputStream::sync_write_raw_buffer(const MemoryView &buffer, const std::function &should_cancel) -{ - return sync_write_raw_buffer_impl(buffer, m_core_op_handle, should_cancel); -} - -Expected InputVDeviceNativeStream::sync_write_raw_buffer(const MemoryView &buffer, const std::function &should_cancel) -{ - if (should_cancel()) { - return make_unexpected(HAILO_STREAM_ABORTED_BY_USER); - } - - auto expected_written_bytes = m_streams[m_next_transfer_stream_index].get().sync_write_raw_buffer(buffer); - if (HAILO_SUCCESS != expected_written_bytes.status()) { - LOGGER__INFO("Write to stream has failed! 
status = {}", expected_written_bytes.status()); - return make_unexpected(expected_written_bytes.status()); - } - auto written_bytes = expected_written_bytes.value(); - - // Update m_next_transfer_stream_index only if 'batch' frames has been transferred - if (0 == (++m_acc_frames % m_streams[0].get().get_dynamic_batch_size())) { - m_next_transfer_stream_index = static_cast((m_next_transfer_stream_index + 1) % m_streams.size()); - m_acc_frames = 0; - } - return written_bytes; -} - -Expected ScheduledInputStream::sync_write_raw_buffer_impl(const MemoryView &buffer, scheduler_core_op_handle_t core_op_handle, - const std::function &should_cancel) -{ - auto core_ops_scheduler = m_core_ops_scheduler.lock(); - CHECK_AS_EXPECTED(core_ops_scheduler, HAILO_INTERNAL_FAILURE); - - auto status = core_ops_scheduler->wait_for_write(core_op_handle, name(), get_timeout(), should_cancel); - if (HAILO_STREAM_ABORTED_BY_USER == status) { - LOGGER__INFO("Write to stream was aborted."); - return make_unexpected(status); - } - CHECK_SUCCESS_AS_EXPECTED(status); - - TRACE(WriteFrameTrace, "", core_op_handle, m_stream_info.name); - - assert(1 == m_streams.size()); - status = m_streams[0].get().write_buffer_only(buffer, should_cancel); - - auto write_finish_status = core_ops_scheduler->signal_write_finish(core_op_handle, name(), status != HAILO_SUCCESS); - if (HAILO_SUCCESS != status) { - LOGGER__INFO("Write to stream has failed! 
status = {}", status); - return make_unexpected(status); - } - - if (HAILO_STREAM_ABORTED_BY_USER == write_finish_status) { - return make_unexpected(write_finish_status); - } - CHECK_SUCCESS_AS_EXPECTED(write_finish_status); - - auto written_bytes = buffer.size(); - return written_bytes; -} - -hailo_status ScheduledInputStream::abort() -{ - return abort_impl(m_core_op_handle); -} - -hailo_status InputVDeviceNativeStream::abort() -{ - auto status = HAILO_SUCCESS; // Best effort - for (auto &stream : m_streams) { - auto abort_status = stream.get().abort(); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("Failed to abort input stream. (status: {} device: {})", status, stream.get().get_dev_id()); - status = abort_status; - } - } - - return status; -} - -hailo_status ScheduledInputStream::abort_impl(scheduler_core_op_handle_t core_op_handle) -{ - auto status = HAILO_SUCCESS; // Best effort - assert(1 == m_streams.size()); - auto abort_status = m_streams[0].get().abort(); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("Failed to abort input stream. (status: {} device: {})", status, m_streams[0].get().get_dev_id()); - status = abort_status; - } - - auto core_ops_scheduler = m_core_ops_scheduler.lock(); - CHECK(core_ops_scheduler, HAILO_INTERNAL_FAILURE); - - auto disable_status = core_ops_scheduler->disable_stream(core_op_handle, name()); - if (HAILO_SUCCESS != disable_status) { - LOGGER__ERROR("Failed to disable stream in the core-op scheduler. 
(status: {})", disable_status); - status = disable_status; - } - - return status; -} - -hailo_status ScheduledInputStream::clear_abort() +hailo_status VDeviceInputStreamBase::write_impl(const MemoryView &buffer) { - return clear_abort_impl(m_core_op_handle); -} - -hailo_status InputVDeviceNativeStream::clear_abort() -{ - auto status = HAILO_SUCCESS; // Best effort - for (auto &stream : m_streams) { - auto clear_abort_status = stream.get().clear_abort(); - if ((HAILO_SUCCESS != clear_abort_status) && (HAILO_STREAM_NOT_ACTIVATED != clear_abort_status)) { - LOGGER__ERROR("Failed to clear abort input stream. (status: {} device: {})", clear_abort_status, stream.get().get_dev_id()); - status = clear_abort_status; - } - } - - return status; -} - -hailo_status ScheduledInputStream::clear_abort_impl(scheduler_core_op_handle_t core_op_handle) -{ - auto status = HAILO_SUCCESS; // Best effort - assert(1 == m_streams.size()); - auto clear_abort_status = m_streams[0].get().clear_abort(); - if ((HAILO_SUCCESS != clear_abort_status) && (HAILO_STREAM_NOT_ACTIVATED != clear_abort_status)) { - LOGGER__ERROR("Failed to clear abort input stream. (status: {} device: {})", clear_abort_status, m_streams[0].get().get_dev_id()); - status = clear_abort_status; - } - - auto core_ops_scheduler = m_core_ops_scheduler.lock(); - CHECK(core_ops_scheduler, HAILO_INTERNAL_FAILURE); - - auto enable_status = core_ops_scheduler->enable_stream(core_op_handle, name()); - if (HAILO_SUCCESS != enable_status) { - LOGGER__ERROR("Failed to enable stream in the core-op scheduler. 
(status: {})", enable_status); - status = enable_status; - } - - return status; + return write_impl(buffer, []() { return false; }); } /** Output stream **/ -hailo_status OutputVDeviceBaseStream::deactivate_stream() +hailo_status VDeviceOutputStreamBase::deactivate_stream() { auto status = HAILO_SUCCESS; // Best effort - for (auto &stream : m_streams) { + for (const auto &pair : m_streams) { + auto &stream = pair.second; auto deactivate_status = stream.get().deactivate_stream(); if (HAILO_SUCCESS != status) { LOGGER__ERROR("Failed to deactivate output stream. (status: {} device: {})", status, stream.get().get_dev_id()); @@ -332,7 +195,7 @@ hailo_status OutputVDeviceBaseStream::deactivate_stream() return status; } -OutputVDeviceBaseStream::~OutputVDeviceBaseStream() +VDeviceOutputStreamBase::~VDeviceOutputStreamBase() { // We want to stop the vdma channel before closing the stream in the firmware // because sending data to a closed stream may terminate the dma engine @@ -341,9 +204,10 @@ OutputVDeviceBaseStream::~OutputVDeviceBaseStream() } } -hailo_status OutputVDeviceBaseStream::activate_stream(uint16_t dynamic_batch_size, bool resume_pending_stream_transfers) +hailo_status VDeviceOutputStreamBase::activate_stream(uint16_t dynamic_batch_size, bool resume_pending_stream_transfers) { - for (auto &stream : m_streams) { + for (const auto &pair : m_streams) { + auto &stream = pair.second; auto status = stream.get().activate_stream(dynamic_batch_size, resume_pending_stream_transfers); if (HAILO_SUCCESS != status) { LOGGER__ERROR("Failed to activate output stream. 
(device: {})", stream.get().get_dev_id()); @@ -355,222 +219,84 @@ hailo_status OutputVDeviceBaseStream::activate_stream(uint16_t dynamic_batch_siz return HAILO_SUCCESS; } -hailo_status OutputVDeviceBaseStream::read_all(MemoryView &/*buffer*/) +hailo_status VDeviceOutputStreamBase::read_impl(MemoryView &/*buffer*/) { - LOGGER__ERROR("read_all should not be called in vdevice flow"); + LOGGER__ERROR("read_impl should not be called in vdevice flow"); return HAILO_INTERNAL_FAILURE; } -Expected OutputVDeviceBaseStream::sync_read_raw_buffer(MemoryView &/*buffer*/) -{ - LOGGER__ERROR("sync_read_raw_buffer should not be called in vdevice flow"); - return make_unexpected(HAILO_INTERNAL_FAILURE); -} - -hailo_status ScheduledOutputStream::read(MemoryView buffer) -{ - return read_impl(buffer, m_core_op_handle); -} - -hailo_status OutputVDeviceNativeStream::read(MemoryView buffer) -{ - auto status = m_streams[m_next_transfer_stream_index].get().read(buffer); - if (HAILO_SUCCESS != status) { - LOGGER__INFO("Read from stream has failed! 
status = {}", status); - return status; - } - - // Update m_next_transfer_stream_index only if 'batch' frames has been transferred - if (0 == (++m_acc_frames % m_streams[0].get().get_dynamic_batch_size())) { - m_next_transfer_stream_index = static_cast((m_next_transfer_stream_index + 1) % m_streams.size()); - m_acc_frames = 0; - } - - return HAILO_SUCCESS; -} - -hailo_status ScheduledOutputStream::read_impl(MemoryView buffer, scheduler_core_op_handle_t core_op_handle) -{ - auto core_ops_scheduler = m_core_ops_scheduler.lock(); - CHECK(core_ops_scheduler, HAILO_INTERNAL_FAILURE); - - auto device_id = core_ops_scheduler->wait_for_read(core_op_handle, name(), get_timeout()); - if (HAILO_STREAM_ABORTED_BY_USER == device_id.status()) { - LOGGER__INFO("Read from stream was aborted."); - return device_id.status(); - } - CHECK_EXPECTED_AS_STATUS(device_id); - - TRACE(ReadFrameTrace, "", core_op_handle, m_stream_info.name); - auto status = m_streams[device_id.value()].get().read(buffer); - if (HAILO_SUCCESS != status) { - LOGGER__INFO("Read from stream has failed! 
status = {}", status); - return status; - } - - status = core_ops_scheduler->signal_read_finish(core_op_handle, name(), device_id.value()); - if (HAILO_STREAM_ABORTED_BY_USER == status) { - return status; - } - CHECK_SUCCESS(status); - - return HAILO_SUCCESS; -} - -Expected> OutputVDeviceBaseStream::create(std::vector> &&low_level_streams, - const LayerInfo &edge_layer, const scheduler_core_op_handle_t &core_op_handle, EventPtr core_op_activated_event, +Expected> VDeviceOutputStreamBase::create( + std::map> &&low_level_streams, + const hailo_stream_parameters_t &stream_params, const LayerInfo &edge_layer, + const scheduler_core_op_handle_t &core_op_handle, EventPtr core_op_activated_event, CoreOpsSchedulerWeakPtr core_ops_scheduler) { assert(0 < low_level_streams.size()); - auto status = HAILO_UNINITIALIZED; - - std::unique_ptr local_vdevice_stream; + if (core_ops_scheduler.lock()) { - local_vdevice_stream = make_unique_nothrow(std::move(low_level_streams), core_op_handle, - edge_layer, std::move(core_op_activated_event), core_ops_scheduler, status); + if ((stream_params.flags & HAILO_STREAM_FLAGS_ASYNC) != 0) { + LOGGER__ERROR("Async output streams are not supported with scheduler"); + return make_unexpected(HAILO_NOT_IMPLEMENTED); + } else { + auto stream = ScheduledOutputStream::create(std::move(low_level_streams), core_op_handle, + edge_layer, std::move(core_op_activated_event), core_ops_scheduler); + CHECK_EXPECTED(stream); + return std::unique_ptr(stream.release()); + } } else { - local_vdevice_stream = make_unique_nothrow(std::move(low_level_streams), edge_layer, - std::move(core_op_activated_event), status); + if ((stream_params.flags & HAILO_STREAM_FLAGS_ASYNC) != 0) { + auto stream = VDeviceNativeAsyncOutputStream::create(std::move(low_level_streams), + std::move(core_op_activated_event), edge_layer); + CHECK_EXPECTED(stream); + return std::unique_ptr(stream.release()); + } else { + auto stream = 
VDeviceNativeOutputStream::create(std::move(low_level_streams), + std::move(core_op_activated_event), edge_layer); + CHECK_EXPECTED(stream); + return std::unique_ptr(stream.release()); + } } - - CHECK_AS_EXPECTED((nullptr != local_vdevice_stream), HAILO_OUT_OF_HOST_MEMORY); - CHECK_SUCCESS_AS_EXPECTED(status); - - return local_vdevice_stream; } -hailo_status OutputVDeviceBaseStream::set_timeout(std::chrono::milliseconds timeout) +hailo_status VDeviceOutputStreamBase::set_timeout(std::chrono::milliseconds timeout) { - for (auto &stream : m_streams) { + for (const auto &pair : m_streams) { + auto &stream = pair.second; auto status = stream.get().set_timeout(timeout); CHECK_SUCCESS(status, "Failed to set timeout to output stream. (device: {})", stream.get().get_dev_id()); } return HAILO_SUCCESS; } -std::chrono::milliseconds OutputVDeviceBaseStream::get_timeout() const +std::chrono::milliseconds VDeviceOutputStreamBase::get_timeout() const { // All timeout values of m_streams should be the same - return m_streams[0].get().get_timeout(); + return m_streams.begin()->second.get().get_timeout(); } -hailo_stream_interface_t OutputVDeviceBaseStream::get_interface() const +hailo_stream_interface_t VDeviceOutputStreamBase::get_interface() const { // All interface values of m_streams should be the same - return m_streams[0].get().get_interface(); + return m_streams.begin()->second.get().get_interface(); } -hailo_status ScheduledOutputStream::abort() +Expected VDeviceOutputStreamBase::get_buffer_frames_size() const { - return abort_impl(m_core_op_handle); + return m_streams.begin()->second.get().get_buffer_frames_size(); } -hailo_status OutputVDeviceNativeStream::abort() -{ - auto status = HAILO_SUCCESS; // Best effort - for (auto &stream : m_streams) { - auto abort_status = stream.get().abort(); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("Failed to abort output stream. 
(status: {} device: {})", status, stream.get().get_dev_id()); - status = abort_status; - } - } - - return status; -} - -hailo_status ScheduledOutputStream::abort_impl(scheduler_core_op_handle_t core_op_handle) -{ - auto status = HAILO_SUCCESS; // Best effort - for (auto& stream : m_streams) { - auto abort_status = stream.get().abort(); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("Failed to abort output stream. (status: {} device: {})", status, stream.get().get_dev_id()); - status = abort_status; - } - } - - auto core_ops_scheduler = m_core_ops_scheduler.lock(); - CHECK(core_ops_scheduler, HAILO_INTERNAL_FAILURE); - - auto disable_status = core_ops_scheduler->disable_stream(core_op_handle, name()); - if (HAILO_SUCCESS != disable_status) { - LOGGER__ERROR("Failed to disable stream in the core-op scheduler. (status: {})", disable_status); - status = disable_status; - } - - return status; -} - -hailo_status ScheduledOutputStream::clear_abort() -{ - return clear_abort_impl(m_core_op_handle); -} - -hailo_status OutputVDeviceNativeStream::clear_abort() -{ - auto status = HAILO_SUCCESS; // Best effort - for (auto &stream : m_streams) { - auto clear_abort_status = stream.get().clear_abort(); - if ((HAILO_SUCCESS != clear_abort_status) && (HAILO_STREAM_NOT_ACTIVATED != clear_abort_status)) { - LOGGER__ERROR("Failed to clear abort output stream. (status: {} device: {})", clear_abort_status, stream.get().get_dev_id()); - status = clear_abort_status; - } - } - - return status; -} - -hailo_status ScheduledOutputStream::clear_abort_impl(scheduler_core_op_handle_t core_op_handle) -{ - auto status = HAILO_SUCCESS; // Best effort - for (auto& stream : m_streams) { - auto clear_abort_status = stream.get().clear_abort(); - if ((HAILO_SUCCESS != clear_abort_status) && (HAILO_STREAM_NOT_ACTIVATED != clear_abort_status)) { - LOGGER__ERROR("Failed to clear abort output stream. 
(status: {} device: {})", clear_abort_status, stream.get().get_dev_id()); - status = clear_abort_status; - } - } - - auto core_ops_scheduler = m_core_ops_scheduler.lock(); - CHECK(core_ops_scheduler, HAILO_INTERNAL_FAILURE); - - auto enable_status = core_ops_scheduler->enable_stream(core_op_handle, name()); - if (HAILO_SUCCESS != enable_status) { - LOGGER__ERROR("Failed to enable stream in the core-op scheduler. (status: {})", enable_status); - status = enable_status; - } - - return status; -} - -Expected OutputVDeviceBaseStream::get_buffer_frames_size() const -{ - size_t total_buffers_size = 0; - for (auto &stream : m_streams) { - auto stream_buffer_size = stream.get().get_buffer_frames_size(); - if (HAILO_NOT_AVAILABLE == stream_buffer_size.status()) { - return make_unexpected(HAILO_NOT_AVAILABLE); - } - CHECK_EXPECTED(stream_buffer_size); - total_buffers_size += stream_buffer_size.value(); - } - - return total_buffers_size; -} - -Expected OutputVDeviceBaseStream::get_pending_frames_count() const +Expected VDeviceOutputStreamBase::get_pending_frames_count() const { size_t total_pending_frames_count = 0; - for (auto &stream : m_streams) { + for (const auto &pair : m_streams) { + auto &stream = pair.second; auto stream_pending_frames_count = stream.get().get_pending_frames_count(); if (HAILO_NOT_AVAILABLE == stream_pending_frames_count.status()) { return make_unexpected(HAILO_NOT_AVAILABLE); } - CHECK_EXPECTED(stream_pending_frames_count); - total_pending_frames_count += stream_pending_frames_count.value(); + CHECK_EXPECTED(stream_pending_frames_count); + total_pending_frames_count += stream_pending_frames_count.value(); } - return total_pending_frames_count; } diff --git a/hailort/libhailort/src/vdevice/vdevice_stream.hpp b/hailort/libhailort/src/vdevice/vdevice_stream.hpp index e1aa294..c5cb88a 100644 --- a/hailort/libhailort/src/vdevice/vdevice_stream.hpp +++ b/hailort/libhailort/src/vdevice/vdevice_stream.hpp @@ -6,17 +6,25 @@ * @file vdevice_stream.hpp * 
@brief Internal stream implementation for VDevice * - * InputStream (External "interface") - * |-- InputStreamBase (Base class) - * |-- InputVDeviceBaseStream (Base class for vdevice streams) - * | |-- InputVDeviceNativeStream - * | |-- ScheduledInputStream + * InputStream (External "interface") + * |-- InputStreamBase (Base class) + * |-- VDeviceInputStreamBase (Base class for vdevice streams) + * | |-- VDeviceNativeInputStreamBase + * | | |-- VDeviceNativeInputStream (Sync api) + * | | |-- VDeviceNativeAsyncInputStream (Async api) + * | |-- ScheduledInputStreamBase + * | | |-- ScheduledInputStream (Sync api) + * | | |-- ScheduledAsyncInputStream (Async api) * - * OutputStream (External "interface") - * |-- OutputStreamBase (Base class) - * |-- OutputVDeviceBaseStream (Base class for vdevice streams) - * | |-- OutputVDeviceNativeStream - * | |-- ScheduledOutputStream + * OutputStream (External "interface") + * |-- OutputStreamBase (Base class) + * |-- VDeviceOutputStreamBase (Base class for vdevice streams) + * | |-- VDeviceNativeOutputStreamBase + * | | |-- VDeviceNativeOutputStream (Sync api) + * | | |-- VDeviceNativeAsyncOutputStream (Async api) + * | |-- ScheduledOutputStreamBase + * | | |-- ScheduledOutputStream (Sync api) + * | | |-- ScheduledAsyncOutputStream (Async api) **/ #ifndef HAILO_VDEVICE_STREAM_HPP_ @@ -34,14 +42,16 @@ namespace hailort { -class InputVDeviceBaseStream : public InputStreamBase { +class VDeviceInputStreamBase : public InputStreamBase { public: - static Expected> create(std::vector> &&low_level_streams, - const LayerInfo &edge_layer, const scheduler_core_op_handle_t &core_op_handle, - EventPtr core_op_activated_event, CoreOpsSchedulerWeakPtr core_ops_scheduler); + static Expected> create( + std::map> &&low_level_streams, + const hailo_stream_parameters_t &stream_params, const LayerInfo &edge_layer, + const scheduler_core_op_handle_t &core_op_handle, EventPtr core_op_activated_event, + CoreOpsSchedulerWeakPtr core_ops_scheduler); - 
virtual ~InputVDeviceBaseStream(); + virtual ~VDeviceInputStreamBase(); virtual hailo_status activate_stream(uint16_t dynamic_batch_size, bool resume_pending_stream_transfers) override; virtual hailo_status deactivate_stream() override; @@ -49,21 +59,13 @@ public: virtual std::chrono::milliseconds get_timeout() const override; virtual hailo_status set_timeout(std::chrono::milliseconds timeout) override; - virtual hailo_status send_pending_buffer(size_t device_index = 0) override; + virtual hailo_status send_pending_buffer(const device_id_t &device_id) override; virtual Expected get_buffer_frames_size() const override; virtual Expected get_pending_frames_count() const override; virtual bool is_scheduled() override = 0; virtual hailo_status abort() override = 0; virtual hailo_status clear_abort() override = 0; - - virtual hailo_status register_interrupt_callback(const vdma::ProcessingCompleteCallback &callback) override - { - for (auto &stream : m_streams) { - auto status = stream.get().register_interrupt_callback(callback); - CHECK_SUCCESS(status); - } - return HAILO_SUCCESS; - } + virtual hailo_status flush() override; virtual void notify_all() { @@ -72,43 +74,39 @@ public: } protected: - virtual hailo_status sync_write_all_raw_buffer_no_transform_impl(void *buffer, size_t offset, size_t size) override; - virtual Expected sync_write_raw_buffer(const MemoryView &buffer) override - { - return sync_write_raw_buffer(buffer, []() { return false; }); - } - virtual Expected sync_write_raw_buffer(const MemoryView &buffer, const std::function &should_cancel) = 0; + virtual hailo_status write_impl(const MemoryView &buffer) final override; + virtual hailo_status write_impl(const MemoryView &buffer, const std::function &should_cancel) = 0; - explicit InputVDeviceBaseStream( - std::vector> &&streams, + VDeviceInputStreamBase( + std::map> &&streams, EventPtr &&core_op_activated_event, const LayerInfo &layer_info, hailo_status &status) : - InputStreamBase(layer_info, 
streams[0].get().get_interface(), std::move(core_op_activated_event), status), + InputStreamBase(layer_info, streams.begin()->second.get().get_interface(), std::move(core_op_activated_event), status), m_streams(std::move(streams)), m_is_stream_activated(false), - m_next_transfer_stream_index(0), + m_next_transfer_stream(m_streams.begin()->first), m_acc_frames(0) {} - std::vector> m_streams; + std::map> m_streams; bool m_is_stream_activated; - uint32_t m_next_transfer_stream_index; + device_id_t m_next_transfer_stream; uint32_t m_acc_frames; private: friend class VDeviceInputStreamMultiplexerWrapper; - - virtual hailo_status flush() override; }; -class OutputVDeviceBaseStream : public OutputStreamBase { +class VDeviceOutputStreamBase : public OutputStreamBase { public: - virtual ~OutputVDeviceBaseStream(); + virtual ~VDeviceOutputStreamBase(); - static Expected> create(std::vector> &&low_level_streams, - const LayerInfo &edge_layer, const scheduler_core_op_handle_t &core_op_handle, - EventPtr core_op_activated_event, CoreOpsSchedulerWeakPtr core_ops_scheduler); + static Expected> create( + std::map> &&low_level_streams, + const hailo_stream_parameters_t &stream_params, const LayerInfo &edge_layer, + const scheduler_core_op_handle_t &core_op_handle, EventPtr core_op_activated_event, + CoreOpsSchedulerWeakPtr core_ops_scheduler); virtual hailo_status activate_stream(uint16_t dynamic_batch_size, bool resume_pending_stream_transfers) override; virtual hailo_status deactivate_stream() override; @@ -116,40 +114,30 @@ public: virtual std::chrono::milliseconds get_timeout() const override; virtual hailo_status set_timeout(std::chrono::milliseconds timeout) override; virtual Expected get_buffer_frames_size() const override; - virtual Expected get_pending_frames_count() const override; + virtual Expected get_pending_frames_count() const override; // Returns the accumulated pending frames virtual hailo_status abort() override = 0; virtual hailo_status clear_abort() override = 
0; virtual bool is_scheduled() override = 0; - virtual hailo_status register_interrupt_callback(const vdma::ProcessingCompleteCallback &callback) override - { - for (auto &stream : m_streams) { - auto status = stream.get().register_interrupt_callback(callback); - CHECK_SUCCESS(status); - } - return HAILO_SUCCESS; - } - protected: - virtual Expected sync_read_raw_buffer(MemoryView &buffer) override; - - explicit OutputVDeviceBaseStream( - std::vector> &&streams, + VDeviceOutputStreamBase( + std::map> &&streams, const LayerInfo &layer_info, EventPtr &&core_op_activated_event, hailo_status &status) : - OutputStreamBase(layer_info, std::move(core_op_activated_event), status), + OutputStreamBase(layer_info, streams.begin()->second.get().get_interface(), + std::move(core_op_activated_event), status), m_streams(std::move(streams)), m_is_stream_activated(false), - m_next_transfer_stream_index(0), + m_next_transfer_stream(m_streams.begin()->first), m_acc_frames(0) {} - virtual hailo_status read_all(MemoryView &buffer) override; + virtual hailo_status read_impl(MemoryView &buffer) override final; - std::vector> m_streams; + std::map> m_streams; bool m_is_stream_activated; - uint32_t m_next_transfer_stream_index; + device_id_t m_next_transfer_stream; uint32_t m_acc_frames; private: diff --git a/hailort/libhailort/src/vdevice/vdevice_stream_multiplexer_wrapper.cpp b/hailort/libhailort/src/vdevice/vdevice_stream_multiplexer_wrapper.cpp index b9d9b00..1b7b0a1 100644 --- a/hailort/libhailort/src/vdevice/vdevice_stream_multiplexer_wrapper.cpp +++ b/hailort/libhailort/src/vdevice/vdevice_stream_multiplexer_wrapper.cpp @@ -40,19 +40,12 @@ hailo_status VDeviceInputStreamMultiplexerWrapper::abort() } *m_is_aborted = true; - if (is_scheduled()) { - auto status = m_multiplexer->disable_stream(m_core_op_multiplexer_handle, name()); - CHECK_SUCCESS(status); - - m_vdevice_input_stream->notify_all(); - - status = m_multiplexer->run_once_for_stream(name(), INPUT_RUN_ONCE_HANDLE__ABORT, 
m_core_op_multiplexer_handle); - CHECK_SUCCESS(status); + auto status = m_multiplexer->disable_stream(m_core_op_multiplexer_handle, name()); + CHECK_SUCCESS(status); - return HAILO_SUCCESS; - } + m_vdevice_input_stream->notify_all(); - auto status = m_vdevice_input_stream->abort(); + status = m_multiplexer->run_once_for_stream(name(), INPUT_RUN_ONCE_HANDLE__ABORT, m_core_op_multiplexer_handle); CHECK_SUCCESS(status); return HAILO_SUCCESS; @@ -65,32 +58,27 @@ hailo_status VDeviceInputStreamMultiplexerWrapper::clear_abort() } *m_is_aborted = false; - if (is_scheduled()) { - auto status = m_multiplexer->enable_stream(m_core_op_multiplexer_handle, name()); - CHECK_SUCCESS(status); - - status = m_multiplexer->run_once_for_stream(name(), INPUT_RUN_ONCE_HANDLE__CLEAR_ABORT, m_core_op_multiplexer_handle); - CHECK_SUCCESS(status); - - m_vdevice_input_stream->notify_all(); - - return HAILO_SUCCESS; - } + auto status = m_multiplexer->enable_stream(m_core_op_multiplexer_handle, name()); + CHECK_SUCCESS(status); - auto status = m_vdevice_input_stream->clear_abort(); + status = m_multiplexer->run_once_for_stream(name(), INPUT_RUN_ONCE_HANDLE__CLEAR_ABORT, m_core_op_multiplexer_handle); CHECK_SUCCESS(status); + m_vdevice_input_stream->notify_all(); + return HAILO_SUCCESS; } bool VDeviceInputStreamMultiplexerWrapper::is_scheduled() { - return m_vdevice_input_stream->is_scheduled(); + // Multiplexer can only work with scheduler + assert(m_vdevice_input_stream->is_scheduled()); + return true; } -hailo_status VDeviceInputStreamMultiplexerWrapper::send_pending_buffer(size_t device_index) +hailo_status VDeviceInputStreamMultiplexerWrapper::send_pending_buffer(const device_id_t &device_id) { - return m_vdevice_input_stream->send_pending_buffer(device_index); + return m_vdevice_input_stream->send_pending_buffer(device_id); } Expected VDeviceInputStreamMultiplexerWrapper::get_buffer_frames_size() const @@ -103,34 +91,23 @@ Expected 
VDeviceInputStreamMultiplexerWrapper::get_pending_frames_count( return m_vdevice_input_stream->get_pending_frames_count(); } -Expected VDeviceInputStreamMultiplexerWrapper::sync_write_raw_buffer(const MemoryView &buffer) +hailo_status VDeviceInputStreamMultiplexerWrapper::write_impl(const MemoryView &buffer) { - if (is_scheduled()) { - auto status = m_multiplexer->wait_for_write(m_core_op_multiplexer_handle); - if (HAILO_STREAM_ABORTED_BY_USER == status) { - return make_unexpected(status); - } - CHECK_SUCCESS_AS_EXPECTED(status); + auto status = m_multiplexer->wait_for_write(m_core_op_multiplexer_handle); + if (HAILO_STREAM_ABORTED_BY_USER == status) { + return status; } + CHECK_SUCCESS(status); - auto exp = m_vdevice_input_stream->sync_write_raw_buffer(buffer, [this]() { return m_is_aborted->load(); }); - if (is_scheduled()) { - auto status = m_multiplexer->signal_write_finish(m_core_op_multiplexer_handle, exp.status() != HAILO_SUCCESS); - CHECK_SUCCESS_AS_EXPECTED(status); - } - if (HAILO_STREAM_ABORTED_BY_USER == exp.status()) { - return make_unexpected(exp.status()); + auto write_status = m_vdevice_input_stream->write_impl(buffer, [this]() { return m_is_aborted->load(); }); + status = m_multiplexer->signal_write_finish(m_core_op_multiplexer_handle, write_status != HAILO_SUCCESS); + CHECK_SUCCESS(status); + if (HAILO_STREAM_ABORTED_BY_USER == write_status) { + return write_status; } - CHECK_EXPECTED(exp); - - return exp; -} - -hailo_status VDeviceInputStreamMultiplexerWrapper::sync_write_all_raw_buffer_no_transform_impl(void *buffer, size_t offset, size_t size) -{ - ASSERT(NULL != buffer); + CHECK_SUCCESS(write_status); - return sync_write_raw_buffer(MemoryView(static_cast(buffer) + offset, size)).status(); + return HAILO_SUCCESS; } hailo_status VDeviceInputStreamMultiplexerWrapper::set_timeout(std::chrono::milliseconds timeout) @@ -140,20 +117,14 @@ hailo_status VDeviceInputStreamMultiplexerWrapper::set_timeout(std::chrono::mill hailo_status 
VDeviceInputStreamMultiplexerWrapper::flush() { - if (is_scheduled()) { - auto status = m_multiplexer->run_once_for_stream(name(), INPUT_RUN_ONCE_HANDLE__FLUSH, m_core_op_multiplexer_handle); - CHECK_SUCCESS(status); - - return HAILO_SUCCESS; - } - - return m_vdevice_input_stream->flush(); + return m_multiplexer->run_once_for_stream(name(), INPUT_RUN_ONCE_HANDLE__FLUSH, m_core_op_multiplexer_handle); } -Expected> VDeviceInputStreamMultiplexerWrapper::create(std::shared_ptr vdevice_input_stream, +Expected> VDeviceInputStreamMultiplexerWrapper::create(std::shared_ptr vdevice_input_stream, std::string network_name, std::shared_ptr multiplexer, scheduler_core_op_handle_t core_ops_scheduler_handle, multiplexer_core_op_handle_t core_op_multiplexer_handle) { + assert(vdevice_input_stream->is_scheduled()); hailo_status status = HAILO_UNINITIALIZED; std::unique_ptr wrapper(new (std::nothrow) VDeviceInputStreamMultiplexerWrapper(vdevice_input_stream, network_name, multiplexer, core_ops_scheduler_handle, core_op_multiplexer_handle, status)); @@ -171,7 +142,7 @@ Expected> VDeviceInputStre return wrapper; } -VDeviceInputStreamMultiplexerWrapper::VDeviceInputStreamMultiplexerWrapper(std::shared_ptr &vdevice_input_stream, +VDeviceInputStreamMultiplexerWrapper::VDeviceInputStreamMultiplexerWrapper(std::shared_ptr &vdevice_input_stream, std::string network_name, std::shared_ptr multiplexer, scheduler_core_op_handle_t core_ops_scheduler_handle, multiplexer_core_op_handle_t core_op_multiplexer_handle, hailo_status &status) : InputStreamBase(vdevice_input_stream->get_info(), @@ -247,6 +218,11 @@ std::chrono::milliseconds VDeviceOutputStreamMultiplexerWrapper::get_timeout() c return m_vdevice_output_stream->get_timeout(); } +hailo_status VDeviceOutputStreamMultiplexerWrapper::set_next_device_to_read(const device_id_t &device_id) +{ + return m_vdevice_output_stream->set_next_device_to_read(device_id); +} + hailo_status VDeviceOutputStreamMultiplexerWrapper::abort() { if (*m_is_aborted) 
{ @@ -254,17 +230,10 @@ hailo_status VDeviceOutputStreamMultiplexerWrapper::abort() } *m_is_aborted = true; - if (is_scheduled()) { - auto status = m_multiplexer->disable_stream(m_core_op_multiplexer_handle, name()); - CHECK_SUCCESS(status); - - status = m_multiplexer->run_once_for_stream(name(), OUTPUT_RUN_ONCE_HANDLE__ABORT, m_core_op_multiplexer_handle); - CHECK_SUCCESS(status); - - return HAILO_SUCCESS; - } + auto status = m_multiplexer->disable_stream(m_core_op_multiplexer_handle, name()); + CHECK_SUCCESS(status); - auto status = m_vdevice_output_stream->abort(); + status = m_multiplexer->run_once_for_stream(name(), OUTPUT_RUN_ONCE_HANDLE__ABORT, m_core_op_multiplexer_handle); CHECK_SUCCESS(status); return HAILO_SUCCESS; @@ -277,17 +246,10 @@ hailo_status VDeviceOutputStreamMultiplexerWrapper::clear_abort() } *m_is_aborted = false; - if (is_scheduled()) { - auto status = m_multiplexer->enable_stream(m_core_op_multiplexer_handle, name()); - CHECK_SUCCESS(status); - - status = m_multiplexer->run_once_for_stream(name(), OUTPUT_RUN_ONCE_HANDLE__CLEAR_ABORT, m_core_op_multiplexer_handle); - CHECK_SUCCESS(status); - - return HAILO_SUCCESS; - } + auto status = m_multiplexer->enable_stream(m_core_op_multiplexer_handle, name()); + CHECK_SUCCESS(status); - auto status = m_vdevice_output_stream->clear_abort(); + status = m_multiplexer->run_once_for_stream(name(), OUTPUT_RUN_ONCE_HANDLE__CLEAR_ABORT, m_core_op_multiplexer_handle); CHECK_SUCCESS(status); return HAILO_SUCCESS; @@ -295,7 +257,9 @@ hailo_status VDeviceOutputStreamMultiplexerWrapper::clear_abort() bool VDeviceOutputStreamMultiplexerWrapper::is_scheduled() { - return m_vdevice_output_stream->is_scheduled(); + // Multiplexer can only work with scheduler + assert(m_vdevice_output_stream->is_scheduled()); + return true; } Expected VDeviceOutputStreamMultiplexerWrapper::get_buffer_frames_size() const @@ -307,29 +271,22 @@ Expected VDeviceOutputStreamMultiplexerWrapper::get_pending_frames_count return 
m_vdevice_output_stream->get_pending_frames_count(); } -Expected VDeviceOutputStreamMultiplexerWrapper::sync_read_raw_buffer(MemoryView &buffer) -{ - return m_vdevice_output_stream->sync_read_raw_buffer(buffer); -} - -hailo_status VDeviceOutputStreamMultiplexerWrapper::read_all(MemoryView &buffer) +hailo_status VDeviceOutputStreamMultiplexerWrapper::read_impl(MemoryView &buffer) { - return m_vdevice_output_stream->read_all(buffer); + return m_vdevice_output_stream->read_impl(buffer); } hailo_status VDeviceOutputStreamMultiplexerWrapper::read(MemoryView buffer) { uint32_t frames_to_drain_count = 0; - if (is_scheduled()) { - auto expected_drain_count = m_multiplexer->wait_for_read(m_core_op_multiplexer_handle, name(), - m_vdevice_output_stream->get_timeout()); - if (HAILO_STREAM_ABORTED_BY_USER == expected_drain_count.status()) { - return expected_drain_count.status(); - } - CHECK_EXPECTED_AS_STATUS(expected_drain_count); - - frames_to_drain_count = expected_drain_count.release(); + auto expected_drain_count = m_multiplexer->wait_for_read(m_core_op_multiplexer_handle, name(), + m_vdevice_output_stream->get_timeout()); + if (HAILO_STREAM_ABORTED_BY_USER == expected_drain_count.status()) { + return expected_drain_count.status(); } + CHECK_EXPECTED_AS_STATUS(expected_drain_count); + + frames_to_drain_count = expected_drain_count.release(); for (uint32_t i = 0; i < frames_to_drain_count; i++) { auto status = m_vdevice_output_stream->read(buffer); @@ -345,10 +302,8 @@ hailo_status VDeviceOutputStreamMultiplexerWrapper::read(MemoryView buffer) } CHECK_SUCCESS(status); - if (is_scheduled()) { - status = m_multiplexer->signal_read_finish(); - CHECK_SUCCESS(status); - } + status = m_multiplexer->signal_read_finish(); + CHECK_SUCCESS(status); return HAILO_SUCCESS; } @@ -358,10 +313,11 @@ hailo_status VDeviceOutputStreamMultiplexerWrapper::set_timeout(std::chrono::mil return m_vdevice_output_stream->set_timeout(timeout); } -Expected> 
VDeviceOutputStreamMultiplexerWrapper::create(std::shared_ptr vdevice_output_stream, +Expected> VDeviceOutputStreamMultiplexerWrapper::create(std::shared_ptr vdevice_output_stream, std::string network_name, std::shared_ptr multiplexer, scheduler_core_op_handle_t core_ops_scheduler_handle, multiplexer_core_op_handle_t core_op_multiplexer_handle) { + assert(vdevice_output_stream->is_scheduled()); hailo_status status = HAILO_UNINITIALIZED; std::unique_ptr wrapper(new (std::nothrow) VDeviceOutputStreamMultiplexerWrapper(vdevice_output_stream, network_name, multiplexer, core_ops_scheduler_handle, core_op_multiplexer_handle, status)); @@ -378,7 +334,7 @@ Expected> VDeviceOutputSt return wrapper; } -VDeviceOutputStreamMultiplexerWrapper::VDeviceOutputStreamMultiplexerWrapper(std::shared_ptr &vdevice_output_stream, +VDeviceOutputStreamMultiplexerWrapper::VDeviceOutputStreamMultiplexerWrapper(std::shared_ptr &vdevice_output_stream, std::string network_name, std::shared_ptr multiplexer, scheduler_core_op_handle_t core_ops_scheduler_handle, multiplexer_core_op_handle_t core_op_multiplexer_handle, hailo_status &status) : OutputStreamBase(vdevice_output_stream->get_layer_info(), vdevice_output_stream->get_info(), diff --git a/hailort/libhailort/src/vdevice/vdevice_stream_multiplexer_wrapper.hpp b/hailort/libhailort/src/vdevice/vdevice_stream_multiplexer_wrapper.hpp index 0876d29..92e054b 100644 --- a/hailort/libhailort/src/vdevice/vdevice_stream_multiplexer_wrapper.hpp +++ b/hailort/libhailort/src/vdevice/vdevice_stream_multiplexer_wrapper.hpp @@ -34,7 +34,7 @@ enum output_run_once_handle_t { class VDeviceInputStreamMultiplexerWrapper : public InputStreamBase { public: virtual ~VDeviceInputStreamMultiplexerWrapper() = default; - static Expected> create(std::shared_ptr vdevice_input_stream, + static Expected> create(std::shared_ptr vdevice_input_stream, std::string network_name, std::shared_ptr multiplexer, scheduler_core_op_handle_t core_ops_scheduler_handle, 
multiplexer_core_op_handle_t core_op_multiplexer_handle = 0); Expected> clone(multiplexer_core_op_handle_t core_op_multiplexer_handle); @@ -49,28 +49,22 @@ public: virtual hailo_status clear_abort() override; virtual bool is_scheduled() override; - virtual hailo_status send_pending_buffer(size_t device_index = 0) override; + virtual hailo_status send_pending_buffer(const device_id_t &device_id) override; virtual Expected get_buffer_frames_size() const override; virtual Expected get_pending_frames_count() const override; - virtual hailo_status register_interrupt_callback(const vdma::ProcessingCompleteCallback &callback) override - { - return m_vdevice_input_stream->register_interrupt_callback(callback); - } - protected: - virtual Expected sync_write_raw_buffer(const MemoryView &buffer) override; - virtual hailo_status sync_write_all_raw_buffer_no_transform_impl(void *buffer, size_t offset, size_t size) override; + virtual hailo_status write_impl(const MemoryView &buffer) override; private: - VDeviceInputStreamMultiplexerWrapper(std::shared_ptr &vdevice_input_stream, + VDeviceInputStreamMultiplexerWrapper(std::shared_ptr &vdevice_input_stream, std::string network_name, std::shared_ptr multiplexer, scheduler_core_op_handle_t core_ops_scheduler_handle, multiplexer_core_op_handle_t core_op_multiplexer_handle, hailo_status &status); virtual hailo_status set_timeout(std::chrono::milliseconds timeout) override; virtual hailo_status flush() override; - - std::shared_ptr m_vdevice_input_stream; + + std::shared_ptr m_vdevice_input_stream; std::shared_ptr m_multiplexer; scheduler_core_op_handle_t m_core_ops_scheduler_handle; multiplexer_core_op_handle_t m_core_op_multiplexer_handle; @@ -83,7 +77,7 @@ class VDeviceOutputStreamMultiplexerWrapper : public OutputStreamBase { public: virtual ~VDeviceOutputStreamMultiplexerWrapper() noexcept = default; - static Expected> create(std::shared_ptr vdevice_output_stream, + static Expected> create(std::shared_ptr vdevice_output_stream, 
std::string network_name, std::shared_ptr multiplexer, scheduler_core_op_handle_t core_ops_scheduler_handle, multiplexer_core_op_handle_t core_op_multiplexer_handle = 0); Expected> clone(multiplexer_core_op_handle_t core_op_multiplexer_handle); @@ -94,30 +88,23 @@ public: virtual hailo_status deactivate_stream() override; virtual hailo_stream_interface_t get_interface() const override; virtual std::chrono::milliseconds get_timeout() const override; + virtual hailo_status set_next_device_to_read(const device_id_t &device_id) override; virtual hailo_status abort() override; virtual hailo_status clear_abort() override; virtual bool is_scheduled() override; virtual Expected get_buffer_frames_size() const override; virtual Expected get_pending_frames_count() const override; - virtual hailo_status register_interrupt_callback(const vdma::ProcessingCompleteCallback &callback) override - { - return m_vdevice_output_stream->register_interrupt_callback(callback); - } - -protected: - virtual Expected sync_read_raw_buffer(MemoryView &buffer) override; - private: - VDeviceOutputStreamMultiplexerWrapper(std::shared_ptr &vdevice_output_stream, + VDeviceOutputStreamMultiplexerWrapper(std::shared_ptr &vdevice_output_stream, std::string network_name, std::shared_ptr multiplexer, scheduler_core_op_handle_t core_ops_scheduler_handle, multiplexer_core_op_handle_t core_op_multiplexer_handle, hailo_status &status); virtual hailo_status set_timeout(std::chrono::milliseconds timeout) override; - virtual hailo_status read_all(MemoryView &buffer) override; + virtual hailo_status read_impl(MemoryView &buffer) override; virtual hailo_status read(MemoryView buffer) override; - std::shared_ptr m_vdevice_output_stream; + std::shared_ptr m_vdevice_output_stream; std::shared_ptr m_multiplexer; scheduler_core_op_handle_t m_core_ops_scheduler_handle; multiplexer_core_op_handle_t m_core_op_multiplexer_handle; diff --git a/hailort/libhailort/src/vdma/CMakeLists.txt 
b/hailort/libhailort/src/vdma/CMakeLists.txt index 4111464..2964998 100644 --- a/hailort/libhailort/src/vdma/CMakeLists.txt +++ b/hailort/libhailort/src/vdma/CMakeLists.txt @@ -21,11 +21,11 @@ set(SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/memory/descriptor_list.cpp ${CMAKE_CURRENT_SOURCE_DIR}/memory/vdma_buffer.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/memory/dma_mapped_buffer.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/memory/mapped_buffer_impl.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/memory/mapped_buffer_factory.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/memory/mapped_buffer.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/memory/dma_able_buffer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/memory/sg_buffer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/memory/continuous_buffer.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/memory/buffer_requirements.cpp ) set(HAILORT_CPP_SOURCES ${HAILORT_CPP_SOURCES} ${SRC_FILES} PARENT_SCOPE) diff --git a/hailort/libhailort/src/vdma/channel/async_channel.cpp b/hailort/libhailort/src/vdma/channel/async_channel.cpp index 890390e..d104b39 100644 --- a/hailort/libhailort/src/vdma/channel/async_channel.cpp +++ b/hailort/libhailort/src/vdma/channel/async_channel.cpp @@ -44,34 +44,60 @@ AsyncChannel::AsyncChannel(vdma::ChannelId channel_id, Direction direction, Hail status = HAILO_SUCCESS; } -hailo_status AsyncChannel::transfer(std::shared_ptr buffer, const TransferDoneCallback &user_callback, void *opaque) -{ - CHECK_NOT_NULL(buffer, HAILO_INVALID_ARGUMENT); - CHECK(0 != buffer->size(), HAILO_INVALID_ARGUMENT); +hailo_status AsyncChannel::transfer_async(TransferRequest &&transfer_request) +{ + CHECK_ARG_NOT_NULL(transfer_request.buffer.data()); + CHECK(0 != transfer_request.buffer.size(), HAILO_INVALID_ARGUMENT, "Buffer is empty (size 0)"); + + auto is_new_mapping = true; + MappedBufferPtr mapped_buffer = nullptr; + if (transfer_request.mapped_buffer != nullptr) { + assert(transfer_request.buffer.data() == transfer_request.mapped_buffer->data()); + assert(transfer_request.buffer.size() == 
transfer_request.mapped_buffer->size()); + CHECK(transfer_request.mapped_buffer->storage().type() == BufferStorage::Type::DMA, HAILO_INVALID_ARGUMENT, + "Buffer must be dma-able (provided buffer type {})", transfer_request.mapped_buffer->storage().type()); + + // Map if not already mapped + const auto mapping_direction = (m_direction == Direction::H2D) ? HAILO_DMA_BUFFER_DIRECTION_H2D : HAILO_DMA_BUFFER_DIRECTION_D2H; + auto is_new_mapping_exp = transfer_request.mapped_buffer->storage().dma_map(m_driver, mapping_direction); + CHECK_EXPECTED_AS_STATUS(is_new_mapping_exp); + is_new_mapping = is_new_mapping_exp.release(); + + auto mapped_buffer_exp = transfer_request.mapped_buffer->storage().get_dma_mapped_buffer(m_driver.device_id()); + CHECK_EXPECTED_AS_STATUS(mapped_buffer_exp); + mapped_buffer = mapped_buffer_exp.release(); + } else { + auto mapped_buffer_exp = MappedBuffer::create_shared(m_driver, m_direction, + transfer_request.buffer.size(), transfer_request.buffer.data()); + CHECK_EXPECTED_AS_STATUS(mapped_buffer_exp); + mapped_buffer = mapped_buffer_exp.release(); + } + + if (!is_new_mapping) { + // The buffer has been previously mapped, so it needs to be sync'd from host to device. + // * If the buffer is mapped H2D/BOTH, then synchronize will make sure the device "sees" the most "up to date" + // version of the buffer. + // * If the buffer is mapped D2H, it might have been changed by the host between the time it was mapped and the + // current async transfer. Synchronizing will transfer ownership to the device, so that when the transfer is + // complete, the host will "see" an "up to date" version of the buffer. 
+ auto status = mapped_buffer->synchronize(HailoRTDriver::DmaSyncDirection::TO_DEVICE); + CHECK_SUCCESS(status); + } std::lock_guard state_guard(m_state->mutex()); + if (!m_state->m_is_channel_activated) { + return HAILO_STREAM_NOT_ACTIVATED; + } if (m_state->m_is_aborted) { LOGGER__INFO("Tried to write to aborted channel {}", m_channel_id); return HAILO_STREAM_ABORTED_BY_USER; } - hailo_status status = HAILO_UNINITIALIZED; if (Direction::H2D == m_direction) { - status = transfer_h2d(buffer, user_callback, opaque); + return transfer_h2d(mapped_buffer, transfer_request.callback); } else { - status = transfer_d2h(buffer, user_callback, opaque); + return transfer_d2h(mapped_buffer, transfer_request.callback); } - - if (HAILO_STREAM_NOT_ACTIVATED == status) { - LOGGER__INFO("Transfer failed because Channel {} is not activated", m_channel_id); - return HAILO_STREAM_NOT_ACTIVATED; - } - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("Transfer failed for channel {} with status {}", m_channel_id, status); - return status; - } - - return HAILO_SUCCESS; } hailo_status AsyncChannel::cancel_pending_transfers() @@ -79,10 +105,8 @@ hailo_status AsyncChannel::cancel_pending_transfers() std::lock_guard state_guard(m_state->mutex()); for (auto &pending_buffer_info : m_state->m_pending_buffers) { if (pending_buffer_info.on_transfer_done) { - pending_buffer_info.on_transfer_done(pending_buffer_info.buffer, - hailo_async_transfer_completion_info_t{HAILO_STREAM_NOT_ACTIVATED}, - pending_buffer_info.opaque); - // Release our references to user buffer, callback and opaque + pending_buffer_info.on_transfer_done(HAILO_STREAM_ABORTED_BY_USER); + // Release our references to user buffer and callback. 
pending_buffer_info = PendingBuffer{}; } else { LOGGER__WARNING("No transfer done callback found for transfer (channel {}); skipping", m_channel_id); @@ -105,7 +129,7 @@ hailo_status AsyncChannel::complete_channel_deactivation() return HAILO_SUCCESS; } -hailo_status AsyncChannel::transfer(void */* buf */, size_t /* count */) +hailo_status AsyncChannel::transfer_sync(void */* buf */, size_t /* count */, std::chrono::milliseconds /* timeout */) { return HAILO_NOT_IMPLEMENTED; } @@ -139,14 +163,18 @@ Expected AsyncChannel::get_d2h_pending_descs_count() return make_unexpected(HAILO_NOT_IMPLEMENTED); } -hailo_status AsyncChannel::transfer_d2h(std::shared_ptr buffer, const TransferDoneCallback &user_callback, void *opaque) +hailo_status AsyncChannel::transfer_d2h(MappedBufferPtr mapped_buffer, const InternalTransferDoneCallback &callback) { InterruptsDomain first_desc_interrupts_domain = InterruptsDomain::NONE; // Provide FW interrupt only in the end of the last transfer in the batch - InterruptsDomain last_desc_interrupts_domain = (m_state->m_accumulated_transfers + 1 == m_transfers_per_axi_intr) ? + InterruptsDomain last_desc_interrupts_domain = (m_state->m_accumulated_transfers + 1 == m_transfers_per_axi_intr) ? 
InterruptsDomain::BOTH : InterruptsDomain::HOST; - const auto status = prepare_descriptors(buffer, user_callback, opaque, first_desc_interrupts_domain, last_desc_interrupts_domain); + const auto status = prepare_descriptors(mapped_buffer, callback, first_desc_interrupts_domain, + last_desc_interrupts_domain); + if (HAILO_QUEUE_IS_FULL == status) { + return status; + } CHECK_SUCCESS(status); m_state->m_accumulated_transfers = (m_state->m_accumulated_transfers + 1) % m_transfers_per_axi_intr; @@ -154,7 +182,7 @@ hailo_status AsyncChannel::transfer_d2h(std::shared_ptr buffer, return HAILO_SUCCESS; } -hailo_status AsyncChannel::transfer_h2d(std::shared_ptr buffer, const TransferDoneCallback &user_callback, void *opaque) +hailo_status AsyncChannel::transfer_h2d(MappedBufferPtr mapped_buffer, const InternalTransferDoneCallback &callback) { // For h2d, only the host need to get transfer done interrupts InterruptsDomain last_desc_interrupts_domain = InterruptsDomain::HOST; @@ -162,46 +190,64 @@ hailo_status AsyncChannel::transfer_h2d(std::shared_ptr buffer, InterruptsDomain first_desc_interrupts_domain = (m_latency_meter != nullptr) ? 
InterruptsDomain::HOST : InterruptsDomain::NONE; - return prepare_descriptors(buffer, user_callback, opaque, first_desc_interrupts_domain, last_desc_interrupts_domain); + return prepare_descriptors(mapped_buffer, callback, first_desc_interrupts_domain, + last_desc_interrupts_domain); } -hailo_status AsyncChannel::prepare_descriptors(std::shared_ptr buffer, const TransferDoneCallback &user_callback, - void *opaque, InterruptsDomain first_desc_interrupts_domain, InterruptsDomain last_desc_interrupts_domain) +hailo_status AsyncChannel::prepare_descriptors(MappedBufferPtr mapped_buffer, + const InternalTransferDoneCallback &callback, InterruptsDomain first_desc_interrupts_domain, + InterruptsDomain last_desc_interrupts_domain) { - const auto desired_desc_num = m_desc_list->descriptors_in_buffer(buffer->size()); + assert(mapped_buffer != nullptr); + + const auto desired_desc_num = m_desc_list->descriptors_in_buffer(mapped_buffer->size()); CHECK(desired_desc_num <= MAX_DESCS_COUNT, HAILO_INTERNAL_FAILURE); const uint16_t desc_num = static_cast(desired_desc_num); - int num_available = get_num_available(); - int num_processed = CB_TAIL(m_state->m_descs); - int num_free = CB_AVAIL(m_state->m_descs, num_available, num_processed); + const auto num_available = get_num_available(); + const auto num_processed = CB_TAIL(m_state->m_descs); + const auto num_free = CB_AVAIL(m_state->m_descs, num_available, num_processed); if (num_free < desc_num) { - // TODO: do we want to block here? 
- return HAILO_OUT_OF_DESCRIPTORS; + return HAILO_QUEUE_IS_FULL; } - const auto status = m_desc_list->configure_to_use_buffer(*buffer, m_channel_id, num_available); + const auto status = m_desc_list->configure_to_use_buffer(*mapped_buffer, m_channel_id, num_available); CHECK_SUCCESS(status); + if (nullptr != m_latency_meter) { // Program first descriptor m_desc_list->program_single_descriptor((*m_desc_list)[num_available], m_desc_list->desc_page_size(), first_desc_interrupts_domain); } - auto actual_desc_count = m_desc_list->program_last_descriptor(buffer->size(), last_desc_interrupts_domain, - num_available, true); + auto actual_desc_count = m_desc_list->program_last_descriptor(mapped_buffer->size(), last_desc_interrupts_domain, + num_available); CHECK_EXPECTED_AS_STATUS(actual_desc_count, "Failed to program desc_list for channel {}", m_channel_id); assert (actual_desc_count.value() == desc_num); - int last_desc_avail = ((num_available + desc_num - 1) & m_state->m_descs.size_mask); + assert(desc_num > 0); + const auto last_desc_avail = static_cast((num_available + desc_num - 1) & m_state->m_descs.size_mask); + + const auto wrapped_callback = [this, mapped_buffer, callback](hailo_status callback_status) { + if (HAILO_SUCCESS != callback_status) { + // No need to sync, just forward the callback. + callback(callback_status); + return; + } - const auto callback = [this, user_callback](std::shared_ptr buffer, const hailo_async_transfer_completion_info_t &status, void *opaque) { - user_callback(buffer, status, opaque); + // The device may only change the contents of mapped_buffer, if it was mapped in Direction::D2H + // (not Direction::BOTH because channels are either D2H or H2D). Hence, we don't need to sync H2D + // buffers to the host (the host's "view" of the buffer is "up to date"). 
+ if (m_direction == Direction::D2H) { + auto sync_status = mapped_buffer->synchronize(HailoRTDriver::DmaSyncDirection::TO_HOST); + if (sync_status != HAILO_SUCCESS) { + LOGGER__ERROR("Failed to sync buffer to host with status {}", sync_status); + callback_status = sync_status; + } + } - // opaque is only for the user callback - static constexpr void *NO_CONTEXT = nullptr; - m_transfer_done_callback(buffer, status, NO_CONTEXT); + callback(callback_status); }; - m_state->add_pending_buffer(num_available, last_desc_avail, m_direction, callback, buffer, opaque); + m_state->add_pending_buffer(num_available, last_desc_avail, m_direction, wrapped_callback, mapped_buffer); return inc_num_available(desc_num); } diff --git a/hailort/libhailort/src/vdma/channel/async_channel.hpp b/hailort/libhailort/src/vdma/channel/async_channel.hpp index a161ced..d2ae258 100644 --- a/hailort/libhailort/src/vdma/channel/async_channel.hpp +++ b/hailort/libhailort/src/vdma/channel/async_channel.hpp @@ -32,7 +32,7 @@ public: static Expected create(vdma::ChannelId channel_id, Direction direction, HailoRTDriver &driver, uint32_t descs_count, uint16_t desc_page_size, const std::string &stream_name = "", LatencyMeterPtr latency_meter = nullptr, uint16_t transfers_per_axi_intr = 1); - + AsyncChannel(vdma::ChannelId channel_id, Direction direction, HailoRTDriver &driver, uint32_t descs_count, uint16_t desc_page_size, const std::string &stream_name, LatencyMeterPtr latency_meter, uint16_t transfers_per_axi_intr, hailo_status &status); @@ -45,10 +45,10 @@ public: virtual hailo_status complete_channel_activation(uint32_t transfer_size, bool resume_pending_transfers) override; virtual hailo_status complete_channel_deactivation() override; - virtual hailo_status transfer(std::shared_ptr buffer, const TransferDoneCallback &user_callback, void *opaque) override; + virtual hailo_status transfer_async(TransferRequest &&transfer_request) override; virtual hailo_status cancel_pending_transfers() override; - 
virtual hailo_status transfer(void *buf, size_t count) override; + virtual hailo_status transfer_sync(void *buf, size_t count, std::chrono::milliseconds timeout) override; // TODO: don't want virtual hailo_status write_buffer(const MemoryView &buffer, std::chrono::milliseconds timeout, const std::function &should_cancel) override; @@ -65,10 +65,10 @@ public: virtual Expected get_d2h_pending_descs_count() override; private: - hailo_status transfer_d2h(std::shared_ptr buffer, const TransferDoneCallback &user_callback, void *opaque); - hailo_status transfer_h2d(std::shared_ptr buffer, const TransferDoneCallback &user_callback, void *opaque); - hailo_status prepare_descriptors(std::shared_ptr buffer, const TransferDoneCallback &user_callback, - void *opaque, InterruptsDomain first_desc_interrupts_domain, InterruptsDomain last_desc_interrupts_domain); + hailo_status transfer_d2h(MappedBufferPtr mapped_buffer, const InternalTransferDoneCallback &user_callback); + hailo_status transfer_h2d(MappedBufferPtr mapped_buffer, const InternalTransferDoneCallback &user_callback); + hailo_status prepare_descriptors(MappedBufferPtr mapped_buffer, const InternalTransferDoneCallback &user_callback, + InterruptsDomain first_desc_interrupts_domain, InterruptsDomain last_desc_interrupts_domain); }; } /* namespace vdma */ diff --git a/hailort/libhailort/src/vdma/channel/boundary_channel.cpp b/hailort/libhailort/src/vdma/channel/boundary_channel.cpp index c5652a8..9a298d0 100644 --- a/hailort/libhailort/src/vdma/channel/boundary_channel.cpp +++ b/hailort/libhailort/src/vdma/channel/boundary_channel.cpp @@ -81,10 +81,6 @@ BoundaryChannel::BoundaryChannel(Type type, vdma::ChannelId channel_id, Directio status = HAILO_INVALID_ARGUMENT; return; } - - m_transfer_done_callback = [this](std::shared_ptr, const hailo_async_transfer_completion_info_t &, void *) { - m_user_interrupt_callback(1); - }; } void BoundaryChannel::clear_pending_buffers_descriptors() @@ -103,22 +99,13 @@ void 
BoundaryChannel::clear_pending_buffers_descriptors() hailo_status BoundaryChannel::trigger_channel_completion(uint16_t hw_num_processed) { - size_t processed_no = 0; + PendingBuffersQueue completed_buffers{PENDING_BUFFERS_SIZE}; { // NOTE: right now, we can retake the 'completion' descriptor for a new transfer before handling the interrupt. // we should have our own pointers indicating whats free instead of reading from HW. - // TODO: consider calculating the last descriptor using the src_desc_avail and src_desc_proc instead of using - // status? - // TODO: we might free a pending buffer which we didn't get an interrupt for yet. we should still handle this - // situation correctly. - - std::lock_guard state_guard(m_state->mutex()); - // Although the hw_num_processed should be a number between 0 and m_descs.size-1, if m_desc.size < 0x10000 - // (the maximum desc size), the actual hw_num_processed is a number between 1 and m_descs.size. Therefore the - // value can be m_descs.size, in this case we change it to zero. - hw_num_processed = static_cast(hw_num_processed & m_state->m_descs.size_mask); + std::unique_lock state_guard(m_state->mutex()); if (m_state->m_is_aborted) { return HAILO_STREAM_ABORTED_BY_USER; @@ -128,6 +115,11 @@ hailo_status BoundaryChannel::trigger_channel_completion(uint16_t hw_num_process return HAILO_STREAM_NOT_ACTIVATED; } + // Although the hw_num_processed should be a number between 0 and m_descs.size-1, if m_desc.size < 0x10000 + // (the maximum desc size), the actual hw_num_processed is a number between 1 and m_descs.size. Therefore the + // value can be m_descs.size, in this case we change it to zero. + hw_num_processed = static_cast(hw_num_processed & m_state->m_descs.size_mask); + if (m_latency_meter != nullptr) { // The latency meter gets an updated hw_num_processed via a call to vdma_interrupts_read_timestamps // (the desc index of the last measured timestamp returned from that ioctl). 
Since update_latency_meter @@ -141,61 +133,38 @@ hailo_status BoundaryChannel::trigger_channel_completion(uint16_t hw_num_process hw_num_processed = latency_meter_hw_num_processed.value(); } - const auto last_num_processed = static_cast(CB_TAIL(m_state->m_descs)); + const auto previous_num_processed = static_cast(CB_TAIL(m_state->m_descs)); - // Calculate pending_buffers_count before iteration, because the iteration removes done transfers + // Calculate pending_buffers_count before iteration, because the iteration removes done transfers. const auto pending_buffers_count = m_state->m_pending_buffers.size(); for (size_t i = 0; i < pending_buffers_count; i++) { - auto &last_pending_buffer_info = m_state->m_pending_buffers.front(); - const auto last_desc_index = static_cast(last_pending_buffer_info.last_desc); - // Transfer is complete if its last descriptor is in [last_num_processed, hw_num_processed) or - // the the buffer is empty (hw_num_processed == get_num_available()) - const bool is_complete = is_desc_between(last_num_processed, hw_num_processed, last_desc_index) || - (hw_num_processed == get_num_available()); - - #ifndef NDEBUG - static constexpr auto STATUS_MASK = 0xFF; - static constexpr auto ERROR_BIT = 1; - const auto status = (*m_desc_list)[last_desc_index].RemainingPageSize_Status & STATUS_MASK; - CHECK(!is_bit_set(status, ERROR_BIT), HAILO_INTERNAL_FAILURE, - "Error while processing descriptor {} of DMA {} on board {}.", - last_desc_index, m_channel_id, m_driver.dev_path()); - - // status is read after hw_num_processed, so we want is_complete -> (status == 1). 
- assert(!is_complete || ((status & 0x1) == 1)); - #endif - - if (!is_complete) { + if (!is_complete(m_state->m_pending_buffers.front(), previous_num_processed, hw_num_processed)) { break; } - // Clear relevant descriptors from previous transfer - if (nullptr != m_latency_meter) { - const auto latency_desc_index = last_pending_buffer_info.latency_measure_desc; - m_desc_list->clear_descriptor(latency_desc_index); - } - m_desc_list->clear_descriptor(last_desc_index); - - _CB_SET(m_state->m_descs.tail, (last_pending_buffer_info.last_desc + 1) & m_state->m_descs.size_mask); - last_pending_buffer_info.on_transfer_done(last_pending_buffer_info.buffer, - hailo_async_transfer_completion_info_t{HAILO_SUCCESS}, last_pending_buffer_info.opaque); - processed_no++; + // Move item from pending_buffers to completed_buffers + completed_buffers.push_back(std::move(m_state->m_pending_buffers.front())); m_state->m_pending_buffers.pop_front(); } } - if (0 < processed_no) { + // completed_buffers were copied from m_pending_buffers inside the lock. Now we are free to process them and call + // the right completion callbacks without state mutex held. 
+ for (auto &pending_buffer : completed_buffers) { + on_pending_buffer_irq(pending_buffer); + } + + if (!completed_buffers.empty()) { m_state->transfer_buffer_cv().notify_all(); } return HAILO_SUCCESS; } -hailo_status BoundaryChannel::register_interrupt_callback(const ProcessingCompleteCallback &callback) +void BoundaryChannel::register_interrupt_callback(const ProcessingCompleteCallback &callback) { std::lock_guard state_guard(m_state->mutex()); m_user_interrupt_callback = callback; - return HAILO_SUCCESS; } CONTROL_PROTOCOL__host_buffer_info_t BoundaryChannel::get_boundary_buffer_info(uint32_t transfer_size) @@ -247,20 +216,19 @@ hailo_status BoundaryChannel::activate(uint32_t transfer_size, bool resume_pendi hailo_status BoundaryChannel::deactivate() { std::unique_lock state_guard(m_state->mutex()); + { + CHECK(m_state->m_is_channel_activated, HAILO_INTERNAL_FAILURE, + "Vdma channel {} is not activated", m_channel_id); + m_state->m_is_channel_activated = false; - CHECK(m_state->m_is_channel_activated, HAILO_INTERNAL_FAILURE, - "Vdma channel {} is not activated", m_channel_id); - m_state->m_is_channel_activated = false; - - // Reset the user callback, so as not to keep objects provided by the user alive (they may lead to a chain of refs - // back to this channel causing it to be leaked). - // Note: PendingBuffers held by m_pending_buffers may still hold copies of the current m_transfer_done_callback, - // which in turn holds a reference to *this. Since we stop the m_wait_interrupts_thread there's no risk that - // these callbacks will be called and we don't need to reset this callback. - m_user_interrupt_callback = ignore_processing_complete; + // Note: PendingBuffers held by m_pending_buffers may still hold copies of the current m_transfer_done_callback, + // which in turn holds a reference to *this. Since we stop the m_wait_interrupts_thread there's no risk that + // these callbacks will be called and we don't need to reset this callback. 
- auto status = complete_channel_deactivation(); - CHECK_SUCCESS(status); + auto status = complete_channel_deactivation(); + CHECK_SUCCESS(status); + } + m_state->m_can_transfer_buffer_cv.notify_all(); return HAILO_SUCCESS; } @@ -270,6 +238,13 @@ BoundaryChannel::Type BoundaryChannel::type() const return m_type; } +hailo_status BoundaryChannel::set_transfers_per_axi_intr(uint16_t transfers_per_axi_intr) +{ + CHECK(0 != transfers_per_axi_intr, HAILO_INVALID_ARGUMENT, "Invalid transfers per axi interrupt"); + m_transfers_per_axi_intr = transfers_per_axi_intr; + return HAILO_SUCCESS; +} + hailo_status BoundaryChannel::flush(const std::chrono::milliseconds &timeout) { if (Direction::D2H == m_direction) { @@ -284,6 +259,10 @@ hailo_status BoundaryChannel::flush(const std::chrono::milliseconds &timeout) status = HAILO_STREAM_ABORTED_BY_USER; return true; // return true so that the wait will finish } + if (!m_state->m_is_channel_activated) { + status = HAILO_STREAM_NOT_ACTIVATED; + return true; // return true so that the wait will finish + } return m_state->m_pending_buffers.empty(); }); CHECK(was_successful, HAILO_TIMEOUT, "Got HAILO_TIMEOUT while waiting for channel {} interrupts on flush", m_channel_id); @@ -315,7 +294,7 @@ bool BoundaryChannel::has_room_in_desc_list(size_t buffer_size) if (desc_num == m_state->m_descs.size) { // Special case when the checking if the buffer is empty - return num_available == num_processed; + return num_available == num_processed; } int num_free = CB_AVAIL(m_state->m_descs, num_available, num_processed); @@ -326,8 +305,12 @@ bool BoundaryChannel::has_room_in_desc_list(size_t buffer_size) return true; } -hailo_status BoundaryChannel::wait(size_t buffer_size, std::chrono::milliseconds timeout) +hailo_status BoundaryChannel::wait(size_t buffer_size, std::chrono::milliseconds timeout, + bool stop_if_deactivated) { + std::unique_lock state_guard(m_state->mutex()); + assert(state_guard.owns_lock()); + const auto max_transfer_size = 
m_desc_list->desc_page_size() * m_desc_list->count(); CHECK(buffer_size < max_transfer_size, HAILO_INVALID_ARGUMENT, "Requested transfer size ({}) must be smaller than ({})", buffer_size, max_transfer_size); @@ -336,25 +319,73 @@ hailo_status BoundaryChannel::wait(size_t buffer_size, std::chrono::milliseconds std::bind(&BoundaryChannel::is_ready_for_transfer_h2d, this, buffer_size) : std::bind(&BoundaryChannel::is_ready_for_transfer_d2h, this, buffer_size); - std::unique_lock state_guard(m_state->mutex()); - hailo_status status = HAILO_SUCCESS; // Best effort - bool was_successful = m_state->transfer_buffer_cv().wait_for(state_guard, timeout, [this, is_ready_for_transfer, &status] () { - if (m_state->m_is_aborted) { - status = HAILO_STREAM_ABORTED_BY_USER; - return true; // return true so that the wait will finish - } + auto status = HAILO_SUCCESS; // Best effort + bool was_successful = m_state->transfer_buffer_cv().wait_for(state_guard, timeout, + [this, is_ready_for_transfer, stop_if_deactivated, &status] () { + if (m_state->m_is_aborted) { + status = HAILO_STREAM_ABORTED_BY_USER; + return true; // return true so that the wait will finish + } + if (stop_if_deactivated && !m_state->m_is_channel_activated) { + status = HAILO_STREAM_NOT_ACTIVATED; + return true; // return true so that the wait will finish + } - return is_ready_for_transfer(); - }); + return is_ready_for_transfer(); + } + ); CHECK(was_successful, HAILO_TIMEOUT, "Got HAILO_TIMEOUT while waiting for channel {} interrupts", m_channel_id); return status; } -hailo_status BoundaryChannel::set_transfers_per_axi_intr(uint16_t transfers_per_axi_intr) +bool BoundaryChannel::is_complete(const PendingBuffer &pending_buffer, uint16_t previous_num_processed, + uint16_t current_num_processed) { - CHECK(0 != transfers_per_axi_intr, HAILO_INVALID_ARGUMENT, "Invalid transfers per axi interrupt"); - m_transfers_per_axi_intr = transfers_per_axi_intr; - return HAILO_SUCCESS; + // Transfer is complete if its last 
descriptor is in [previous_num_processed, current_num_processed) or + // the the buffer is empty (previous_num_processed == get_num_available()) + return is_desc_between(previous_num_processed, current_num_processed, pending_buffer.last_desc) || + (current_num_processed == get_num_available()); +} + + +void BoundaryChannel::on_pending_buffer_irq(PendingBuffer &pending_buffer) +{ +#ifndef NDEBUG + auto &last_desc = (*m_desc_list)[pending_buffer.last_desc]; + if (!last_desc.is_done() || last_desc.is_error()) { + LOGGER__ERROR("Error while processing descriptor {} of DMA {} on device {} DESC_STATUS=0x{:x}.", + pending_buffer.last_desc, m_channel_id, m_driver.device_id(), last_desc.status()); + pending_buffer.on_transfer_done(HAILO_INTERNAL_FAILURE); + return; + } +#endif + + { + std::unique_lock state_guard(m_state->mutex()); + + // First, we want to call m_user_interrupt_callback. This callback is meant to be called right after we + // got an interrupt and before the user can read the frame or write a new frame. + // We call this callback inside the lock to make sure it wont be called when the channel is aborted. + if (!m_state->m_is_aborted) { + m_user_interrupt_callback(); + } + + // Then we increase desc num_proc (can happen only in this flow). After it is increased - + // 1. On D2H channels - the output can be read by the user. + // 2. On H2D channels - new input can be written to the buffer. + // Clear relevant descriptors from previous transfer + if (nullptr != m_latency_meter) { + m_desc_list->clear_descriptor(pending_buffer.latency_measure_desc); + } + m_desc_list->clear_descriptor(pending_buffer.last_desc); + + _CB_SET(m_state->m_descs.tail, (pending_buffer.last_desc + 1) & m_state->m_descs.size_mask); + } + + // Finally, we notify user callbacks registered with the transfer. + // We want to make sure that the callbacks are called after the descriptors can be reused (So the user will + // be able to start new transfer). 
+ pending_buffer.on_transfer_done(HAILO_SUCCESS); } } /* namespace vdma */ diff --git a/hailort/libhailort/src/vdma/channel/boundary_channel.hpp b/hailort/libhailort/src/vdma/channel/boundary_channel.hpp index d578a24..6b7580e 100644 --- a/hailort/libhailort/src/vdma/channel/boundary_channel.hpp +++ b/hailort/libhailort/src/vdma/channel/boundary_channel.hpp @@ -32,7 +32,7 @@ namespace vdma { class BoundaryChannel; using BoundaryChannelPtr = std::shared_ptr; -using ProcessingCompleteCallback = std::function; +using ProcessingCompleteCallback = std::function; class BoundaryChannel : public ChannelBase { @@ -63,10 +63,15 @@ public: hailo_status deactivate(); Type type() const; + hailo_status set_transfers_per_axi_intr(uint16_t transfers_per_axi_intr); void clear_pending_buffers_descriptors(); hailo_status trigger_channel_completion(uint16_t hw_num_processed); - virtual hailo_status register_interrupt_callback(const ProcessingCompleteCallback &callback); + + // Register some new interrupt callback (and reset previous). + // Note - when reseting an old callback, it may still be called (until interrupts are stopped). + void register_interrupt_callback(const ProcessingCompleteCallback &callback); + CONTROL_PROTOCOL__host_buffer_info_t get_boundary_buffer_info(uint32_t transfer_size); virtual hailo_status abort(); virtual hailo_status clear_abort(); @@ -74,28 +79,31 @@ public: // For D2H channels, we don't buffer data // Hence there's nothing to be "flushed" and the function will return with HAILO_SUCCESS virtual hailo_status flush(const std::chrono::milliseconds &timeout); - virtual hailo_status wait(size_t buffer_size, std::chrono::milliseconds timeout); - hailo_status set_transfers_per_axi_intr(uint16_t transfers_per_axi_intr); - virtual hailo_status transfer(void *buf, size_t count) = 0; + // Blocks until buffer_size bytes can transferred to/from the channel or until timeout has elapsed. 
+ // If stop_if_deactivated is true, this function will return HAILO_STREAM_NOT_ACTIVATED after deactivate() + // is called. Otherwise, this function can be used to access the buffer while the channel is not active. + hailo_status wait(size_t buffer_size, std::chrono::milliseconds timeout, bool stop_if_deactivated=false); + + // Transfers count bytes to/from buf via the channel. + // Blocks until the transfer can be registered or timeout has elapsed. Hence, calling 'wait(buffer_size, timeout)' + // prior to 'transfer(buf, buffer_size)' is redundant. + virtual hailo_status transfer_sync(void *buf, size_t count, std::chrono::milliseconds timeout) = 0; + // TODO: can write_buffer + send_pending_buffer move to BufferedChannel? (HRT-9105) // Either write_buffer + send_pending_buffer or transfer (h2d) should be used on a given channel, not both virtual hailo_status write_buffer(const MemoryView &buffer, std::chrono::milliseconds timeout, const std::function &should_cancel) = 0; virtual hailo_status send_pending_buffer() = 0; - - // TODO: move buffer? - // TODO: If the same callback is used for different buffers we need a way to tell the transfers appart - // - Passing buffer to callback could do the trick. However, what will happen if the same buffer has been transferred twice? - // - Maybe add a unique transfer_id? At least unique in the context of the maximum number of ongoing transfers - // TODO: What if there's no more room in desc list so the transfer can't be programmed? Should the function block - // - Maybe define that if more than max_concurrent_transfers() (based on a param passed to create) the function will return a failure? + // When the transfer is complete (i.e. 
data is written to/from buffer with a D2H/H2D channel) callback is called - // buffer can't be freed until callback is called - virtual hailo_status transfer(std::shared_ptr buffer, const TransferDoneCallback &user_callback, void *opaque) = 0; + // transfer_request.buffer can't be freed/changed until callback is called. + virtual hailo_status transfer_async(TransferRequest &&transfer_request) = 0; - // Calls all pending transfer callbacks (if they exist), marking them as canceled by passing hailo_async_transfer_completion_info_t{HAILO_STREAM_NOT_ACTIVATED}. - // Note: This function is to be called on a deactivated channel object. Calling on an active channel will lead to unexpected results + // Calls all pending transfer callbacks (if they exist), marking them as canceled by passing + // HAILO_STREAM_ABORTED_BY_USER as a status to the callbacks. + // Note: This function is to be called on a deactivated channel object. Calling on an active channel will lead to + // unexpected results virtual hailo_status cancel_pending_transfers() = 0; virtual void notify_all() = 0; @@ -117,7 +125,7 @@ public: virtual Expected get_d2h_pending_descs_count() = 0; protected: - static void ignore_processing_complete(uint32_t) {} + static void ignore_processing_complete() {} void stop_interrupts_thread(std::unique_lock &lock); virtual bool is_ready_for_transfer_h2d(size_t buffer_size); virtual bool is_ready_for_transfer_d2h(size_t buffer_size); @@ -127,12 +135,14 @@ protected: virtual hailo_status complete_channel_deactivation() = 0; const Type m_type; - TransferDoneCallback m_transfer_done_callback; ProcessingCompleteCallback m_user_interrupt_callback; uint16_t m_transfers_per_axi_intr; private: bool has_room_in_desc_list(size_t buffer_size); + bool is_complete(const PendingBuffer &pending_buffer, uint16_t previous_num_processed, + uint16_t current_num_processed); + void on_pending_buffer_irq(PendingBuffer &buffer); }; } /* namespace vdma */ diff --git 
a/hailort/libhailort/src/vdma/channel/buffered_channel.cpp b/hailort/libhailort/src/vdma/channel/buffered_channel.cpp index d1176ee..55602d0 100644 --- a/hailort/libhailort/src/vdma/channel/buffered_channel.cpp +++ b/hailort/libhailort/src/vdma/channel/buffered_channel.cpp @@ -12,8 +12,6 @@ #include "common/logger_macros.hpp" #include "vdma/channel/buffered_channel.hpp" -#include "vdma/memory/mapped_buffer_factory.hpp" -#include "vdma/memory/mapped_buffer_impl.hpp" #include "hw_consts.hpp" #include @@ -53,7 +51,7 @@ BufferedChannel::BufferedChannel(vdma::ChannelId channel_id, Direction direction return; } - auto mapped_buffer = create_mapped_buffer(descs_count, desc_page_size, direction, driver); + auto mapped_buffer = MappedBuffer::create_shared(driver, direction, descs_count * desc_page_size); if (!mapped_buffer) { LOGGER__ERROR("Failed building mapped vdma buffer"); status = mapped_buffer.status(); @@ -72,21 +70,6 @@ BufferedChannel::BufferedChannel(vdma::ChannelId channel_id, Direction direction status = HAILO_SUCCESS; } -Expected> BufferedChannel::create_mapped_buffer(uint32_t descs_count, uint16_t desc_page_size, - Direction direction, HailoRTDriver &driver) -{ - auto desc_page_size_value = driver.calc_desc_page_size(desc_page_size); - CHECK_AS_EXPECTED(is_powerof2(desc_page_size_value), HAILO_INVALID_ARGUMENT, "Descriptor page_size must be a power of two."); - - auto mapped_buffer_exp = MappedBufferFactory::create_mapped_buffer(descs_count * desc_page_size_value, direction, driver); - CHECK_EXPECTED(mapped_buffer_exp); - - auto mapped_buffer = make_shared_nothrow(mapped_buffer_exp.release()); - CHECK_NOT_NULL_AS_EXPECTED(mapped_buffer, HAILO_OUT_OF_HOST_MEMORY); - - return mapped_buffer; -} - hailo_status BufferedChannel::complete_channel_deactivation() { const auto status = store_channel_buffer_state(); @@ -189,20 +172,19 @@ hailo_status BufferedChannel::complete_channel_activation(uint32_t transfer_size } if ((Direction::D2H == m_direction) && 
(transfer_size != 0)) { - const auto transfers_in_buffer = get_transfers_count_in_buffer(transfer_size); + const auto max_transfers_in_buffer = get_transfers_count_in_buffer(transfer_size); + const auto transfers_in_buffer = std::min(max_transfers_in_buffer, m_state->m_pending_buffers.capacity()); const auto pending_descs = get_d2h_pending_descs_count(); const auto descs_in_transfer = m_desc_list->descriptors_in_buffer(transfer_size); const auto pending_transfers = pending_descs.value() / descs_in_transfer; // We prepare descs in advance for D2H channels: - // (1) The channel's buffer can store up to 'transfers_in_buffer' frames of size transfer_size - // (2) There are 'pending_transfers' frames from the previous channel activation (we assume that the same - // 'transfer_size' was used) - // (3) Hence, we have room for 'transfers_in_buffer - pending_transfers' frames in the buffer currently. - // (4) However, we can allow at most 'm_state->m_pending_buffers.capacity()' transfers. We can't store more than + // (1) The channel's buffer can store up to 'max_transfers_in_buffer' frames of size transfer_size + // (2) However, we can allow at most 'm_state->m_pending_buffers.capacity()' transfers. We can't store more than // that in the pending buffers circular array. - // (5) Hence, we'll take the minimum between (3) and (4). - const auto transfers_count = std::min(transfers_in_buffer - pending_transfers, - m_state->m_pending_buffers.capacity()); + // (3) There are 'pending_transfers' frames from the previous channel activation (we assume that the same + // 'transfer_size' was used) + // (4) Hence, we have room for 'min(transfers_in_buffer, pending_buffers.capacity()) - pending_transfers' frames in the buffer currently. 
+ const auto transfers_count = transfers_in_buffer - pending_transfers; status = prepare_d2h_pending_descriptors(transfer_size, static_cast(transfers_count)); CHECK_SUCCESS(status); } @@ -210,32 +192,31 @@ hailo_status BufferedChannel::complete_channel_activation(uint32_t transfer_size return HAILO_SUCCESS; } -hailo_status BufferedChannel::transfer(void *buf, size_t count) +hailo_status BufferedChannel::transfer_sync(void *buf, size_t count, std::chrono::milliseconds timeout) { CHECK_NOT_NULL(buf, HAILO_INVALID_ARGUMENT); CHECK(0 != count, HAILO_INVALID_ARGUMENT); - std::lock_guard state_guard(m_state->mutex()); - if (m_state->m_is_aborted) { - LOGGER__INFO("Tried to write to aborted channel {}", m_channel_id); - return HAILO_STREAM_ABORTED_BY_USER; + auto status = wait(count, timeout); + if ((HAILO_STREAM_NOT_ACTIVATED == status) || (HAILO_STREAM_ABORTED_BY_USER == status)) { + LOGGER__INFO("wait failed because channel {} is not activated/aborted (status {})", m_channel_id, status); + return status; } + CHECK_SUCCESS(status, "wait failed with status {} (channel id: {}, timeout: {}ms)", status, m_channel_id, timeout.count()); - hailo_status status = HAILO_UNINITIALIZED; + std::unique_lock state_guard(m_state->mutex()); if (Direction::H2D == m_direction) { status = transfer_h2d(buf, count); } else { status = transfer_d2h(buf, count); } - if (HAILO_STREAM_NOT_ACTIVATED == status) { - LOGGER__INFO("Transfer failed because Channel {} is not activated", m_channel_id); - return HAILO_STREAM_NOT_ACTIVATED; - } - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("Transfer failed for channel {} with status {}", m_channel_id, status); + if ((HAILO_STREAM_NOT_ACTIVATED == status) || (HAILO_STREAM_ABORTED_BY_USER == status)) { + LOGGER__INFO("transfer failed because channel {} is not activated/aborted (status {})", m_channel_id, status); return status; } + CHECK_SUCCESS(status, "transfer failed with status {} (channel id: {}, timeout: {}ms)", status, m_channel_id, 
timeout.count()); + return HAILO_SUCCESS; } @@ -263,20 +244,21 @@ hailo_status BufferedChannel::write_to_channel_buffer_cyclic(const MemoryView &b "Can't write {} bytes to channel buffer (channel buffer size {})", buffer.size(), m_channel_buffer->size()); + static const auto SYNC_TO_DEIVCE = HailoRTDriver::DmaSyncDirection::TO_DEVICE; const auto size_to_end = m_channel_buffer->size() - channel_buffer_write_offset; const auto first_chunk_size = std::min(size_to_end, buffer.size()); const auto first_chunk_addr = static_cast(m_channel_buffer->user_address()) + channel_buffer_write_offset; // Copy from buffer to m_channel_buffer and then synchronize std::memcpy(first_chunk_addr, buffer.data(), first_chunk_size); - auto status = m_channel_buffer->pimpl->synchronize(channel_buffer_write_offset, first_chunk_size); + auto status = m_channel_buffer->synchronize(channel_buffer_write_offset, first_chunk_size, SYNC_TO_DEIVCE); CHECK_SUCCESS(status); const auto remaining_size = buffer.size() - first_chunk_size; if (remaining_size > 0) { // Copy the remainder from buffer to m_channel_buffer and then synchronize std::memcpy(m_channel_buffer->user_address(), buffer.data() + first_chunk_size, remaining_size); - status = m_channel_buffer->pimpl->synchronize(0, remaining_size); + status = m_channel_buffer->synchronize(0, remaining_size, SYNC_TO_DEIVCE); CHECK_SUCCESS(status); } @@ -289,19 +271,20 @@ hailo_status BufferedChannel::read_from_channel_buffer_cyclic(uint8_t *dest_buff "Can't read {} bytes from channel buffer (channel buffer size {})", read_size, m_channel_buffer->size()); + static const auto SYNC_TO_HOST = HailoRTDriver::DmaSyncDirection::TO_HOST; const auto size_to_end = m_channel_buffer->size() - channel_buffer_read_offset; const auto first_chunk_size = std::min(size_to_end, read_size); const auto first_chunk_addr = static_cast(m_channel_buffer->user_address()) + channel_buffer_read_offset; // Synchronize m_channel_buffer and copy to dest_buffer - auto status = 
m_channel_buffer->pimpl->synchronize(channel_buffer_read_offset, first_chunk_size); + auto status = m_channel_buffer->synchronize(channel_buffer_read_offset, first_chunk_size, SYNC_TO_HOST); CHECK_SUCCESS(status); std::memcpy(dest_buffer, first_chunk_addr, first_chunk_size); const auto remaining_size = read_size - first_chunk_size; if (remaining_size > 0) { // Synchronize m_channel_buffer and copy remainder to dest_buffer - status = m_channel_buffer->pimpl->synchronize(0, remaining_size); + status = m_channel_buffer->synchronize(0, remaining_size, SYNC_TO_HOST); CHECK_SUCCESS(status); std::memcpy(dest_buffer + first_chunk_size, m_channel_buffer->user_address(), remaining_size); } @@ -431,7 +414,7 @@ hailo_status BufferedChannel::send_pending_buffer() return HAILO_SUCCESS; } -hailo_status BufferedChannel::transfer(std::shared_ptr, const TransferDoneCallback &, void *) +hailo_status BufferedChannel::transfer_async(TransferRequest &&) { return HAILO_NOT_IMPLEMENTED; } @@ -507,9 +490,9 @@ hailo_status BufferedChannel::prepare_descriptors(size_t transfer_size, Interrup assert(desired_desc_num <= MAX_DESCS_COUNT); uint16_t desc_num = static_cast(desired_desc_num); - int num_available = get_num_available(); - int num_processed = CB_TAIL(m_state->m_descs); - int num_free = CB_AVAIL(m_state->m_descs, num_available, num_processed); + const auto num_available = get_num_available(); + const auto num_processed = CB_TAIL(m_state->m_descs); + const auto num_free = CB_AVAIL(m_state->m_descs, num_available, num_processed); if (num_free < desc_num) { return HAILO_OUT_OF_DESCRIPTORS; } @@ -520,15 +503,16 @@ hailo_status BufferedChannel::prepare_descriptors(size_t transfer_size, Interrup first_desc_interrupts_domain); } auto actual_desc_count = m_desc_list->program_last_descriptor(transfer_size, last_desc_interrupts_domain, - num_available, true); + num_available); if (!actual_desc_count) { LOGGER__ERROR("Failed to program desc_list for channel {}", m_channel_id); return 
actual_desc_count.status(); } - assert (actual_desc_count.value() == desc_num); - int last_desc_avail = ((num_available + desc_num - 1) & m_state->m_descs.size_mask); + assert(actual_desc_count.value() == desc_num); + assert(desc_num > 0); + const auto last_desc_avail = static_cast((num_available + desc_num - 1) & m_state->m_descs.size_mask); - m_state->add_pending_buffer(num_available, last_desc_avail, m_direction, m_transfer_done_callback); + m_state->add_pending_buffer(num_available, last_desc_avail, m_direction); return inc_num_available(desc_num); } diff --git a/hailort/libhailort/src/vdma/channel/buffered_channel.hpp b/hailort/libhailort/src/vdma/channel/buffered_channel.hpp index d46ba7f..ac0d8c4 100644 --- a/hailort/libhailort/src/vdma/channel/buffered_channel.hpp +++ b/hailort/libhailort/src/vdma/channel/buffered_channel.hpp @@ -11,9 +11,9 @@ #ifndef _HAILO_VDMA_BUFFERED_CHANNEL_HPP_ #define _HAILO_VDMA_BUFFERED_CHANNEL_HPP_ -#include "hailo/hailort.h" -#include "hailo/dma_mapped_buffer.hpp" +#include "hailo/hailort.h" +#include "vdma/memory/mapped_buffer.hpp" #include "vdma/channel/boundary_channel.hpp" @@ -38,12 +38,15 @@ public: BufferedChannel &operator=(BufferedChannel &&other) = delete; virtual ~BufferedChannel() = default; - virtual hailo_status transfer(void *buf, size_t count) override; + // Writes/reads from the channel buffer. This function can work even if the channel is not activated (for example - + // reading data if it is ready). 
+ virtual hailo_status transfer_sync(void *buf, size_t count, std::chrono::milliseconds timeout) override; // Either write_buffer + send_pending_buffer or transfer (h2d) should be used on a given channel, not both virtual hailo_status write_buffer(const MemoryView &buffer, std::chrono::milliseconds timeout, const std::function &should_cancel) override; virtual hailo_status send_pending_buffer() override; - virtual hailo_status transfer(std::shared_ptr, const TransferDoneCallback &, void *) override; + // TODO: merge with "transfer_sync(void *buf, size_t count)"? (HRT-10207) + virtual hailo_status transfer_async(TransferRequest &&) override; virtual hailo_status cancel_pending_transfers() override; virtual hailo_status complete_channel_activation(uint32_t transfer_size, bool resume_pending_transfers) override; virtual hailo_status complete_channel_deactivation() override; @@ -57,8 +60,6 @@ public: virtual void notify_all() override; private: - static Expected> create_mapped_buffer(uint32_t descs_count, uint16_t desc_page_size, - Direction direction, HailoRTDriver &driver); hailo_status transfer_h2d(void *buf, size_t count); hailo_status write_buffer_impl(const MemoryView &buffer); @@ -76,9 +77,9 @@ private: // TODO: m_channel_buffer gets bound to ChannelBase::m_desc_list meaning the desc in that list point to dma addrs // that back m_channel_buffer. Because ChannelBase gets dtor'd after BufferedChannel, m_channel_buffer ChannelBase::m_desc_list // will point to a freed buffer. This is ok because the channel objects only get dtor'd after they are deactivated by the fw. - // Might want to enforce this in hailort as well (e.g. desc lists can hold shared_ptrs to DmaMappedBuffer while they are bound). + // Might want to enforce this in hailort as well (e.g. desc lists can hold shared_ptrs to MappedBuffer while they are bound). 
// (HRT-9110) - std::shared_ptr m_channel_buffer; + std::shared_ptr m_channel_buffer; // Using CircularArray because it won't allocate or free memory wile pushing and popping. The fact that it is circular is not relevant here CircularArray m_pending_buffers_sizes; std::atomic_uint16_t m_pending_num_avail_offset; diff --git a/hailort/libhailort/src/vdma/channel/channel_base.cpp b/hailort/libhailort/src/vdma/channel/channel_base.cpp index 4c233fd..e872e73 100644 --- a/hailort/libhailort/src/vdma/channel/channel_base.cpp +++ b/hailort/libhailort/src/vdma/channel/channel_base.cpp @@ -41,12 +41,6 @@ ChannelBase::ChannelBase(vdma::ChannelId channel_id, Direction direction, HailoR return; } - if (descs_count > MAX_DESCS_COUNT) { - LOGGER__ERROR("Vdma channel descs_count mustn't be larger than {}", MAX_DESCS_COUNT); - status = HAILO_INVALID_ARGUMENT; - return; - } - auto state = VdmaChannelState::create(descs_count, (nullptr != m_latency_meter)); if(!state) { LOGGER__ERROR("Failed to create channel's state"); @@ -55,7 +49,6 @@ ChannelBase::ChannelBase(vdma::ChannelId channel_id, Direction direction, HailoR } m_state = state.release(); - // Allocate descriptor list (host side) status = allocate_descriptor_list(descs_count, desc_page_size); if (HAILO_SUCCESS != status) { LOGGER__ERROR("Failed to allocate Vdma buffer for channel transfer! 
status={}", status); @@ -134,6 +127,12 @@ uint16_t ChannelBase::get_num_available() return num_available; } +void ChannelBase::set_num_proc_value(uint16_t new_value) +{ + assert(new_value < m_state->m_descs.size); + _CB_SET(m_state->m_descs.tail, new_value); +} + Expected ChannelBase::get_hw_num_processed() { auto hw_num_processed = m_host_registers.get_num_processed(); @@ -153,10 +152,8 @@ ChannelBase::Direction ChannelBase::other_direction(Direction direction) hailo_status ChannelBase::allocate_descriptor_list(uint32_t descs_count, uint16_t desc_page_size) { - auto desc_page_size_value = m_driver.calc_desc_page_size(desc_page_size); - CHECK(is_powerof2(desc_page_size_value), HAILO_INVALID_ARGUMENT, "Descriptor page_size must be a power of two."); - - auto desc_list_exp = DescriptorList::create(descs_count, desc_page_size_value, m_driver); + static const bool CIRCULAR = true; + auto desc_list_exp = DescriptorList::create(descs_count, desc_page_size, CIRCULAR, m_driver); CHECK_EXPECTED_AS_STATUS(desc_list_exp); m_desc_list = make_shared_nothrow(desc_list_exp.release()); diff --git a/hailort/libhailort/src/vdma/channel/channel_base.hpp b/hailort/libhailort/src/vdma/channel/channel_base.hpp index 5f56b81..8ae5342 100644 --- a/hailort/libhailort/src/vdma/channel/channel_base.hpp +++ b/hailort/libhailort/src/vdma/channel/channel_base.hpp @@ -89,6 +89,7 @@ protected: Expected is_aborted(); hailo_status set_num_avail_value(uint16_t new_value); uint16_t get_num_available(); + void set_num_proc_value(uint16_t new_value); Expected get_hw_num_processed(); hailo_status inc_num_available(uint16_t value); static Direction other_direction(const Direction direction); diff --git a/hailort/libhailort/src/vdma/channel/channel_id.hpp b/hailort/libhailort/src/vdma/channel/channel_id.hpp index 2934456..09fb043 100644 --- a/hailort/libhailort/src/vdma/channel/channel_id.hpp +++ b/hailort/libhailort/src/vdma/channel/channel_id.hpp @@ -36,6 +36,12 @@ struct ChannelId { return 
std::make_pair(a.engine_index, a.channel_index) < std::make_pair(b.engine_index, b.channel_index); } + + // Allow channel Id's to be compared + friend bool operator==(const ChannelId &a, const ChannelId &b) + { + return ((a.channel_index == b.channel_index) && (a.engine_index == b.engine_index)); + } }; } /* namespace vdma */ diff --git a/hailort/libhailort/src/vdma/channel/channel_state.cpp b/hailort/libhailort/src/vdma/channel/channel_state.cpp index 0880f04..2afebb2 100644 --- a/hailort/libhailort/src/vdma/channel/channel_state.cpp +++ b/hailort/libhailort/src/vdma/channel/channel_state.cpp @@ -220,19 +220,19 @@ void VdmaChannelState::reset_previous_state_counters() m_d2h_read_desc_index_abs = 0; } -void VdmaChannelState::add_pending_buffer(uint32_t first_desc, uint32_t last_desc, HailoRTDriver::DmaDirection direction, - const TransferDoneCallback &on_transfer_done, std::shared_ptr buffer, void *opaque) +void VdmaChannelState::add_pending_buffer(uint16_t first_desc, uint16_t last_desc, HailoRTDriver::DmaDirection direction, + const InternalTransferDoneCallback &on_transfer_done, MappedBufferPtr mapped_buffer) { if (m_pending_buffers.full()) { // TODO- HRT-8900 : Fix log and check if should return error LOGGER__ERROR("no avail space"); } + PendingBuffer pending_buffer{}; pending_buffer.last_desc = last_desc; pending_buffer.latency_measure_desc = (direction == HailoRTDriver::DmaDirection::H2D) ? 
first_desc : last_desc; pending_buffer.on_transfer_done = on_transfer_done; - pending_buffer.buffer = buffer; - pending_buffer.opaque = opaque; + pending_buffer.mapped_buffer = mapped_buffer; m_pending_buffers.push_back(std::move(pending_buffer)); } diff --git a/hailort/libhailort/src/vdma/channel/channel_state.hpp b/hailort/libhailort/src/vdma/channel/channel_state.hpp index ece1e27..5bc964e 100644 --- a/hailort/libhailort/src/vdma/channel/channel_state.hpp +++ b/hailort/libhailort/src/vdma/channel/channel_state.hpp @@ -15,8 +15,8 @@ #include "hailo/hailort.h" #include "os/hailort_driver.hpp" #include "common/circular_buffer.hpp" -#include "hailo/dma_mapped_buffer.hpp" -#include "hailo/stream.hpp" +#include "vdma/memory/mapped_buffer.hpp" +#include "stream_common/async_common.hpp" #include #include @@ -30,13 +30,17 @@ namespace hailort { namespace vdma { struct PendingBuffer { - uint32_t last_desc; - uint32_t latency_measure_desc; - TransferDoneCallback on_transfer_done; - std::shared_ptr buffer; - void *opaque; + uint16_t last_desc; + uint16_t latency_measure_desc; + InternalTransferDoneCallback on_transfer_done; + MappedBufferPtr mapped_buffer; }; +// We use std::array for PendingBuffersQueue to avoid dynamic allocations allocations. We are doing it for two reasons: +// 1. It relies on memory shared between process (so we can't have dynamic allocation). +// 2. We put it on interrupt handler stack - we want to avoid allocations. 
+using PendingBuffersQueue = CircularArray>; + class ChannelBase; class BoundaryChannel; class AsyncChannel; @@ -91,6 +95,7 @@ using RecursiveSharedMutex = std::recursive_mutex; using SharedConditionVariable = std::condition_variable_any; #endif + class VdmaChannelState final { public: @@ -101,16 +106,19 @@ public: VdmaChannelState(VdmaChannelState &&other) = delete; ~VdmaChannelState() = default; + static void empty_transfer_done_callback(hailo_status){} + void reset_counters(); void reset_previous_state_counters(); // Each transfer on the channel is logged by a PendingBuffer: // - first_desc/last_desc - first and last descriptors of the transfer // - direction - transfer's direction // - on_transfer_done - callback to be called once the transfer is complete (i.e. when an interrupt is received on last_desc) - // - buffer - points to the vdma mapped buffer being transferred (may be null) - // - opaque - context to be transferred to the callback (may be null) - void add_pending_buffer(uint32_t first_desc, uint32_t last_desc, HailoRTDriver::DmaDirection direction, - const TransferDoneCallback &on_transfer_done, std::shared_ptr buffer = nullptr, void *opaque = nullptr); + // - context - transfer context + // - mapped_buffer - buffer's dma mapping (may be null) + void add_pending_buffer(uint16_t first_desc, uint16_t last_desc, HailoRTDriver::DmaDirection direction, + const InternalTransferDoneCallback &on_transfer_done = empty_transfer_done_callback, + MappedBufferPtr mapped_buffer = nullptr); RecursiveSharedMutex &mutex() { @@ -152,8 +160,7 @@ private: bool m_is_channel_activated; - // On pending buffer with must use std::array because it relays on the shared memory (and std::vector uses new malloc) - CircularArray> m_pending_buffers; + PendingBuffersQueue m_pending_buffers; // TODO: describe why we must have our own num_available and num_proc. // it's not just for efficiency but its critical to avoid a potential bug - see Avigail email. 
// TODO: Consider C11 stdatomic diff --git a/hailort/libhailort/src/vdma/channel/interrupts_dispatcher.cpp b/hailort/libhailort/src/vdma/channel/interrupts_dispatcher.cpp index 99ad909..a59699f 100644 --- a/hailort/libhailort/src/vdma/channel/interrupts_dispatcher.cpp +++ b/hailort/libhailort/src/vdma/channel/interrupts_dispatcher.cpp @@ -23,84 +23,110 @@ Expected> InterruptsDispatcher::create(std InterruptsDispatcher::InterruptsDispatcher(std::reference_wrapper driver) : m_driver(driver), - m_is_running(false), - m_channels_bitmap() + m_interrupts_thread([this] { wait_interrupts(); }) {} InterruptsDispatcher::~InterruptsDispatcher() { - if (m_is_running) { - stop(); + if (m_wait_context != nullptr) { + auto status = stop(); + if (status != HAILO_SUCCESS) { + LOGGER__ERROR("Failed stopping interrupts dispatcher on destructor"); + } + } + + if (m_interrupts_thread.joinable()) { + signal_thread_quit(); + m_interrupts_thread.join(); } } hailo_status InterruptsDispatcher::start(const ChannelsBitmap &channels_bitmap, bool enable_timestamp_measure, const ProcessIrqCallback &process_irq) { - CHECK(!m_is_running, HAILO_INVALID_OPERATION, "Interrupt thread already running"); - assert(m_channel_threads.empty()); - assert(m_channels_bitmap == ChannelsBitmap{}); + { + std::unique_lock lock(m_mutex); + CHECK(m_wait_context == nullptr, HAILO_INVALID_OPERATION, "Interrupt thread already running"); - m_channels_bitmap = channels_bitmap; + auto wait_context = make_unique_nothrow(WaitContext{channels_bitmap, process_irq}); + CHECK_NOT_NULL(wait_context, HAILO_OUT_OF_HOST_MEMORY); + m_wait_context = std::move(wait_context); - auto status = m_driver.get().vdma_interrupts_enable(m_channels_bitmap, enable_timestamp_measure); - CHECK_SUCCESS(status, "Failed to enable vdma interrupts"); - - // Setting m_is_running will allow the threads to run - m_is_running = true; - m_channel_threads.emplace_back([this, process_irq]() { - // m_channels_bitmap may be changed by 
InterruptsDispatcher::stop. To avoid wait for 0 channels, - // we use copy of m_channels_bitmap. - ChannelsBitmap channels_bitmap_local = m_channels_bitmap; - wait_interrupts(channels_bitmap_local, process_irq); - }); + auto status = m_driver.get().vdma_interrupts_enable(m_wait_context->bitmap, enable_timestamp_measure); + CHECK_SUCCESS(status, "Failed to enable vdma interrupts"); + } + m_cond.notify_one(); return HAILO_SUCCESS; } hailo_status InterruptsDispatcher::stop() { - CHECK(m_is_running, HAILO_INVALID_OPERATION, "Interrupts thread not started"); - assert(!m_channel_threads.empty()); - assert(m_channels_bitmap != ChannelsBitmap{}); + std::unique_lock lock(m_mutex); + CHECK(m_wait_context != nullptr, HAILO_INVALID_OPERATION, "Interrupt thread not running"); - // Signal threads to stop execution - m_is_running = false; + // Nullify wait context so the thread will pause + const auto bitmap = m_wait_context->bitmap; + m_wait_context = nullptr; // Calling disable interrupts will cause the vdma_interrupts_wait to return. - auto status = m_driver.get().vdma_interrupts_disable(m_channels_bitmap); + auto status = m_driver.get().vdma_interrupts_disable(bitmap); CHECK_SUCCESS(status, "Failed to disable vdma interrupts"); - m_channels_bitmap = ChannelsBitmap{}; - for (auto &thread : m_channel_threads) { - if (thread.joinable()) { - thread.join(); - } - } - m_channel_threads.clear(); + // Needs to make sure that the interrupts thread is disabled. + // The wait is needed because otherwise, on a fast stop() and start(), the next start() may accept + // interrupts from previous run. 
+ m_cond.wait(lock, [&]{ return m_thread_state == ThreadState::not_active; }); return HAILO_SUCCESS; } -void InterruptsDispatcher::wait_interrupts(const ChannelsBitmap &channels_bitmap, const ProcessIrqCallback &process_irq) +void InterruptsDispatcher::wait_interrupts() { OsUtils::set_current_thread_name("CHANNEL_INTR"); - while (m_is_running) { + + std::unique_lock lock(m_mutex); + while (true) { + + m_thread_state = ThreadState::not_active; + m_cond.notify_one(); // Wake up stop() + + m_cond.wait(lock, [&]{ return m_should_quit || (m_wait_context != nullptr); }); + if (m_should_quit) { + break; + } + + m_thread_state = ThreadState::active; + auto wait_context = *m_wait_context; + // vdma_interrupts_wait is a blocking function that returns in this scenarios: // 1. We got a new interrupts, irq_data will be passed to the process_irq callback // 2. vdma_interrupts_disable will be called, vdma_interrupts_wait will return with an empty list. // 3. Other error returns - shouldn't really happen, we exit the interrupt thread. 
- auto irq_data = m_driver.get().vdma_interrupts_wait(channels_bitmap); + lock.unlock(); + auto irq_data = m_driver.get().vdma_interrupts_wait(wait_context.bitmap); + lock.lock(); + if (!irq_data.has_value()) { LOGGER__ERROR("Interrupt thread exit with {}", irq_data.status()); break; } if (irq_data->channels_count > 0) { - process_irq(irq_data.release()); + wait_context.process_irq(irq_data.release()); } } } +void InterruptsDispatcher::signal_thread_quit() +{ + { + std::unique_lock lock(m_mutex); + assert(m_thread_state == ThreadState::not_active); + m_should_quit = true; + } + m_cond.notify_one(); +} + } /* namespace vdma */ } /* namespace hailort */ diff --git a/hailort/libhailort/src/vdma/channel/interrupts_dispatcher.hpp b/hailort/libhailort/src/vdma/channel/interrupts_dispatcher.hpp index c02f428..b039e41 100644 --- a/hailort/libhailort/src/vdma/channel/interrupts_dispatcher.hpp +++ b/hailort/libhailort/src/vdma/channel/interrupts_dispatcher.hpp @@ -13,11 +13,11 @@ #include "os/hailort_driver.hpp" #include #include +#include namespace hailort { namespace vdma { - /// When needed, creates thread (or threads) that waits for interrupts on all channels. 
class InterruptsDispatcher final { public: @@ -33,19 +33,40 @@ public: InterruptsDispatcher(InterruptsDispatcher &&) = delete; InterruptsDispatcher &operator=(InterruptsDispatcher &&) = delete; - // TODO: HRT-9590 remove interrupt_thread_per_channel, use it by default hailo_status start(const ChannelsBitmap &channels_bitmap, bool enable_timestamp_measure, const ProcessIrqCallback &process_irq); hailo_status stop(); private: - void wait_interrupts(const ChannelsBitmap &channels_bitmap, const ProcessIrqCallback &process_irq); + void wait_interrupts(); + void signal_thread_quit(); + + struct WaitContext { + ChannelsBitmap bitmap; + ProcessIrqCallback process_irq; + }; + + enum class ThreadState { + // The interrupts thread is actually waiting for interrupts + active, + + // The interrupts thread is done waiting for interrupts, it is waiting to be active. + not_active, + }; + + std::mutex m_mutex; + std::condition_variable m_cond; const std::reference_wrapper m_driver; - std::atomic m_is_running; - ChannelsBitmap m_channels_bitmap; - std::vector m_channel_threads; + + ThreadState m_thread_state = ThreadState::not_active; + // When m_wait_context is not nullptr, the thread should start waiting for interrupts. 
+ std::unique_ptr m_wait_context; + + // m_should_quit is used to quit the thread (called on destruction) + bool m_should_quit = false; + std::thread m_interrupts_thread; }; } /* namespace vdma */ diff --git a/hailort/libhailort/src/vdma/integrated/integrated_device.cpp b/hailort/libhailort/src/vdma/integrated/integrated_device.cpp index b6406d5..c574d3d 100644 --- a/hailort/libhailort/src/vdma/integrated/integrated_device.cpp +++ b/hailort/libhailort/src/vdma/integrated/integrated_device.cpp @@ -28,19 +28,19 @@ Expected> IntegratedDevice::create() { hailo_status status = HAILO_UNINITIALIZED; - auto driver = HailoRTDriver::create(INTEGRATED_NNC_DRIVER_PATH); + const HailoRTDriver::DeviceInfo device_info {INTEGRATED_NNC_DRIVER_PATH, DEVICE_ID}; + auto driver = HailoRTDriver::create(device_info); CHECK_EXPECTED(driver, "Failed to initialize HailoRTDriver"); - auto device = std::unique_ptr(new (std::nothrow) IntegratedDevice(driver.release(), status, DEVICE_ID)); + auto device = std::unique_ptr(new (std::nothrow) IntegratedDevice(driver.release(), status)); CHECK_AS_EXPECTED((nullptr != device), HAILO_OUT_OF_HOST_MEMORY); CHECK_SUCCESS_AS_EXPECTED(status, "Failed creating IntegratedDevice"); return device; } - -IntegratedDevice::IntegratedDevice(HailoRTDriver &&driver, hailo_status &status, const std::string &device_id) : - VdmaDevice::VdmaDevice(std::move(driver), Device::Type::INTEGRATED, device_id) +IntegratedDevice::IntegratedDevice(HailoRTDriver &&driver, hailo_status &status) : + VdmaDevice::VdmaDevice(std::move(driver), Device::Type::INTEGRATED) { status = update_fw_state(); if (HAILO_SUCCESS != status) { @@ -51,10 +51,6 @@ IntegratedDevice::IntegratedDevice(HailoRTDriver &&driver, hailo_status &status, status = HAILO_SUCCESS; } -Expected IntegratedDevice::get_architecture() const { - return Expected(m_device_architecture); -} - hailo_status IntegratedDevice::reset_impl(CONTROL_PROTOCOL__reset_type_t reset_type) { if (CONTROL_PROTOCOL__RESET_TYPE__NN_CORE == 
reset_type) { diff --git a/hailort/libhailort/src/vdma/integrated/integrated_device.hpp b/hailort/libhailort/src/vdma/integrated/integrated_device.hpp index 5bb07fe..856994b 100644 --- a/hailort/libhailort/src/vdma/integrated/integrated_device.hpp +++ b/hailort/libhailort/src/vdma/integrated/integrated_device.hpp @@ -24,12 +24,11 @@ namespace hailort class IntegratedDevice : public VdmaDevice { public: - virtual ~IntegratedDevice() = default; static bool is_loaded(); static Expected> create(); - virtual Expected get_architecture() const override; - virtual const char* get_dev_id() const override {return DEVICE_ID;} + virtual ~IntegratedDevice() = default; + Expected read_log(MemoryView &buffer, hailo_cpu_id_t cpu_id); virtual bool is_stream_interface_supported(const hailo_stream_interface_t &stream_interface) const override @@ -53,7 +52,7 @@ protected: virtual hailo_status reset_impl(CONTROL_PROTOCOL__reset_type_t reset_type) override; private: - IntegratedDevice(HailoRTDriver &&driver, hailo_status &status, const std::string &device_id); + IntegratedDevice(HailoRTDriver &&driver, hailo_status &status); }; diff --git a/hailort/libhailort/src/vdma/memory/buffer_requirements.cpp b/hailort/libhailort/src/vdma/memory/buffer_requirements.cpp new file mode 100644 index 0000000..c48ecb3 --- /dev/null +++ b/hailort/libhailort/src/vdma/memory/buffer_requirements.cpp @@ -0,0 +1,146 @@ +/** + * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) +**/ +/** + * @file buffer_requirements.cpp + **/ + +#include "buffer_requirements.hpp" +#include "vdma/memory/descriptor_list.hpp" +#include "utils.h" + +namespace hailort { +namespace vdma { + +// Minimum size of ccb buffers in descriptors, taken from the CCB spec. 
+static constexpr uint32_t MIN_CCB_DESCRIPTORS_COUNT = 16; + +Expected BufferSizesRequirements::get_sg_buffer_requirements_single_transfer( + uint16_t max_desc_page_size, uint16_t min_batch_size, uint16_t max_batch_size, uint32_t transfer_size, + bool is_circular, const bool force_default_page_size) +{ + // First, get the result for the min size + auto results = get_sg_buffer_requirements_multiple_transfers(max_desc_page_size, min_batch_size, + {transfer_size}, is_circular, force_default_page_size); + CHECK_EXPECTED(results); + + // In order to fetch all descriptors, the amount of active descs is lower by one that the amount + // of descs given (Otherwise we won't be able to determine if the buffer is empty or full). + // Therefore we add 1 in order to compensate. + const uint32_t descs_per_transfer = DIV_ROUND_UP(transfer_size, results->desc_page_size()); + uint32_t descs_count = std::min((descs_per_transfer * max_batch_size) + 1, MAX_DESCS_COUNT); + if (is_circular) { + descs_count = get_nearest_powerof_2(descs_count, MIN_DESCS_COUNT); + } + + return BufferSizesRequirements{ descs_count, results->desc_page_size() }; +} + +Expected BufferSizesRequirements::get_sg_buffer_requirements_multiple_transfers( + uint16_t max_desc_page_size, uint16_t batch_size, const std::vector &transfer_sizes, + bool is_circular, const bool force_default_page_size) +{ + const uint16_t initial_desc_page_size = force_default_page_size ? 
+ DEFAULT_DESC_PAGE_SIZE : find_initial_desc_page_size(transfer_sizes); + + CHECK_AS_EXPECTED(max_desc_page_size <= MAX_DESC_PAGE_SIZE, HAILO_INTERNAL_FAILURE, + "max_desc_page_size given {} is bigger than hw max desc page size {}", + max_desc_page_size, MAX_DESC_PAGE_SIZE); + CHECK_AS_EXPECTED(MIN_DESC_PAGE_SIZE <= max_desc_page_size, HAILO_INTERNAL_FAILURE, + "max_desc_page_size given {} is lower that hw min desc page size {}", + max_desc_page_size, MIN_DESC_PAGE_SIZE); + + const uint16_t min_desc_page_size = MIN_DESC_PAGE_SIZE; + CHECK_AS_EXPECTED(initial_desc_page_size <= max_desc_page_size, HAILO_INTERNAL_FAILURE, + "Initial descriptor page size ({}) is larger than maximum descriptor page size ({})", + initial_desc_page_size, max_desc_page_size); + CHECK_AS_EXPECTED(initial_desc_page_size >= min_desc_page_size, HAILO_INTERNAL_FAILURE, + "Initial descriptor page size ({}) is smaller than minimum descriptor page size ({})", + initial_desc_page_size, min_desc_page_size); + + // Defined as uint32_t to prevent overflow (as we multiply it by two in each iteration of the while loop bellow) + uint32_t local_desc_page_size = initial_desc_page_size; + + uint32_t descs_count = get_required_descriptor_count(transfer_sizes, initial_desc_page_size); + // Too many descriptors; try a larger desc_page_size which will lead to less descriptors used + while ((descs_count * batch_size) > (MAX_DESCS_COUNT - 1)) { + CHECK_AS_EXPECTED(IS_FIT_IN_UINT16(local_desc_page_size << 1), HAILO_INTERNAL_FAILURE, + "Descriptor page size needs to fit in 16B"); + local_desc_page_size = static_cast(local_desc_page_size << 1); + + CHECK_AS_EXPECTED(local_desc_page_size <= max_desc_page_size, HAILO_OUT_OF_DESCRIPTORS, + "Network shapes and batch size exceeds driver descriptors capabilities. " + "Required descriptors count: {}, max allowed on the driver: {}. 
" + "(A common cause for this error could be the batch size - which is {}).", + (batch_size * descs_count), (MAX_DESCS_COUNT - 1), batch_size); + + descs_count = get_required_descriptor_count(transfer_sizes, static_cast(local_desc_page_size)); + } + + // Found desc_page_size and descs_count + const auto desc_page_size = static_cast(local_desc_page_size); + if (initial_desc_page_size != desc_page_size) { + LOGGER__WARNING("Desc page size value ({}) is not optimal for performance.", desc_page_size); + } + + if (is_circular) { + // The length of a descriptor list is always a power of 2. Therefore, on circular buffers the hw will have to + // access all descriptors. + descs_count = get_nearest_powerof_2(descs_count, MIN_DESCS_COUNT); + CHECK_AS_EXPECTED(descs_count <= MAX_DESCS_COUNT, HAILO_OUT_OF_DESCRIPTORS); + } + + return BufferSizesRequirements{descs_count, desc_page_size}; +} + +Expected BufferSizesRequirements::get_ccb_buffer_requirements_single_transfer(uint16_t batch_size, + uint32_t transfer_size, bool is_circular) +{ + const uint16_t desc_page_size = DEFAULT_DESC_PAGE_SIZE; + const auto desc_per_transfer = DIV_ROUND_UP(transfer_size, desc_page_size); + auto descs_count = desc_per_transfer * batch_size; + descs_count = std::max(descs_count, MIN_CCB_DESCRIPTORS_COUNT); + if (is_circular) { + // The first 12 channels in D2H CCB ("regular channels") requires that the amount of descriptors will be a power + // of 2. + // We can optimize it by checking that channel index is one of the last 4 channels ("enhanced channels"), or + // even allocate those indexes. 
+ // Meanwhile however, we always use power of 2 + descs_count = get_nearest_powerof_2(descs_count, MIN_CCB_DESCRIPTORS_COUNT); + } + + return BufferSizesRequirements{descs_count, desc_page_size}; +} + + +uint16_t BufferSizesRequirements::find_initial_desc_page_size(const std::vector &transfer_sizes) +{ + const auto max_transfer_size = *std::max_element(transfer_sizes.begin(), transfer_sizes.end()); + // Note: If the pages pointed to by the descriptors are copied in their entirety, then DEFAULT_DESC_PAGE_SIZE + // is the optimal value. For transfer_sizes smaller than DEFAULT_DESC_PAGE_SIZE using smaller descriptor page + // sizes will save memory consuption without harming performance. In the case of nms for example, only one bbox + // is copied from each page. Hence, we'll use MIN_DESC_PAGE_SIZE for nms. + const uint16_t initial_desc_page_size = (DEFAULT_DESC_PAGE_SIZE > max_transfer_size) ? + static_cast(get_nearest_powerof_2(max_transfer_size, MIN_DESC_PAGE_SIZE)) : + DEFAULT_DESC_PAGE_SIZE; + if (DEFAULT_DESC_PAGE_SIZE != initial_desc_page_size) { + LOGGER__INFO("Using non-default initial_desc_page_size of {}, due to a small transfer size ({})", + initial_desc_page_size, max_transfer_size); + } + return initial_desc_page_size; +} + +uint32_t BufferSizesRequirements::get_required_descriptor_count(const std::vector &transfer_sizes, + uint16_t desc_page_size) +{ + uint32_t desc_count = 0; + for (auto &transfer_size : transfer_sizes) { + desc_count += DIV_ROUND_UP(transfer_size, desc_page_size); + } + // One extra descriptor is needed, because the amount of available descriptors is (desc_count - 1) + return desc_count + 1; +} + +} /* namespace vdma */ +} /* namespace hailort */ diff --git a/hailort/libhailort/src/vdma/memory/buffer_requirements.hpp b/hailort/libhailort/src/vdma/memory/buffer_requirements.hpp new file mode 100644 index 0000000..03568f8 --- /dev/null +++ b/hailort/libhailort/src/vdma/memory/buffer_requirements.hpp @@ -0,0 +1,59 @@ +/** + * Copyright 
(c) 2023 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) +**/ +/** + * @file buffer_requirements.hpp + * @brief Calculate all vdma buffer size requirements, including actual size, amount of descriptors and the actual desc + * count. + **/ + +#ifndef _HAILO_BUFFER_REQUIREMENTS_HPP_ +#define _HAILO_BUFFER_REQUIREMENTS_HPP_ + +#include "hailo/expected.hpp" + +#include +#include +#include + + +namespace hailort { +namespace vdma { + +class BufferSizesRequirements final { +public: + BufferSizesRequirements(uint32_t descs_count, uint16_t desc_page_size) : + m_descs_count(descs_count), + m_desc_page_size(desc_page_size) + { + assert(m_descs_count > 0); + assert(m_desc_page_size > 0); + } + + uint32_t descs_count() const { return m_descs_count; } + uint16_t desc_page_size() const { return m_desc_page_size; } + uint32_t buffer_size() const { return m_descs_count * m_desc_page_size; } + + static Expected get_sg_buffer_requirements_single_transfer(uint16_t max_desc_page_size, + uint16_t min_batch_size, uint16_t max_batch_size, uint32_t transfer_size, bool is_circular, + const bool force_default_page_size); + static Expected get_sg_buffer_requirements_multiple_transfers(uint16_t max_desc_page_size, + uint16_t batch_size, const std::vector &transfer_sizes, bool is_circular, + const bool force_default_page_size); + + static Expected get_ccb_buffer_requirements_single_transfer(uint16_t batch_size, + uint32_t transfer_size, bool is_circular); + +private: + static uint16_t find_initial_desc_page_size(const std::vector &transfer_sizes); + static uint32_t get_required_descriptor_count(const std::vector &transfer_sizes, uint16_t desc_page_size); + + const uint32_t m_descs_count; + const uint16_t m_desc_page_size; +}; + +} /* namespace vdma */ +} /* namespace hailort */ + +#endif /* _HAILO_BUFFER_REQUIREMENTS_HPP_ */ diff --git a/hailort/libhailort/src/vdma/memory/continuous_buffer.cpp 
b/hailort/libhailort/src/vdma/memory/continuous_buffer.cpp index bff2809..beac646 100644 --- a/hailort/libhailort/src/vdma/memory/continuous_buffer.cpp +++ b/hailort/libhailort/src/vdma/memory/continuous_buffer.cpp @@ -12,20 +12,16 @@ namespace hailort { namespace vdma { -// Minimum size of ccb buffers in descriptors, taken from the CCB spec. -#define MIN_CCB_DESCRIPTORS_COUNT (16) - -static uint32_t align(uint32_t size, uint32_t align) -{ - assert(is_powerof2(align)); - const uint32_t mask = align - 1; - return (size + mask) & ~mask; -} - Expected ContinuousBuffer::create(size_t size, HailoRTDriver &driver) { auto result = driver.vdma_continuous_buffer_alloc(size); - CHECK_EXPECTED(result, "Failed allocating continuous buffer, size {}", size); + /* Don't print error here since this might be expected error that the libhailoRT can recover from + (out of host memory). If it's not the case, there is a print in hailort_driver.cpp file */ + if (HAILO_OUT_OF_HOST_CMA_MEMORY == result.status()) { + return make_unexpected(result.status()); + } else { + CHECK_EXPECTED(result); + } uintptr_t handle = 0; uint64_t dma_address = 0; @@ -41,23 +37,6 @@ Expected ContinuousBuffer::create(size_t size, HailoRTDriver & return ContinuousBuffer(size, driver, handle, dma_address, mmap.release()); } -uint32_t ContinuousBuffer::get_buffer_size(uint32_t buffer_size) -{ - const uint16_t page_size = DEFAULT_DESC_PAGE_SIZE; - const auto aligned_buffer_size = align(buffer_size, page_size); - - const uint32_t min_buffer_size = page_size * MIN_CCB_DESCRIPTORS_COUNT; - return std::max(aligned_buffer_size, min_buffer_size); -} - -uint32_t ContinuousBuffer::get_buffer_size_desc_power2(uint32_t buffer_size) -{ - const uint16_t page_size = DEFAULT_DESC_PAGE_SIZE; - const auto descriptors_in_buffer = DIV_ROUND_UP(buffer_size, page_size); - const auto actual_descriptors_count = get_nearest_powerof_2(descriptors_in_buffer, MIN_CCB_DESCRIPTORS_COUNT); - return actual_descriptors_count * page_size; -} - 
ContinuousBuffer::~ContinuousBuffer() { if (0 != m_handle) { @@ -96,7 +75,7 @@ uint32_t ContinuousBuffer::descs_count() const return descriptors_in_buffer(m_size); } -hailo_status ContinuousBuffer::read(void *buf_dst, size_t count, size_t offset, bool /* should_sync */) +hailo_status ContinuousBuffer::read(void *buf_dst, size_t count, size_t offset) { CHECK((count + offset) <= m_size, HAILO_INSUFFICIENT_BUFFER, "Requested size {} from offset {} is more than the buffer size {}", count, offset, m_size); @@ -117,11 +96,10 @@ hailo_status ContinuousBuffer::write(const void *buf_src, size_t count, size_t o } Expected ContinuousBuffer::program_descriptors(size_t transfer_size, InterruptsDomain last_desc_interrupts_domain, - size_t desc_offset, bool is_circular) + size_t desc_offset) { (void)last_desc_interrupts_domain; (void)desc_offset; - (void)is_circular; // The descriptors in continuous mode are programmed by the hw, nothing to do here. return descriptors_in_buffer(transfer_size); diff --git a/hailort/libhailort/src/vdma/memory/continuous_buffer.hpp b/hailort/libhailort/src/vdma/memory/continuous_buffer.hpp index 58afefb..57b3ed5 100644 --- a/hailort/libhailort/src/vdma/memory/continuous_buffer.hpp +++ b/hailort/libhailort/src/vdma/memory/continuous_buffer.hpp @@ -22,10 +22,6 @@ class ContinuousBuffer final : public VdmaBuffer { public: static Expected create(size_t size, HailoRTDriver &driver); - static uint32_t get_buffer_size(uint32_t buffer_size); - // Get buffer size with the requirment that the amount of descriptors is a power of 2. 
- static uint32_t get_buffer_size_desc_power2(uint32_t buffer_size); - ContinuousBuffer(const ContinuousBuffer &) = delete; ContinuousBuffer& operator=(const ContinuousBuffer &) = delete; ContinuousBuffer& operator=(ContinuousBuffer &&) = delete; @@ -51,11 +47,11 @@ public: virtual uint16_t desc_page_size() const override; virtual uint32_t descs_count() const override; - virtual hailo_status read(void *buf_dst, size_t count, size_t offset, bool should_sync) override; + virtual hailo_status read(void *buf_dst, size_t count, size_t offset) override; virtual hailo_status write(const void *buf_src, size_t count, size_t offset) override; virtual Expected program_descriptors(size_t transfer_size, InterruptsDomain last_desc_interrupts_domain, - size_t desc_offset, bool is_circular) override; + size_t desc_offset) override; virtual hailo_status reprogram_device_interrupts_for_end_of_batch(size_t transfer_size, uint16_t batch_size, InterruptsDomain new_interrupts_domain) override; diff --git a/hailort/libhailort/src/vdma/memory/descriptor_list.cpp b/hailort/libhailort/src/vdma/memory/descriptor_list.cpp index 995a27c..a264098 100644 --- a/hailort/libhailort/src/vdma/memory/descriptor_list.cpp +++ b/hailort/libhailort/src/vdma/memory/descriptor_list.cpp @@ -8,7 +8,6 @@ **/ #include "vdma/memory/descriptor_list.hpp" -#include "vdma/memory/mapped_buffer_impl.hpp" #include "utils.h" @@ -32,117 +31,90 @@ namespace hailort { namespace vdma { -Expected DescriptorList::create(uint32_t desc_count, uint16_t requested_desc_page_size, +Expected DescriptorList::create(uint32_t desc_count, uint16_t desc_page_size, bool is_circular, HailoRTDriver &driver) { hailo_status status = HAILO_UNINITIALIZED; - auto desc_page_size_value = driver.calc_desc_page_size(requested_desc_page_size); - DescriptorList object(desc_count, driver, desc_page_size_value, status); + assert(desc_page_size <= driver.desc_max_page_size()); + + CHECK_AS_EXPECTED(desc_count <= MAX_DESCS_COUNT, HAILO_INVALID_ARGUMENT, + 
"descs_count {} must be smaller/equal to {}", desc_count, MAX_DESCS_COUNT); + + DescriptorList object(desc_count, desc_page_size, is_circular, driver, status); if (HAILO_SUCCESS != status) { return make_unexpected(status); } - // No need to initialize descripotrs here because they are initialized in driver in hailo_vdma_program_descriptor() + // No need to initialize descriptors here because they are initialized in driver in hailo_vdma_program_descriptor() return object; } -DescriptorList::DescriptorList(uint32_t desc_count, HailoRTDriver &driver, uint16_t desc_page_size, - hailo_status &status) : - m_mapped_list(), - m_count(desc_count), - m_depth(0), - m_desc_handle(0), - m_dma_address(0), +DescriptorList::DescriptorList(uint32_t desc_count, uint16_t desc_page_size, bool is_circular, HailoRTDriver &driver, + hailo_status &status) : + m_desc_list_info(), + m_is_circular(is_circular), m_driver(driver), m_desc_page_size(desc_page_size) { - if (!is_powerof2(desc_count)) { - LOGGER__ERROR("Descriptor count ({}) must be power of 2", desc_count); + if (m_is_circular && !is_powerof2(desc_count)) { + LOGGER__ERROR("Descriptor count ({}) for circular descriptor list must be power of 2", desc_count); status = HAILO_INVALID_ARGUMENT; return; } - auto depth = calculate_desc_list_depth(desc_count); - if (!depth) { - status = depth.status(); - return; - } - m_depth = depth.value(); - - auto desc_handle_phys_addr_pair = m_driver.descriptors_list_create(desc_count); - if (!desc_handle_phys_addr_pair) { - status = desc_handle_phys_addr_pair.status(); + auto desc_list_info = m_driver.descriptors_list_create(desc_count, m_is_circular); + if (!desc_list_info) { + status = desc_list_info.status(); return; } - m_desc_handle = desc_handle_phys_addr_pair->first; - m_dma_address = desc_handle_phys_addr_pair->second; - - auto mapped_list = MmapBuffer::create_file_map(desc_count * sizeof(VdmaDescriptor), m_driver.fd(), m_desc_handle); - if (!mapped_list) { - LOGGER__ERROR("Failed to memory 
map descriptors. desc handle: {:X}", m_desc_handle); - status = mapped_list.status(); - return; - } + m_desc_list_info = desc_list_info.release(); - m_mapped_list = mapped_list.release(); status = HAILO_SUCCESS; } DescriptorList::~DescriptorList() { - if (HAILO_SUCCESS != m_mapped_list.unmap()) { - LOGGER__ERROR("Failed to release descriptors mapping"); - } - - // Note: The descriptors_list is freed by the desc_handle (no need to use the phys_address to free) - if (0 != m_desc_handle) { - if(HAILO_SUCCESS != m_driver.descriptors_list_release(m_desc_handle)) { - LOGGER__ERROR("Failed to release descriptor list {}", m_desc_handle); + if (0 != m_desc_list_info.handle) { + auto status = m_driver.descriptors_list_release(m_desc_list_info); + if(HAILO_SUCCESS != status) { + LOGGER__ERROR("Failed to release descriptor list {} with status {}", m_desc_list_info.handle, status); } } } -DescriptorList::DescriptorList(DescriptorList &&other) noexcept : - m_mapped_list(std::move(other.m_mapped_list)), - m_count(std::move(other.m_count)), - m_depth(std::move(other.m_depth)), - m_desc_handle(std::exchange(other.m_desc_handle, 0)), - m_dma_address(std::exchange(other.m_dma_address, 0)), +DescriptorList::DescriptorList(DescriptorList &&other) noexcept : + m_desc_list_info(), + m_is_circular(std::move(other.m_is_circular)), m_driver(other.m_driver), - m_desc_page_size(other.m_desc_page_size) {} - -Expected DescriptorList::calculate_desc_list_depth(size_t count) + m_desc_page_size(other.m_desc_page_size) { - // Calculate log2 of m_count (by finding the offset of the MSB) - uint32_t depth = 0; - while (count >>= 1) { - ++depth; - } - CHECK_AS_EXPECTED(IS_FIT_IN_UINT8(depth), HAILO_INTERNAL_FAILURE, "Calculated desc_list_depth is too big: {}", depth); - return static_cast(depth); + m_desc_list_info.handle = std::exchange(other.m_desc_list_info.handle, 0); + m_desc_list_info.dma_address = std::exchange(other.m_desc_list_info.dma_address, 0); + m_desc_list_info.desc_count = 
std::move(other.m_desc_list_info.desc_count); + m_desc_list_info.user_address = std::exchange(other.m_desc_list_info.user_address, nullptr); } -hailo_status DescriptorList::configure_to_use_buffer(DmaMappedBuffer& buffer, ChannelId channel_id, uint32_t starting_desc) +hailo_status DescriptorList::configure_to_use_buffer(MappedBuffer& buffer, ChannelId channel_id, uint32_t starting_desc) { - const auto desc_list_capacity = m_desc_page_size * m_count; + const auto desc_list_capacity = m_desc_page_size * count(); CHECK(buffer.size() <= desc_list_capacity, HAILO_INVALID_ARGUMENT, "Can't bind a buffer larger than the descriptor list's capacity. Buffer size {}, descriptor list capacity {}", buffer.size(), desc_list_capacity); - return m_driver.descriptors_list_bind_vdma_buffer(m_desc_handle, buffer.pimpl->handle(), m_desc_page_size, + return m_driver.descriptors_list_bind_vdma_buffer(m_desc_list_info.handle, buffer.handle(), m_desc_page_size, channel_id.channel_index, starting_desc); } Expected DescriptorList::program_last_descriptor(size_t transfer_size, - InterruptsDomain last_desc_interrupts_domain, size_t desc_offset, bool is_circular) + InterruptsDomain last_desc_interrupts_domain, size_t desc_offset) { assert(transfer_size > 0); const auto required_descriptors = descriptors_in_buffer(transfer_size); // Required_descriptors + desc_offset can't reach m_count. 
- if ((!is_circular) && ((required_descriptors + desc_offset) > m_count)){ - LOGGER__ERROR("Requested transfer size ({}) result in more descriptors than available ({})", transfer_size, m_count); + if ((!m_is_circular) && ((required_descriptors + desc_offset) > count())){ + LOGGER__ERROR("Requested transfer size ({}) result in more descriptors than available ({})", transfer_size, count()); return make_unexpected(HAILO_OUT_OF_DESCRIPTORS); } @@ -150,7 +122,7 @@ Expected DescriptorList::program_last_descriptor(size_t transfer_size, /* write residue page with the remaining buffer size*/ auto resuide = transfer_size - (required_descriptors - 1) * m_desc_page_size; assert(IS_FIT_IN_UINT16(resuide)); - size_t last_desc = (desc_offset + required_descriptors - 1) & (m_count - 1); + size_t last_desc = (desc_offset + required_descriptors - 1) % count(); program_single_descriptor((*this)[last_desc], static_cast(resuide), last_desc_interrupts_domain); return std::move(static_cast(required_descriptors)); @@ -159,8 +131,8 @@ Expected DescriptorList::program_last_descriptor(size_t transfer_size, hailo_status DescriptorList::reprogram_descriptor_interrupts_domain(size_t desc_index, InterruptsDomain interrupts_domain) { - if (desc_index >= m_count){ - LOGGER__ERROR("Requested desc (index={}) exceeds the number of descriptors in the list ({})", desc_index, m_count); + if (desc_index >= count()){ + LOGGER__ERROR("Requested desc (index={}) exceeds the number of descriptors in the list ({})", desc_index, count()); return HAILO_OUT_OF_DESCRIPTORS; } reprogram_single_descriptor_interrupts_domain((*this)[desc_index], interrupts_domain); @@ -189,111 +161,6 @@ uint32_t DescriptorList::calculate_descriptors_count(uint32_t buffer_size, uint1 return get_nearest_powerof_2(descs_count, MIN_DESCS_COUNT); } -Expected> DescriptorList::get_desc_buffer_sizes_for_single_transfer( - const HailoRTDriver &driver, uint16_t min_batch_size, uint16_t max_batch_size, uint32_t transfer_size) -{ - // Note: If 
the pages pointed to by the descriptors are copied in their entirety, then DEFAULT_DESC_PAGE_SIZE - // is the optimal value. For transfer_sizes smaller than DEFAULT_DESC_PAGE_SIZE using smaller descriptor page - // sizes will save memory consuption without harming performance. In the case of nms for example, only one bbox - // is copied from each page. Hence, we'll use MIN_DESC_PAGE_SIZE for nms. - const uint32_t initial_desc_page_size = (DEFAULT_DESC_PAGE_SIZE > transfer_size) ? - get_nearest_powerof_2(transfer_size, MIN_DESC_PAGE_SIZE) : DEFAULT_DESC_PAGE_SIZE; - if (DEFAULT_DESC_PAGE_SIZE != initial_desc_page_size) { - LOGGER__INFO("Using non-default initial_desc_page_size of {}, due to a small transfer size ({})", - initial_desc_page_size, transfer_size); - } - CHECK_AS_EXPECTED(IS_FIT_IN_UINT16(initial_desc_page_size), HAILO_INTERNAL_FAILURE, - "Descriptor page size needs to fit in 16B"); - - return get_desc_buffer_sizes_for_single_transfer_impl(driver, min_batch_size, max_batch_size, transfer_size, - static_cast(initial_desc_page_size)); -} - -Expected> DescriptorList::get_desc_buffer_sizes_for_multiple_transfers( - const HailoRTDriver &driver, uint16_t batch_size, const std::vector &transfer_sizes) -{ - return get_desc_buffer_sizes_for_multiple_transfers_impl(driver, batch_size, transfer_sizes, - DEFAULT_DESC_PAGE_SIZE); -} - -Expected> DescriptorList::get_desc_buffer_sizes_for_single_transfer_impl( - const HailoRTDriver &driver, uint16_t min_batch_size, uint16_t max_batch_size, uint32_t transfer_size, - uint16_t initial_desc_page_size) -{ - auto results = DescriptorList::get_desc_buffer_sizes_for_multiple_transfers_impl(driver, min_batch_size, - {transfer_size}, initial_desc_page_size); - CHECK_EXPECTED(results); - - auto page_size = results->first; - - auto desc_count = std::min(MAX_DESCS_COUNT, - DescriptorList::calculate_descriptors_count(transfer_size, max_batch_size, page_size)); - - return std::make_pair(page_size, desc_count); -} - -Expected> 
DescriptorList::get_desc_buffer_sizes_for_multiple_transfers_impl( - const HailoRTDriver &driver, uint16_t batch_size, const std::vector &transfer_sizes, - uint16_t initial_desc_page_size) -{ - const uint16_t min_desc_page_size = driver.calc_desc_page_size(MIN_DESC_PAGE_SIZE); - const uint16_t max_desc_page_size = driver.calc_desc_page_size(MAX_DESC_PAGE_SIZE); - // Defined as uint32_t to prevent overflow (as we multiply it by two in each iteration of the while loop bellow) - uint32_t local_desc_page_size = driver.calc_desc_page_size(initial_desc_page_size); - CHECK_AS_EXPECTED(IS_FIT_IN_UINT16(local_desc_page_size), HAILO_INTERNAL_FAILURE, - "Descriptor page size needs to fit in 16B"); - CHECK_AS_EXPECTED(local_desc_page_size <= max_desc_page_size, HAILO_INTERNAL_FAILURE, - "Initial descriptor page size ({}) is larger than maximum descriptor page size ({})", - local_desc_page_size, max_desc_page_size); - CHECK_AS_EXPECTED(local_desc_page_size >= min_desc_page_size, HAILO_INTERNAL_FAILURE, - "Initial descriptor page size ({}) is smaller than minimum descriptor page size ({})", - local_desc_page_size, min_desc_page_size); - - uint32_t acc_desc_count = get_descriptors_count_needed(transfer_sizes, static_cast(local_desc_page_size)); - - // Too many descriptors; try a larger desc_page_size which will lead to less descriptors used - while ((acc_desc_count * batch_size) > (MAX_DESCS_COUNT - 1)) { - local_desc_page_size <<= 1; - - CHECK_AS_EXPECTED(local_desc_page_size <= max_desc_page_size, HAILO_OUT_OF_DESCRIPTORS, - "Network shapes and batch size exceeds driver descriptors capabilities. " - "Required descriptors count: {}, max allowed on the driver: {}. 
" - "(A common cause for this error could be the batch size - which is {}).", - (batch_size * acc_desc_count), (MAX_DESCS_COUNT - 1), batch_size); - - CHECK_AS_EXPECTED(IS_FIT_IN_UINT16(local_desc_page_size), HAILO_INTERNAL_FAILURE, - "Descriptor page size needs to fit in 16B"); - - acc_desc_count = get_descriptors_count_needed(transfer_sizes, static_cast(local_desc_page_size)); - } - - // Found desc_page_size and acc_desc_count - const auto desc_page_size = static_cast(local_desc_page_size); - - // Find descs_count - const auto descs_count = get_nearest_powerof_2(acc_desc_count, MIN_DESCS_COUNT); - CHECK_AS_EXPECTED(descs_count <= MAX_DESCS_COUNT, HAILO_OUT_OF_DESCRIPTORS); - - if (initial_desc_page_size != desc_page_size) { - LOGGER__WARNING("Desc page size value ({}) is not optimal for performance.", desc_page_size); - } - - return std::make_pair(desc_page_size, descs_count); -} - -uint32_t DescriptorList::get_descriptors_count_needed(const std::vector &transfer_sizes, - uint16_t desc_page_size) -{ - uint32_t desc_count = 0; - for (auto &transfer_size : transfer_sizes) { - desc_count += descriptors_in_buffer(transfer_size, desc_page_size); - } - - // One extra descriptor is needed, because the amount of available descriptors is (desc_count - 1) - desc_count += 1; - return desc_count; -} - uint32_t DescriptorList::get_interrupts_bitmask(InterruptsDomain interrupts_domain) { uint32_t host_bitmask = 0; @@ -353,7 +220,7 @@ void DescriptorList::reprogram_single_descriptor_interrupts_domain(VdmaDescripto // Set the IRQ control bits to zero // Make all edits to the local variable local_pagesize_desc_ctrl that is on the stack to save read/writes to DDR auto local_pagesize_desc_ctrl = (descriptor.PageSize_DescControl & ~DESC_IRQ_MASK); - + if (InterruptsDomain::NONE == interrupts_domain) { // Nothing else to do descriptor.PageSize_DescControl = local_pagesize_desc_ctrl; diff --git a/hailort/libhailort/src/vdma/memory/descriptor_list.hpp 
b/hailort/libhailort/src/vdma/memory/descriptor_list.hpp index 6800b32..25c3c35 100644 --- a/hailort/libhailort/src/vdma/memory/descriptor_list.hpp +++ b/hailort/libhailort/src/vdma/memory/descriptor_list.hpp @@ -11,11 +11,13 @@ #define _HAILO_VDMA_DESCRIPTOR_LIST_HPP_ #include "hailo/expected.hpp" -#include "hailo/dma_mapped_buffer.hpp" +#include "hailo/hailort_common.hpp" #include "common/utils.hpp" #include "vdma/channel/channel_id.hpp" +#include "vdma/memory/mapped_buffer.hpp" + #include "os/hailort_driver.hpp" #include "os/mmap_buffer.hpp" @@ -35,14 +37,13 @@ static_assert(DEFAULT_DESC_COUNT <= MAX_DESCS_COUNT && DEFAULT_DESC_COUNT >= MIN "DEFAULT_DESC_COUNT not in range"); // From PLDA's vDMA controller reference: -// - Addresses of pages pointed to by vDMA descriptors need to be on a 64B boundry. +// - Addresses of pages pointed to by vDMA descriptors need to be on a 64B boundary. // Hence, we require a minimum page size of 64B. // - G_PAGE_SIZE_MAX dictates the maximum desc page size: // max_page_size = 2 ^ (G_PAGE_SIZE_MAX - 1) // In our case max_page_size = 2 ^ (13 - 1) = 4096 -#define MIN_DESC_PAGE_SIZE (64u) -// TODO: Calculate from G_PAGE_SIZE_MAX (I.e. read the reg etc.) -#define MAX_DESC_PAGE_SIZE (4096u) +static constexpr uint16_t MIN_DESC_PAGE_SIZE = 64; +static constexpr uint16_t MAX_DESC_PAGE_SIZE = 4096; static constexpr uint16_t DEFAULT_DESC_PAGE_SIZE = 512; static_assert(is_powerof2(MIN_DESC_PAGE_SIZE), "MIN_DESC_PAGE_SIZE must be a power of 2"); @@ -53,15 +54,40 @@ static_assert(is_powerof2(DEFAULT_DESC_PAGE_SIZE), "DEFAULT_DESC_PAGE_SIZE must static_assert(DEFAULT_DESC_PAGE_SIZE > 0, "DEFAULT_DESC_PAGE_SIZE must be larger then 0"); -struct VdmaDescriptor +static constexpr auto DESCRIPTOR_STATUS_MASK = 0xFF; +static constexpr auto DESCRIPTOR_STATUS_DONE_BIT = 0; +static constexpr auto DESCRIPTOR_STATUS_ERROR_BIT = 1; + +struct VdmaDescriptor { + // Struct layout is taken from PLDA spec for vDMA, and cannot be changed. 
uint32_t PageSize_DescControl; uint32_t AddrL_rsvd_DataID; uint32_t AddrH; uint32_t RemainingPageSize_Status; + +#ifndef NDEBUG + // Easy accessors (only on debug since we mark DESC_STATUS_REQ and DESC_STATUS_REQ_ERR are set only on debug). + uint8_t status() const + { + return RemainingPageSize_Status & DESCRIPTOR_STATUS_MASK; + } + + bool is_done() const + { + return is_bit_set(status(), DESCRIPTOR_STATUS_DONE_BIT); + } + + bool is_error() const + { + return is_bit_set(status(), DESCRIPTOR_STATUS_ERROR_BIT); + } +#endif /* NDEBUG */ }; -enum class InterruptsDomain +static_assert(SIZE_OF_SINGLE_DESCRIPTOR == sizeof(VdmaDescriptor), "Invalid size of descriptor"); + +enum class InterruptsDomain { NONE = 0, DEVICE = 1 << 0, @@ -82,7 +108,7 @@ inline bool device_interuptes_enabled(InterruptsDomain interrupts_domain) class DescriptorList { public: - static Expected create(uint32_t desc_count, uint16_t requested_desc_page_size, + static Expected create(uint32_t desc_count, uint16_t desc_page_size, bool is_circular, HailoRTDriver &driver); ~DescriptorList(); @@ -92,25 +118,21 @@ public: DescriptorList(DescriptorList &&other) noexcept; DescriptorList &operator=(DescriptorList &&other) = delete; - uint8_t depth() const - { - return m_depth; - } - uint32_t count() const { - return m_count; + assert(m_desc_list_info.desc_count <= std::numeric_limits::max()); + return static_cast(m_desc_list_info.desc_count); } uint64_t dma_address() const { - return m_dma_address; + return m_desc_list_info.dma_address; } VdmaDescriptor& operator[](size_t i) { - assert(i < m_count); - return m_mapped_list[i]; + assert(i < count()); + return desc_list()[i]; } uint16_t desc_page_size() const @@ -120,23 +142,23 @@ public: uintptr_t handle() const { - return m_desc_handle; + return m_desc_list_info.handle; } uint16_t max_transfers(uint32_t transfer_size) { // We need to keep at least 1 free desc at all time. 
- return static_cast((m_count - 1) / descriptors_in_buffer(transfer_size)); + return static_cast((count() - 1) / descriptors_in_buffer(transfer_size)); } // Map descriptors starting at offset to the start of buffer, wrapping around the descriptor list as needed // On hailo8, we allow configuring buffer without specific channel index (default is INVALID_VDMA_CHANNEL_INDEX). - hailo_status configure_to_use_buffer(DmaMappedBuffer& buffer, ChannelId channel_id, uint32_t starting_desc = 0); + hailo_status configure_to_use_buffer(MappedBuffer& buffer, ChannelId channel_id, uint32_t starting_desc = 0); // All descritors are initialized to have size of m_desc_page_size - so all we do is set the last descritor for the // Interrupt - and then after transfer has finished clear the previously used first and last decsriptors. // This saves us write/ reads to the desscriptor list which is DMA memory. Expected program_last_descriptor(size_t transfer_size, InterruptsDomain last_desc_interrupts_domain, - size_t desc_offset, bool is_circular); + size_t desc_offset); void program_single_descriptor(VdmaDescriptor &descriptor, uint16_t page_size, InterruptsDomain interrupts_domain); hailo_status reprogram_descriptor_interrupts_domain(size_t desc_index, InterruptsDomain interrupts_domain); void clear_descriptor(const size_t desc_index); @@ -144,31 +166,19 @@ public: uint32_t descriptors_in_buffer(size_t buffer_size) const; static uint32_t descriptors_in_buffer(size_t buffer_size, uint16_t desc_page_size); static uint32_t calculate_descriptors_count(uint32_t buffer_size, uint16_t batch_size, uint16_t desc_page_size); - static Expected> get_desc_buffer_sizes_for_single_transfer(const HailoRTDriver &driver, - uint16_t min_batch_size, uint16_t max_batch_size, uint32_t transfer_size); - static Expected> get_desc_buffer_sizes_for_multiple_transfers(const HailoRTDriver &driver, - uint16_t batch_size, const std::vector &transfer_sizes); private: - DescriptorList(uint32_t desc_count, 
HailoRTDriver &driver, uint16_t desc_page_size, hailo_status &status); + DescriptorList(uint32_t desc_count, uint16_t desc_page_size, bool is_circular, HailoRTDriver &driver, + hailo_status &status); + + VdmaDescriptor *desc_list() { return reinterpret_cast(m_desc_list_info.user_address); } + uint32_t get_interrupts_bitmask(InterruptsDomain interrupts_domain); void reprogram_single_descriptor_interrupts_domain(VdmaDescriptor &descriptor, InterruptsDomain interrupts_domain); - static Expected calculate_desc_list_depth(size_t count); - // Note: initial_desc_page_size should be the optimal descriptor page size. - static Expected> get_desc_buffer_sizes_for_single_transfer_impl( - const HailoRTDriver &driver, uint16_t min_batch_size, uint16_t max_batch_size, uint32_t transfer_size, - uint16_t initial_desc_page_size); - static Expected> get_desc_buffer_sizes_for_multiple_transfers_impl( - const HailoRTDriver &driver, uint16_t batch_size, const std::vector &transfer_sizes, - uint16_t initial_desc_page_size); - static uint32_t get_descriptors_count_needed(const std::vector &transfer_sizes, - uint16_t desc_page_size); - - MmapBuffer m_mapped_list; - uint32_t m_count; - uint8_t m_depth; - uintptr_t m_desc_handle; - uint64_t m_dma_address; + + + DescriptorsListInfo m_desc_list_info; + const bool m_is_circular; HailoRTDriver &m_driver; const uint16_t m_desc_page_size; }; diff --git a/hailort/libhailort/src/vdma/memory/dma_able_buffer.cpp b/hailort/libhailort/src/vdma/memory/dma_able_buffer.cpp new file mode 100644 index 0000000..6b7d16b --- /dev/null +++ b/hailort/libhailort/src/vdma/memory/dma_able_buffer.cpp @@ -0,0 +1,267 @@ +/** + * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) +**/ +/** + * @file dma_able_buffer.cpp + * @brief A Buffer that can be mapped to some device for dma operations. + * See hpp for more information. 
+ **/ + +#include "dma_able_buffer.hpp" +#include "common/os_utils.hpp" + +#if defined(_MSC_VER) +#include "os/windows/virtual_alloc_guard.hpp" +#endif /* defined(_MSC_VER) */ + + +#if defined(__QNX__) +#include +#endif + +namespace hailort { +namespace vdma { + +#if defined(__linux__) || defined(_MSC_VER) + +// User buffer. This class does not own the buffer. +class UserAllocatedDmaAbleBuffer : public DmaAbleBuffer { +public: + static Expected create(void *user_address, size_t size) + { + CHECK_AS_EXPECTED(0 == (reinterpret_cast(user_address) % OsUtils::get_page_size()), + HAILO_INVALID_ARGUMENT, "User address mapped as dma must be paged aligned (page size {})", + OsUtils::get_page_size()); + + auto buffer = make_shared_nothrow(user_address, size); + CHECK_NOT_NULL_AS_EXPECTED(buffer, HAILO_OUT_OF_HOST_MEMORY); + + return std::static_pointer_cast(buffer); + } + + UserAllocatedDmaAbleBuffer(void *user_address, size_t size) : + m_size(size), + m_user_address(user_address) + {} + + virtual size_t size() const override { return m_size; } + virtual void *user_address() override { return m_user_address; } + virtual vdma_mapped_buffer_driver_identifier buffer_identifier() override { return HailoRTDriver::INVALID_DRIVER_BUFFER_HANDLE_VALUE; } + +private: + const size_t m_size; + void *m_user_address; +}; + + +#if defined(__linux__) +class PageAlignedDmaAbleBuffer : public DmaAbleBuffer { +public: + static Expected create(size_t size) + { + // Shared memory to allow python fork. 
+ auto mmapped_buffer = MmapBuffer::create_shared_memory(size); + CHECK_EXPECTED(mmapped_buffer); + + auto buffer = make_shared_nothrow(mmapped_buffer.release()); + CHECK_NOT_NULL_AS_EXPECTED(buffer, HAILO_OUT_OF_HOST_MEMORY); + return std::static_pointer_cast(buffer); + } + + PageAlignedDmaAbleBuffer(MmapBuffer &&mmapped_buffer) : + m_mmapped_buffer(std::move(mmapped_buffer)) + {} + + virtual void* user_address() override { return m_mmapped_buffer.address(); } + virtual size_t size() const override { return m_mmapped_buffer.size(); } + virtual vdma_mapped_buffer_driver_identifier buffer_identifier() override { return HailoRTDriver::INVALID_DRIVER_BUFFER_HANDLE_VALUE; } + +private: + // Using mmap instead of aligned_alloc to enable MEM_SHARE flag - used for multi-process fork. + MmapBuffer m_mmapped_buffer; +}; + +#elif defined(_MSC_VER) +class PageAlignedDmaAbleBuffer : public DmaAbleBuffer { +public: + static Expected create(size_t size) + { + auto memory_guard = VirtualAllocGuard::create(size); + CHECK_EXPECTED(memory_guard); + + auto buffer = make_shared_nothrow(memory_guard.release()); + CHECK_NOT_NULL_AS_EXPECTED(buffer, HAILO_OUT_OF_HOST_MEMORY); + return std::static_pointer_cast(buffer); + } + + PageAlignedDmaAbleBuffer(VirtualAllocGuard &&memory_guard) : + m_memory_guard(std::move(memory_guard)) + {} + + virtual size_t size() const override { return m_memory_guard.size(); } + virtual void *user_address() override { return m_memory_guard.address(); } + virtual vdma_mapped_buffer_driver_identifier buffer_identifier() override { return HailoRTDriver::INVALID_DRIVER_BUFFER_HANDLE_VALUE; } + +private: + VirtualAllocGuard m_memory_guard; +}; +#else +#error "unsupported platform!" +#endif + +// Allocate low memory buffer using HailoRTDriver. 
+class DriverAllocatedDmaAbleBuffer : public DmaAbleBuffer { +public: + static Expected create(HailoRTDriver &driver, size_t size) + { + auto driver_buffer_handle = driver.vdma_low_memory_buffer_alloc(size); + CHECK_EXPECTED(driver_buffer_handle); + + auto mmapped_buffer = MmapBuffer::create_file_map(size, driver.fd(), driver_buffer_handle.value()); + if (!mmapped_buffer) { + auto free_status = driver.vdma_low_memory_buffer_free(driver_buffer_handle.value()); + if (HAILO_SUCCESS != free_status) { + LOGGER__ERROR("Failed free vdma low memory with status {}", free_status); + // Continue + } + + return make_unexpected(mmapped_buffer.status()); + } + CHECK_EXPECTED(mmapped_buffer); + + auto buffer = make_shared_nothrow(driver, driver_buffer_handle.value(), + mmapped_buffer.release()); + CHECK_NOT_NULL_AS_EXPECTED(buffer, HAILO_OUT_OF_HOST_MEMORY); + return std::static_pointer_cast(buffer); + } + + DriverAllocatedDmaAbleBuffer(HailoRTDriver &driver, vdma_mapped_buffer_driver_identifier driver_allocated_buffer_id, + MmapBuffer &&mmapped_buffer) : + m_driver(driver), + m_driver_allocated_buffer_id(driver_allocated_buffer_id), + m_mmapped_buffer(std::move(mmapped_buffer)) + {} + + DriverAllocatedDmaAbleBuffer(const DriverAllocatedDmaAbleBuffer &) = delete; + DriverAllocatedDmaAbleBuffer &operator=(const DriverAllocatedDmaAbleBuffer &) = delete; + + ~DriverAllocatedDmaAbleBuffer() + { + auto status = m_mmapped_buffer.unmap(); + if (HAILO_SUCCESS != status) { + LOGGER__ERROR("Failed to unmap buffer"); + // continue + } + + status = m_driver.vdma_low_memory_buffer_free(m_driver_allocated_buffer_id); + if (HAILO_SUCCESS != status) { + LOGGER__ERROR("Failed to free low memory buffer"); + // continue + } + } + + virtual void* user_address() override { return m_mmapped_buffer.address(); } + virtual size_t size() const override { return m_mmapped_buffer.size(); } + virtual vdma_mapped_buffer_driver_identifier buffer_identifier() override { return m_driver_allocated_buffer_id; } + 
+private: + HailoRTDriver &m_driver; + const vdma_mapped_buffer_driver_identifier m_driver_allocated_buffer_id; + + MmapBuffer m_mmapped_buffer; +}; + +Expected DmaAbleBuffer::create(size_t size, void *user_address) +{ + if (nullptr != user_address) { + return UserAllocatedDmaAbleBuffer::create(user_address, size); + } else { + return PageAlignedDmaAbleBuffer::create(size); + } +} + +Expected DmaAbleBuffer::create(HailoRTDriver &driver, size_t size, void *user_address) +{ + if ((nullptr == user_address) && driver.allocate_driver_buffer()) { + return DriverAllocatedDmaAbleBuffer::create(driver, size); + } else { + // The driver is not needed. + return create(size, user_address); + } +} + +#elif defined(__QNX__) + +class SharedMemoryDmaAbleBuffer : public DmaAbleBuffer { +public: + + static Expected create(size_t size) + { + auto shm_fd = open_shared_memory_fd(size); + CHECK_EXPECTED(shm_fd); + + auto mmapped_buffer = MmapBuffer::create_file_map_nocache(size, shm_fd.value(), 0); + CHECK_EXPECTED(mmapped_buffer); + + auto buffer = make_shared_nothrow(shm_fd.release(), mmapped_buffer.release()); + CHECK_NOT_NULL_AS_EXPECTED(buffer, HAILO_OUT_OF_HOST_MEMORY); + return std::static_pointer_cast(buffer); + } + + SharedMemoryDmaAbleBuffer(FileDescriptor &&shm_fd, MmapBuffer &&mmapped_buffer) : + m_shm_fd(std::move(shm_fd)), + m_mmapped_buffer(std::move(mmapped_buffer)) + {} + + virtual void *user_address() override { return m_mmapped_buffer.address(); } + virtual size_t size() const override { return m_mmapped_buffer.size(); } + virtual vdma_mapped_buffer_driver_identifier buffer_identifier() override { return m_shm_fd; } + +private: + + static Expected open_shared_memory_fd(size_t size) + { + static const int INVALID_FD = -1; + static const char* VDMA_BUFFER_TYPE_MEMORY_NAME = "/memory/below4G/ram/below1G"; + + FileDescriptor type_mem_fd = posix_typed_mem_open(VDMA_BUFFER_TYPE_MEMORY_NAME, O_RDWR, POSIX_TYPED_MEM_ALLOCATE); + CHECK_AS_EXPECTED(INVALID_FD != type_mem_fd, 
HAILO_FILE_OPERATION_FAILURE, + "Error getting fd from typed memory of type {}, errno {}", VDMA_BUFFER_TYPE_MEMORY_NAME, errno); + + FileDescriptor shm_fd = shm_open(SHM_ANON, O_RDWR | O_CREAT, 0777); + CHECK_AS_EXPECTED(INVALID_FD != shm_fd, HAILO_FILE_OPERATION_FAILURE, + "Error creating shm object, errno is: {}", errno); + + // backs the shared memory object with physical memory. After calling shm_ctl, the type_mem_fd can be released. + int err = shm_ctl(shm_fd, SHMCTL_ANON | SHMCTL_TYMEM, (uint64_t)type_mem_fd, size); + CHECK_AS_EXPECTED(-1 != err, HAILO_FILE_OPERATION_FAILURE, + "Error backing shm object in physical memory, errno is: {}", errno); + + return shm_fd; + } + + // Initialization dependency + FileDescriptor m_shm_fd; + MmapBuffer m_mmapped_buffer; +}; + +Expected DmaAbleBuffer::create(size_t size, void *user_address) +{ + CHECK_AS_EXPECTED(nullptr == user_address, HAILO_NOT_SUPPORTED, "Mapping user address is not supported on QNX"); + return SharedMemoryDmaAbleBuffer::create(size); +} + +Expected DmaAbleBuffer::create(HailoRTDriver &driver, size_t size, void *user_address) +{ + // QNX doesn't need the driver for the allocation + (void)driver; + return DmaAbleBuffer::create(size, user_address); +} + +#else +#error "unsupported platform!" +#endif + +} /* namespace vdma */ +} /* namespace hailort */ diff --git a/hailort/libhailort/src/vdma/memory/dma_able_buffer.hpp b/hailort/libhailort/src/vdma/memory/dma_able_buffer.hpp new file mode 100644 index 0000000..66e1c70 --- /dev/null +++ b/hailort/libhailort/src/vdma/memory/dma_able_buffer.hpp @@ -0,0 +1,54 @@ +/** + * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) +**/ +/** + * @file dma_able_buffer.hpp + * @brief A Buffer that can be mapped to some device for dma operations. + * There are several options for that buffer: + * 1. No allocation - The user gives its own buffer pointer and address. 
The buffer must be page aligned. + * 2. Normal allocation - page aligned allocation. This is the default option for linux and windows. + * 3. Driver allocation - On some platforms, default user mode memory allocation is not DMAAble. To overcome + * this, we allocate the buffer in a low memory using hailort driver. We check it querying + * HailoRTDriver::allocate_driver_buffer(). + * 4. QNX shared memory allocation - for qnx, in order to pass the driver to the resources manager, we need to + * create a shared memory object, and pass a handle to it in the mapping. TODO: HRT-10298 implement this. + **/ + +#ifndef _HAILO_DMA_ABLE_BUFFER_HPP_ +#define _HAILO_DMA_ABLE_BUFFER_HPP_ + +#include "hailo/expected.hpp" +#include "os/hailort_driver.hpp" +#include "os/mmap_buffer.hpp" + +namespace hailort { +namespace vdma { + +class DmaAbleBuffer; +using DmaAbleBufferPtr = std::shared_ptr; + +class DmaAbleBuffer { +public: + // If user_address is not nullptr, allocation is not needed. + static Expected create(size_t size, void *user_address = nullptr); + + // The driver is used only if driver.allocate_driver_buffer is true, and the user address is nullptr. 
+ static Expected create(HailoRTDriver &driver, size_t size, void *user_address = nullptr); + + DmaAbleBuffer() = default; + DmaAbleBuffer(DmaAbleBuffer &&other) = delete; + DmaAbleBuffer(const DmaAbleBuffer &other) = delete; + DmaAbleBuffer &operator=(const DmaAbleBuffer &other) = delete; + DmaAbleBuffer &operator=(DmaAbleBuffer &&other) = delete; + virtual ~DmaAbleBuffer() = default; + + virtual void* user_address() = 0; + virtual size_t size() const = 0; + virtual vdma_mapped_buffer_driver_identifier buffer_identifier() = 0; +}; + +} /* namespace vdma */ +} /* namespace hailort */ + +#endif /* _HAILO_DMA_ABLE_BUFFER_HPP_ */ diff --git a/hailort/libhailort/src/vdma/memory/dma_mapped_buffer.cpp b/hailort/libhailort/src/vdma/memory/dma_mapped_buffer.cpp deleted file mode 100644 index bd3a270..0000000 --- a/hailort/libhailort/src/vdma/memory/dma_mapped_buffer.cpp +++ /dev/null @@ -1,96 +0,0 @@ -/** - * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. - * Distributed under the MIT license (https://opensource.org/licenses/MIT) -**/ -/** - * @file vmda_mapped_buffer.cpp - * @brief Vdma mapped buffer implementation - **/ - -#include "hailo/dma_mapped_buffer.hpp" - -#include "vdma/memory/mapped_buffer_impl.hpp" -#include "vdma/vdma_device.hpp" - - -namespace hailort { - -static Expected convert_flags_to_driver_enum(hailo_vdma_buffer_direction_flags_t data_direction) -{ - static const auto BOTH_DIRECTIONS = HAILO_VDMA_BUFFER_DIRECTION_FLAGS_H2D | HAILO_VDMA_BUFFER_DIRECTION_FLAGS_D2H; - if ((data_direction & BOTH_DIRECTIONS) == BOTH_DIRECTIONS) { - return HailoRTDriver::DmaDirection::BOTH; - } - - if ((data_direction & HAILO_VDMA_BUFFER_DIRECTION_FLAGS_H2D) == HAILO_VDMA_BUFFER_DIRECTION_FLAGS_H2D) { - return HailoRTDriver::DmaDirection::H2D; - } - - if ((data_direction & HAILO_VDMA_BUFFER_DIRECTION_FLAGS_D2H) == HAILO_VDMA_BUFFER_DIRECTION_FLAGS_D2H) { - return HailoRTDriver::DmaDirection::D2H; - } - - return make_unexpected(HAILO_INVALID_ARGUMENT); -} - 
-// TODO: this should maybe be a vdevice (for mapping buffers to multiple devs) -// TODO: a helper function for the cast to VdmaDevice -Expected DmaMappedBuffer::create(size_t size, - hailo_vdma_buffer_direction_flags_t data_direction_flags, Device &device) -{ - static const auto ALLOCATE_BUFFER = nullptr; - return create(ALLOCATE_BUFFER, size, data_direction_flags, device); -} - -Expected DmaMappedBuffer::create_from_user_address(void *user_address, size_t size, - hailo_vdma_buffer_direction_flags_t data_direction_flags, Device &device) -{ - CHECK_ARG_NOT_NULL_AS_EXPECTED(user_address); - return create(user_address, size, data_direction_flags, device); -} - -Expected DmaMappedBuffer::create(void *user_address, size_t size, - hailo_vdma_buffer_direction_flags_t data_direction_flags, Device &device) -{ - const auto device_type = device.get_type(); - CHECK_AS_EXPECTED(((Device::Type::INTEGRATED == device_type) || (Device::Type::PCIE == device_type)), - HAILO_INVALID_ARGUMENT, "Invalid device type (expected integrated/pcie, received {})", device_type); - VdmaDevice *vdma_device = reinterpret_cast(&device); - - auto data_direction = convert_flags_to_driver_enum(data_direction_flags); - CHECK_EXPECTED(data_direction, "Invalid direction flags received {}", data_direction_flags); - - auto pimpl_exp = Impl::create(vdma_device->get_driver(), data_direction.release(), size, user_address); - CHECK_EXPECTED(pimpl_exp); - - auto pimpl = make_unique_nothrow(pimpl_exp.release()); - CHECK_NOT_NULL_AS_EXPECTED(pimpl, HAILO_OUT_OF_HOST_MEMORY); - - return DmaMappedBuffer(std::move(pimpl)); -} - -DmaMappedBuffer::DmaMappedBuffer(std::unique_ptr pimpl) : - pimpl(std::move(pimpl)) -{} - -// Note: These can't be defined in the header due to the use of pimpl (it'll cause a compilation error) -DmaMappedBuffer::DmaMappedBuffer(DmaMappedBuffer &&other) noexcept = default; -DmaMappedBuffer::~DmaMappedBuffer() = default; - -void *DmaMappedBuffer::user_address() -{ - return 
pimpl->user_address(); -} - -size_t DmaMappedBuffer::size() const -{ - return pimpl->size(); -} - -hailo_status DmaMappedBuffer::synchronize() -{ - static constexpr auto BUFFER_START = 0; - return pimpl->synchronize(BUFFER_START, size()); -} - -} /* namespace hailort */ diff --git a/hailort/libhailort/src/vdma/memory/mapped_buffer.cpp b/hailort/libhailort/src/vdma/memory/mapped_buffer.cpp new file mode 100644 index 0000000..b179fbb --- /dev/null +++ b/hailort/libhailort/src/vdma/memory/mapped_buffer.cpp @@ -0,0 +1,125 @@ +/** + * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) +**/ +/** + * @file mapped_buffer.cpp + * @brief Vdma mapped buffer implementation + **/ + +#include "mapped_buffer.hpp" + +#include "vdma/vdma_device.hpp" + + +namespace hailort { +namespace vdma { + +Expected MappedBuffer::create(HailoRTDriver &driver, + std::shared_ptr buffer, HailoRTDriver::DmaDirection data_direction) +{ + auto status = HAILO_UNINITIALIZED; + auto result = MappedBuffer(driver, buffer, data_direction, status); + CHECK_SUCCESS_AS_EXPECTED(status); + + return result; +} + +Expected MappedBuffer::create_shared(HailoRTDriver &driver, std::shared_ptr buffer, + HailoRTDriver::DmaDirection data_direction) +{ + auto dma_mapped_buffer = create(driver, buffer, data_direction); + CHECK_EXPECTED(dma_mapped_buffer); + + auto result = make_shared_nothrow(dma_mapped_buffer.release()); + CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY); + + return result; +} + +Expected MappedBuffer::create(HailoRTDriver &driver, + HailoRTDriver::DmaDirection data_direction, size_t size, void *user_address) +{ + auto buffer = DmaAbleBuffer::create(driver, size, user_address); + CHECK_EXPECTED(buffer); + + return create(driver, buffer.release(), data_direction); +} + +Expected MappedBuffer::create_shared(HailoRTDriver &driver, + HailoRTDriver::DmaDirection data_direction, size_t size, void 
*user_address) +{ + auto dma_mapped_buffer = create(driver, data_direction, size, user_address); + CHECK_EXPECTED(dma_mapped_buffer); + + auto result = make_shared_nothrow(dma_mapped_buffer.release()); + CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY); + + return result; +} + +MappedBuffer::MappedBuffer(HailoRTDriver &driver, std::shared_ptr buffer, + HailoRTDriver::DmaDirection data_direction, hailo_status &status) : + m_driver(driver), + m_buffer(buffer), + m_mapping_handle(HailoRTDriver::INVALID_DRIVER_VDMA_MAPPING_HANDLE_VALUE), + m_data_direction(data_direction) +{ + auto expected_handle = driver.vdma_buffer_map(m_buffer->user_address(), m_buffer->size(), m_data_direction, + m_buffer->buffer_identifier()); + if (!expected_handle) { + LOGGER__ERROR("Mapping address {} to dma failed", m_buffer->user_address()); + status = expected_handle.status(); + return; + } + + m_mapping_handle = expected_handle.release(); + status = HAILO_SUCCESS; +} + +MappedBuffer::~MappedBuffer() +{ + if (HailoRTDriver::INVALID_DRIVER_VDMA_MAPPING_HANDLE_VALUE != m_mapping_handle) { + m_driver.vdma_buffer_unmap(m_mapping_handle); + m_mapping_handle = HailoRTDriver::INVALID_DRIVER_VDMA_MAPPING_HANDLE_VALUE; + } +} + +MappedBuffer::MappedBuffer(MappedBuffer &&other) noexcept : + m_driver(other.m_driver), + m_buffer(std::move(other.m_buffer)), + m_mapping_handle(std::exchange(other.m_mapping_handle, HailoRTDriver::INVALID_DRIVER_VDMA_MAPPING_HANDLE_VALUE)), + m_data_direction(other.m_data_direction) +{} + +void* MappedBuffer::user_address() +{ + return m_buffer->user_address(); +} + +size_t MappedBuffer::size() const +{ + return m_buffer->size(); +} + +HailoRTDriver::VdmaBufferHandle MappedBuffer::handle() +{ + return m_mapping_handle; +} + +hailo_status MappedBuffer::synchronize(HailoRTDriver::DmaSyncDirection sync_direction) +{ + static constexpr auto BUFFER_START = 0; + return synchronize(BUFFER_START, size(), sync_direction); +} + +hailo_status 
MappedBuffer::synchronize(size_t offset, size_t count, HailoRTDriver::DmaSyncDirection sync_direction) +{ + CHECK(offset + count <= size(), HAILO_INVALID_ARGUMENT, + "Synchronizing {} bytes starting at offset {} will overflow (buffer size {})", + offset, count, size()); + return m_driver.vdma_buffer_sync(m_mapping_handle, sync_direction, offset, count); +} + +} /* namespace vdma */ +} /* namespace hailort */ diff --git a/hailort/libhailort/src/vdma/memory/mapped_buffer.hpp b/hailort/libhailort/src/vdma/memory/mapped_buffer.hpp new file mode 100644 index 0000000..2fc876b --- /dev/null +++ b/hailort/libhailort/src/vdma/memory/mapped_buffer.hpp @@ -0,0 +1,81 @@ +/** + * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file mapped_buffer.hpp + * @brief The mapped buffer that is continuous in virtual memory, but not on physical memory. + * We map the buffer to the IOMMU. + * + * The buffer can be used only with the help of a descriptors list that contains pointers to a physical + * continuous "dma pages". + * + * There are 2 options to allocate the buffer: + * 1. User mode allocation - the user mode calls `malloc` or `mmap` to allocate the buffer, then + * using HailoRTDriver we map the buffer to the IOMMU (and pin the pages to avoid paging). + * This is the default option + * 2. Kernel mode allocation - on some systems, the user mode doesn't allocate the memory in a "dma-able" address, + * so we need to allocate the pages in driver. + **/ + +#ifndef _HAILO_DMA_MAPPED_BUFFER_HPP_ +#define _HAILO_DMA_MAPPED_BUFFER_HPP_ + +#include "hailo/expected.hpp" +#include "os/hailort_driver.hpp" +#include "vdma/memory/dma_able_buffer.hpp" + +#include + + +namespace hailort { +namespace vdma { + + +class MappedBuffer; +using MappedBufferPtr = std::shared_ptr; + +class MappedBuffer final +{ +public: + // Maps the given DmaAbleBuffer in the right direction. 
+ static Expected create(HailoRTDriver &driver, std::shared_ptr buffer, + HailoRTDriver::DmaDirection data_direction); + static Expected create_shared(HailoRTDriver &driver, std::shared_ptr buffer, + HailoRTDriver::DmaDirection data_direction); + + // If user_address is nullptr, a buffer of size 'size' will be allocated and mapped to dma in 'data_direction' + // Otherwise, the buffer pointed to by user_address will be mapped to dma in 'data_direction' + static Expected create(HailoRTDriver &driver, HailoRTDriver::DmaDirection data_direction, + size_t size, void *user_address = nullptr); + static Expected create_shared(HailoRTDriver &driver, HailoRTDriver::DmaDirection data_direction, + size_t size, void *user_address = nullptr); + + + MappedBuffer(MappedBuffer &&other) noexcept; + MappedBuffer(const MappedBuffer &other) = delete; + MappedBuffer &operator=(const MappedBuffer &other) = delete; + MappedBuffer &operator=(MappedBuffer &&other) = delete; + ~MappedBuffer(); + + size_t size() const; + void *user_address(); + HailoRTDriver::VdmaBufferHandle handle(); + hailo_status synchronize(HailoRTDriver::DmaSyncDirection sync_direction); + // TODO: validate that offset is cache aligned (HRT-9811) + hailo_status synchronize(size_t offset, size_t count, HailoRTDriver::DmaSyncDirection sync_direction); + +private: + MappedBuffer(HailoRTDriver &driver, std::shared_ptr buffer, HailoRTDriver::DmaDirection data_direction, + hailo_status &status); + + HailoRTDriver &m_driver; + std::shared_ptr m_buffer; + HailoRTDriver::VdmaBufferHandle m_mapping_handle; + const HailoRTDriver::DmaDirection m_data_direction; +}; + +} /* namespace vdma */ +} /* namespace hailort */ + +#endif /* _HAILO_DMA_MAPPED_BUFFER_HPP_ */ \ No newline at end of file diff --git a/hailort/libhailort/src/vdma/memory/mapped_buffer_factory.cpp b/hailort/libhailort/src/vdma/memory/mapped_buffer_factory.cpp deleted file mode 100644 index 095243e..0000000 --- 
a/hailort/libhailort/src/vdma/memory/mapped_buffer_factory.cpp +++ /dev/null @@ -1,30 +0,0 @@ -/** - * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. - * Distributed under the MIT license (https://opensource.org/licenses/MIT) -**/ -/** - * @file mapped_buffer_factory.cpp - * @brief Static utility class for creating DmaMappedBuffers internally in hailort - **/ - -#include "vdma/memory/mapped_buffer_factory.hpp" -#include "vdma/memory/mapped_buffer_impl.hpp" - -namespace hailort -{ -namespace vdma -{ - -Expected MappedBufferFactory::create_mapped_buffer(size_t size, - HailoRTDriver::DmaDirection data_direction, HailoRTDriver &driver) -{ - auto pimpl_exp = DmaMappedBuffer::Impl::create(driver, data_direction, size); - CHECK_EXPECTED(pimpl_exp); - - auto pimpl = make_unique_nothrow(pimpl_exp.release()); - CHECK_NOT_NULL_AS_EXPECTED(pimpl, HAILO_OUT_OF_HOST_MEMORY); - return DmaMappedBuffer(std::move(pimpl)); -} - -} /* namespace vdma */ -} /* namespace hailort */ diff --git a/hailort/libhailort/src/vdma/memory/mapped_buffer_factory.hpp b/hailort/libhailort/src/vdma/memory/mapped_buffer_factory.hpp deleted file mode 100644 index 8cad51f..0000000 --- a/hailort/libhailort/src/vdma/memory/mapped_buffer_factory.hpp +++ /dev/null @@ -1,33 +0,0 @@ -/** - * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. 
- * Distributed under the MIT license (https://opensource.org/licenses/MIT) -**/ -/** - * @file mapped_buffer_factory.hpp - * @brief Static utility class for creating DmaMappedBuffers internally in hailort - **/ - -#ifndef _HAILO_MAPPED_BUFFER_FACTORY_HPP_ -#define _HAILO_MAPPED_BUFFER_FACTORY_HPP_ - -#include "hailo/hailort.h" -#include "hailo/dma_mapped_buffer.hpp" -#include "os/hailort_driver.hpp" - -namespace hailort -{ -namespace vdma -{ - -class MappedBufferFactory -{ -public: - MappedBufferFactory() = delete; - static Expected create_mapped_buffer(size_t size, - HailoRTDriver::DmaDirection data_direction, HailoRTDriver &driver); -}; - -} /* namespace vdma */ -} /* namespace hailort */ - -#endif /* _HAILO_MAPPED_BUFFER_FACTORY_HPP_ */ diff --git a/hailort/libhailort/src/vdma/memory/mapped_buffer_impl.cpp b/hailort/libhailort/src/vdma/memory/mapped_buffer_impl.cpp deleted file mode 100644 index 2d7193f..0000000 --- a/hailort/libhailort/src/vdma/memory/mapped_buffer_impl.cpp +++ /dev/null @@ -1,279 +0,0 @@ -/** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. 
- * Distributed under the MIT license (https://opensource.org/licenses/MIT) - **/ -/** - * @file mapped_buffer_impl.cpp - * @brief Dma mapped buffer pimpl class implementation - **/ -#include "mapped_buffer_impl.hpp" - -namespace hailort { - -#if defined(__linux__) || defined(_MSC_VER) - -Expected DmaMappedBuffer::Impl::create(HailoRTDriver &driver, - HailoRTDriver::DmaDirection data_direction, size_t size, void *user_address) -{ - if (nullptr != user_address) { - // User allocated buffer - create an empty MmapBuffer (it doesn't hold the buffer) - auto status = HAILO_UNINITIALIZED; - auto result = DmaMappedBuffer::Impl(HailoRTDriver::INVALID_DRIVER_BUFFER_HANDLE_VALUE, size, - data_direction, user_address, MmapBuffer(), driver, status); - CHECK_SUCCESS_AS_EXPECTED(status); - - return result; - } else if (driver.allocate_driver_buffer()) { - // Allocate buffer via driver - auto driver_buffer_handle = driver.vdma_low_memory_buffer_alloc(size); - CHECK_EXPECTED(driver_buffer_handle); - - uintptr_t driver_buff_handle = driver_buffer_handle.release(); - - auto mapped_buffer = MmapBuffer::create_file_map(size, driver.fd(), driver_buff_handle); - CHECK_EXPECTED(mapped_buffer); - - auto status = HAILO_UNINITIALIZED; - auto result = DmaMappedBuffer::Impl(driver_buff_handle, size, data_direction, mapped_buffer.release(), - driver, status); - CHECK_SUCCESS_AS_EXPECTED(status); - - return result; - } else { - // Standard userspace allocation - auto mapped_buffer = MmapBuffer::create_shared_memory(size); - CHECK_EXPECTED(mapped_buffer); - - auto status = HAILO_UNINITIALIZED; - auto result = DmaMappedBuffer::Impl(HailoRTDriver::INVALID_DRIVER_BUFFER_HANDLE_VALUE, size, - data_direction, mapped_buffer.release(), driver, status); - CHECK_SUCCESS_AS_EXPECTED(status); - - return result; - } -} - -DmaMappedBuffer::Impl::Impl(vdma_mapped_buffer_driver_identifier driver_allocated_buffer_id, - size_t size, HailoRTDriver::DmaDirection data_direction, void *user_address, - MmapBuffer 
&&mapped_buffer, HailoRTDriver &driver, hailo_status &status) : - m_driver(driver), - m_driver_allocated_buffer_id(driver_allocated_buffer_id), - m_mapping_handle(HailoRTDriver::INVALID_DRIVER_VDMA_MAPPING_HANDLE_VALUE), - m_mapped_buffer(std::move(mapped_buffer)), - m_size(size), - m_data_direction(data_direction), - m_user_address(user_address) -{ - if (m_mapped_buffer.is_mapped() && (m_user_address != m_mapped_buffer.address())) { - status = HAILO_INVALID_ARGUMENT; - return; - } - - auto expected_handle = driver.vdma_buffer_map(m_user_address, m_size, m_data_direction, - m_driver_allocated_buffer_id); - if (!expected_handle) { - status = expected_handle.status(); - return; - } - - m_mapping_handle = expected_handle.release(); - status = HAILO_SUCCESS; -} - -DmaMappedBuffer::Impl::Impl(vdma_mapped_buffer_driver_identifier driver_allocated_buffer_id, - size_t size, HailoRTDriver::DmaDirection data_direction, - MmapBuffer &&mapped_buffer, HailoRTDriver &driver, hailo_status &status) : - Impl(driver_allocated_buffer_id, size, data_direction, mapped_buffer.address(), std::move(mapped_buffer), driver, status) -{} - -DmaMappedBuffer::Impl::Impl(Impl &&other) noexcept : - m_driver(other.m_driver), - m_driver_allocated_buffer_id(std::exchange(other.m_driver_allocated_buffer_id, HailoRTDriver::INVALID_DRIVER_BUFFER_HANDLE_VALUE)), - m_mapping_handle(std::exchange(other.m_mapping_handle, HailoRTDriver::INVALID_DRIVER_VDMA_MAPPING_HANDLE_VALUE)), - m_mapped_buffer(std::move(other.m_mapped_buffer)), - m_size(std::move(other.m_size)), - m_data_direction(std::move(other.m_data_direction)), - m_user_address(std::move(other.m_user_address)) -{} - -DmaMappedBuffer::Impl::~Impl() -{ - if (HailoRTDriver::INVALID_DRIVER_VDMA_MAPPING_HANDLE_VALUE != m_mapping_handle) { - m_driver.vdma_buffer_unmap(m_mapping_handle); - m_mapping_handle = HailoRTDriver::INVALID_DRIVER_VDMA_MAPPING_HANDLE_VALUE; - } - - if (HailoRTDriver::INVALID_DRIVER_BUFFER_HANDLE_VALUE != 
m_driver_allocated_buffer_id) { - m_driver.vdma_low_memory_buffer_free(m_driver_allocated_buffer_id); - m_driver_allocated_buffer_id = HailoRTDriver::INVALID_DRIVER_BUFFER_HANDLE_VALUE; - } -} - -void* DmaMappedBuffer::Impl::user_address() -{ - return m_user_address; -} - -size_t DmaMappedBuffer::Impl::size() const -{ - return m_size; -} - -HailoRTDriver::VdmaBufferHandle DmaMappedBuffer::Impl::handle() -{ - return m_mapping_handle; -} - -hailo_status DmaMappedBuffer::Impl::synchronize(size_t offset, size_t count) -{ - CHECK(offset + count <= size(), HAILO_INVALID_ARGUMENT, - "Synchronizing {} bytes starting at offset {} will overflow (buffer size {})", - offset, count, size()); - return m_driver.vdma_buffer_sync(m_mapping_handle, m_data_direction, offset, count); -} - -#elif defined(__QNX__) - -#include - -const int DmaMappedBuffer::Impl::INVALID_FD = -1; -const shm_handle_t DmaMappedBuffer::Impl::INVALID_HANDLE = (shm_handle_t)-1; -const char* DmaMappedBuffer::Impl::VDMA_BUFFER_TYPE_MEMORY_NAME = "/memory/below4G/ram/below1G"; - -Expected DmaMappedBuffer::Impl::create(HailoRTDriver &driver, - HailoRTDriver::DmaDirection data_direction, size_t size, void *user_address) -{ - // TODO: HRT-9508 - CHECK_AS_EXPECTED(user_address == nullptr, HAILO_NOT_IMPLEMENTED, "User allocated buffers not supported on qnx"); - - // Destructor of type_mem_fd will close fd - FileDescriptor type_mem_fd(posix_typed_mem_open(VDMA_BUFFER_TYPE_MEMORY_NAME, O_RDWR, POSIX_TYPED_MEM_ALLOCATE)); - if (INVALID_FD == type_mem_fd) { - LOGGER__ERROR("Error getting fd from typed memory of type {}, errno {}\n", VDMA_BUFFER_TYPE_MEMORY_NAME, - errno); - return make_unexpected(HAILO_INTERNAL_FAILURE); - } - - vdma_mapped_buffer_driver_identifier driver_buff_handle; - driver_buff_handle.shm_fd = shm_open(SHM_ANON, O_RDWR | O_CREAT, 0777); - CHECK_AS_EXPECTED(INVALID_FD != driver_buff_handle.shm_fd, HAILO_INTERNAL_FAILURE, - "Error creating shm object, errno is: {}", errno); - - // backs the shared 
memory object with physical memory - int err = shm_ctl(driver_buff_handle.shm_fd, SHMCTL_ANON | SHMCTL_TYMEM, (uint64_t)type_mem_fd, - size); - if (-1 == err) { - LOGGER__ERROR("Error backing shm object in physical memory, errno is: {}", errno); - close(driver_buff_handle.shm_fd); - return make_unexpected(HAILO_INTERNAL_FAILURE); - } - - // Create shared memory handle to send to driver - err = shm_create_handle(driver_buff_handle.shm_fd, driver.resource_manager_pid(), O_RDWR, - &driver_buff_handle.shm_handle, 0); - if (0 != err) { - LOGGER__ERROR("Error creating shm object handle, errno is: {}", errno); - close(driver_buff_handle.shm_fd); - return make_unexpected(HAILO_INTERNAL_FAILURE); - } - - void *address = mmap(0, size, PROT_WRITE | PROT_READ | PROT_NOCACHE, MAP_SHARED, driver_buff_handle.shm_fd, 0); - if (MAP_FAILED == address) { - LOGGER__ERROR("Failed to mmap buffer with errno:{}", errno); - shm_delete_handle(driver_buff_handle.shm_handle); - close(driver_buff_handle.shm_fd); - return make_unexpected(HAILO_OUT_OF_HOST_MEMORY); - } - - hailo_status status = HAILO_UNINITIALIZED; - auto result = DmaMappedBuffer::Impl(address, size, data_direction, driver_buff_handle.shm_handle, - driver_buff_handle.shm_fd, driver, status); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("Failed to map buffer to vdma"); - munmap(address, size); - shm_delete_handle(driver_buff_handle.shm_handle); - close(driver_buff_handle.shm_fd); - return make_unexpected(status); - } - - return result; -} - -DmaMappedBuffer::Impl::Impl(void *addr, size_t size, HailoRTDriver::DmaDirection data_direction, - shm_handle_t shm_handle, int shm_fd, HailoRTDriver &driver, hailo_status &status) : - m_driver(driver), - m_address(addr), - m_size(size), - m_data_direction(data_direction) -{ - m_driver_allocated_buffer_id.shm_handle = shm_handle; - m_driver_allocated_buffer_id.shm_fd = shm_fd; - - auto expected_handle = driver.vdma_buffer_map(addr, size, data_direction, m_driver_allocated_buffer_id); - if 
(!expected_handle) { - status = expected_handle.status(); - return; - } - - m_mapping_handle = expected_handle.release(); - status = HAILO_SUCCESS; -} - -DmaMappedBuffer::Impl::Impl(Impl &&other) noexcept : - m_driver(other.m_driver), - m_address(std::exchange(other.m_address, nullptr)), - m_size(std::move(other.m_size)), - m_data_direction(std::move(other.m_data_direction)), - m_mapping_handle(std::exchange(other.m_mapping_handle, HailoRTDriver::INVALID_DRIVER_VDMA_MAPPING_HANDLE_VALUE)) -{ - m_driver_allocated_buffer_id.shm_handle = std::exchange(other.m_driver_allocated_buffer_id.shm_handle, INVALID_HANDLE); - m_driver_allocated_buffer_id.shm_fd = std::exchange(other.m_driver_allocated_buffer_id.shm_fd, INVALID_FD); -} - -DmaMappedBuffer::Impl::~Impl() -{ - if (HailoRTDriver::INVALID_DRIVER_VDMA_MAPPING_HANDLE_VALUE != m_mapping_handle) { - m_driver.vdma_buffer_unmap(m_mapping_handle); - m_mapping_handle = HailoRTDriver::INVALID_DRIVER_VDMA_MAPPING_HANDLE_VALUE; - } - - if (nullptr != m_address) { - if (0 != munmap(m_address, m_size)) { - LOGGER__ERROR("Error unmapping memory at address {}, Errno: {}", m_address, errno); - } - } - - if (INVALID_FD != m_driver_allocated_buffer_id.shm_fd) { - if (0 != close(m_driver_allocated_buffer_id.shm_fd)) { - LOGGER__ERROR("Error closing shared memory fd, Errno: {}", errno); - } - } -} - -void* DmaMappedBuffer::Impl::user_address() -{ - return m_address; -} -size_t DmaMappedBuffer::Impl::size() const -{ - return m_size; -} - -HailoRTDriver::VdmaBufferHandle DmaMappedBuffer::Impl::handle() -{ - return m_mapping_handle; -} - -hailo_status DmaMappedBuffer::Impl::synchronize(size_t offset, size_t count) -{ - CHECK(offset + count <= size(), HAILO_INVALID_ARGUMENT, - "Synchronizing {} bytes starting at offset {} will overflow (buffer size {})", - offset, count, size()); - return m_driver.vdma_buffer_sync(m_mapping_handle, m_data_direction, offset, count); -} - -#else -#error "unsupported platform!" 
-#endif // defined(__linux__) || defined(_MSC_VER) - -} /* namespace hailort */ \ No newline at end of file diff --git a/hailort/libhailort/src/vdma/memory/mapped_buffer_impl.hpp b/hailort/libhailort/src/vdma/memory/mapped_buffer_impl.hpp deleted file mode 100644 index 7643db8..0000000 --- a/hailort/libhailort/src/vdma/memory/mapped_buffer_impl.hpp +++ /dev/null @@ -1,98 +0,0 @@ -/** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. - * Distributed under the MIT license (https://opensource.org/licenses/MIT) - **/ -/** - * @file mapped_buffer_impl.hpp - * @brief Vdma mapped buffer pimpl class defintion - **/ -#ifndef _HAILO_VDMA_MAPPED_BUFFER_IMPL_HPP_ -#define _HAILO_VDMA_MAPPED_BUFFER_IMPL_HPP_ - -#include "hailo/dma_mapped_buffer.hpp" -#include "os/mmap_buffer.hpp" -#include "os/hailort_driver.hpp" -#include "hailo/expected.hpp" - -namespace hailort { - -#if defined(__linux__) || defined(_MSC_VER) - -class DmaMappedBuffer::Impl final { -public: - // If user_address is nullptr, a buffer of size 'size' will be allocated and mapped to dma in 'data_direction' - // Otherwise, the buffer pointed to by user_address will be mapped to dma in 'data_direction' - static Expected create(HailoRTDriver &driver, HailoRTDriver::DmaDirection data_direction, - size_t size, void *user_address = nullptr); - - Impl(Impl &&other) noexcept; - Impl(const Impl &other) = delete; - Impl &operator=(const Impl &other) = delete; - Impl &operator=(Impl &&other) = delete; - ~Impl(); - - void* user_address(); - size_t size() const; - HailoRTDriver::VdmaBufferHandle handle(); - // TODO: validate that offset is cache aligned (HRT-9811) - hailo_status synchronize(size_t offset, size_t count); - -private: - Impl(vdma_mapped_buffer_driver_identifier driver_allocated_buffer_id, size_t size, - HailoRTDriver::DmaDirection data_direction, void *user_address, MmapBuffer &&mapped_buffer, - HailoRTDriver &driver, hailo_status &status); - Impl(vdma_mapped_buffer_driver_identifier 
driver_allocated_buffer_id, size_t size, - HailoRTDriver::DmaDirection data_direction, MmapBuffer &&mapped_buffer, HailoRTDriver &driver, - hailo_status &status); - - HailoRTDriver &m_driver; - vdma_mapped_buffer_driver_identifier m_driver_allocated_buffer_id; - HailoRTDriver::VdmaBufferHandle m_mapping_handle; - MmapBuffer m_mapped_buffer; - const size_t m_size; - const HailoRTDriver::DmaDirection m_data_direction; - void *const m_user_address; -}; - -#elif defined(__QNX__) - -// TODO: merge qnx and non-qnx impls (HRT-9508) -class DmaMappedBuffer::Impl final { -public: - static Expected create(HailoRTDriver &driver, HailoRTDriver::DmaDirection data_direction, - size_t size, void *user_address = nullptr); - - Impl(const Impl &other) = delete; - Impl &operator=(const Impl &other) = delete; - Impl &operator=(Impl &&other) = delete; - Impl(Impl &&other) noexcept; - ~Impl(); - - void* user_address(); - size_t size() const; - HailoRTDriver::VdmaBufferHandle handle(); - hailo_status synchronize(size_t offset, size_t count); - -private: - Impl(void *addr, size_t size, HailoRTDriver::DmaDirection data_direction, - shm_handle_t shm_handle, int shm_fd, HailoRTDriver &driver, hailo_status &status); - - static const int INVALID_FD; - static const shm_handle_t INVALID_HANDLE; - static const char* VDMA_BUFFER_TYPE_MEMORY_NAME; - - HailoRTDriver &m_driver; - void *m_address; - const size_t m_size; - const HailoRTDriver::DmaDirection m_data_direction; - vdma_mapped_buffer_driver_identifier m_driver_allocated_buffer_id; - HailoRTDriver::VdmaBufferHandle m_mapping_handle; -}; - -#else -#error "unsupported platform!" 
-#endif // defined(__linux__) || defined(_MSC_VER) - -} /* namespace hailort */ - -#endif /* _HAILO_VDMA_MAPPED_BUFFER_IMPL_HPP_ */ \ No newline at end of file diff --git a/hailort/libhailort/src/vdma/memory/sg_buffer.cpp b/hailort/libhailort/src/vdma/memory/sg_buffer.cpp index 9d6b97b..28a6973 100644 --- a/hailort/libhailort/src/vdma/memory/sg_buffer.cpp +++ b/hailort/libhailort/src/vdma/memory/sg_buffer.cpp @@ -9,28 +9,23 @@ #include "vdma/memory/sg_buffer.hpp" #include "vdma/channel/channel_id.hpp" -#include "vdma/memory/mapped_buffer_factory.hpp" namespace hailort { namespace vdma { Expected SgBuffer::create(HailoRTDriver &driver, size_t size, uint32_t desc_count, uint16_t desc_page_size, - HailoRTDriver::DmaDirection data_direction, ChannelId channel_id) + bool is_circular, HailoRTDriver::DmaDirection data_direction, ChannelId channel_id) { CHECK_AS_EXPECTED(size <= (desc_count * desc_page_size), HAILO_INTERNAL_FAILURE, "Requested buffer size {} must be smaller than {}", size, (desc_count * desc_page_size)); CHECK_AS_EXPECTED((size % desc_page_size) == 0, HAILO_INTERNAL_FAILURE, "SgBuffer size must be a multiple of descriptors page size (size {})", size); - auto mapped_buffer_exp = MappedBufferFactory::create_mapped_buffer(size, - data_direction, driver); - CHECK_EXPECTED(mapped_buffer_exp); + auto mapped_buffer = MappedBuffer::create_shared(driver, data_direction, size); + CHECK_EXPECTED(mapped_buffer); - auto mapped_buffer = make_shared_nothrow(mapped_buffer_exp.release()); - CHECK_NOT_NULL_AS_EXPECTED(mapped_buffer, HAILO_OUT_OF_HOST_MEMORY); - - auto desc_list_exp = DescriptorList::create(desc_count, desc_page_size, driver); + auto desc_list_exp = DescriptorList::create(desc_count, desc_page_size, is_circular, driver); CHECK_EXPECTED(desc_list_exp); auto desc_list = make_shared_nothrow(desc_list_exp.release()); @@ -38,13 +33,13 @@ Expected SgBuffer::create(HailoRTDriver &driver, size_t size, uint32_t assert((desc_count * desc_page_size) <= 
std::numeric_limits::max()); - auto status = desc_list->configure_to_use_buffer(*mapped_buffer, channel_id); + auto status = desc_list->configure_to_use_buffer(*mapped_buffer.value(), channel_id); CHECK_SUCCESS_AS_EXPECTED(status); - return SgBuffer(mapped_buffer, desc_list); + return SgBuffer(mapped_buffer.release(), desc_list); } -SgBuffer::SgBuffer(std::shared_ptr mapped_buffer, std::shared_ptr desc_list) : +SgBuffer::SgBuffer(std::shared_ptr mapped_buffer, std::shared_ptr desc_list) : m_mapped_buffer(mapped_buffer), m_desc_list(desc_list) {} @@ -69,33 +64,15 @@ uint32_t SgBuffer::descs_count() const return static_cast(m_desc_list->count()); } -uint8_t SgBuffer::depth() const -{ - return m_desc_list->depth(); -} - -std::shared_ptr SgBuffer::get_desc_list() -{ - return m_desc_list; -} - -// TODO: Remove after HRT-7838 -void* SgBuffer::get_user_address() -{ - return m_mapped_buffer->user_address(); -} - -hailo_status SgBuffer::read(void *buf_dst, size_t count, size_t offset, bool should_sync) +hailo_status SgBuffer::read(void *buf_dst, size_t count, size_t offset) { CHECK(count + offset <= m_mapped_buffer->size(), HAILO_INSUFFICIENT_BUFFER); if (count == 0) { return HAILO_SUCCESS; } - if (should_sync) { - const auto status = m_mapped_buffer->synchronize(); - CHECK_SUCCESS(status, "Failed synching SgBuffer buffer on read"); - } + const auto status = m_mapped_buffer->synchronize(offset, count, HailoRTDriver::DmaSyncDirection::TO_HOST); + CHECK_SUCCESS(status, "Failed synching SgBuffer buffer on read"); const auto src_addr = static_cast(m_mapped_buffer->user_address()) + offset; memcpy(buf_dst, src_addr, count); @@ -112,16 +89,16 @@ hailo_status SgBuffer::write(const void *buf_src, size_t count, size_t offset) const auto dst_addr = static_cast(m_mapped_buffer->user_address()) + offset; std::memcpy(dst_addr, buf_src, count); - const auto status = m_mapped_buffer->synchronize(); + const auto status = m_mapped_buffer->synchronize(offset, count, 
HailoRTDriver::DmaSyncDirection::TO_DEVICE); CHECK_SUCCESS(status, "Failed synching SgBuffer buffer on write"); return HAILO_SUCCESS; } Expected SgBuffer::program_descriptors(size_t transfer_size, InterruptsDomain last_desc_interrupts_domain, - size_t desc_offset, bool is_circular) + size_t desc_offset) { - return m_desc_list->program_last_descriptor(transfer_size, last_desc_interrupts_domain, desc_offset, is_circular); + return m_desc_list->program_last_descriptor(transfer_size, last_desc_interrupts_domain, desc_offset); } hailo_status SgBuffer::reprogram_device_interrupts_for_end_of_batch(size_t transfer_size, uint16_t batch_size, diff --git a/hailort/libhailort/src/vdma/memory/sg_buffer.hpp b/hailort/libhailort/src/vdma/memory/sg_buffer.hpp index 36e2041..bb13157 100644 --- a/hailort/libhailort/src/vdma/memory/sg_buffer.hpp +++ b/hailort/libhailort/src/vdma/memory/sg_buffer.hpp @@ -7,19 +7,18 @@ * @brief Scatter-gather vdma buffer, from the user-mode point of view the buffer is continuous, * but not from the physical-memory point of view. * The sg buffer contains 2 parts: - * - DmaMappedBuffer - the actual buffer stores the data. - * - Descriptors list - each descritpor points to a single "dma page" in the DmaMappedBuffer. + * - MappedBuffer - the actual buffer stores the data. + * - Descriptors list - each descritpor points to a single "dma page" in the MappedBuffer. * The hw accept the descriptors list address and parses it to get the actual data. 
**/ #ifndef _HAILO_VDMA_SG_BUFFER_HPP_ #define _HAILO_VDMA_SG_BUFFER_HPP_ -#include "hailo/dma_mapped_buffer.hpp" - #include "os/hailort_driver.hpp" #include "vdma/memory/vdma_buffer.hpp" #include "vdma/memory/descriptor_list.hpp" +#include "vdma/memory/mapped_buffer.hpp" namespace hailort { @@ -28,7 +27,7 @@ namespace vdma { class SgBuffer final : public VdmaBuffer { public: static Expected create(HailoRTDriver &driver, size_t size, uint32_t desc_count, uint16_t desc_page_size, - HailoRTDriver::DmaDirection data_direction, vdma::ChannelId channel_id); + bool is_circular, HailoRTDriver::DmaDirection data_direction, vdma::ChannelId channel_id); virtual ~SgBuffer() = default; @@ -46,25 +45,20 @@ public: virtual uint64_t dma_address() const override; virtual uint16_t desc_page_size() const override; virtual uint32_t descs_count() const override; - uint8_t depth() const; - - std::shared_ptr get_desc_list(); - // TODO: Remove after HRT-7838 - void *get_user_address(); - virtual hailo_status read(void *buf_dst, size_t count, size_t offset, bool should_sync) override; + virtual hailo_status read(void *buf_dst, size_t count, size_t offset) override; virtual hailo_status write(const void *buf_src, size_t count, size_t offset) override; virtual Expected program_descriptors(size_t transfer_size, InterruptsDomain last_desc_interrupts_domain, - size_t desc_offset, bool is_circular) override; + size_t desc_offset) override; virtual hailo_status reprogram_device_interrupts_for_end_of_batch(size_t transfer_size, uint16_t batch_size, InterruptsDomain new_interrupts_domain) override; private: - SgBuffer(std::shared_ptr mapped_buffer, std::shared_ptr desc_list); + SgBuffer(std::shared_ptr mapped_buffer, std::shared_ptr desc_list); // Initialization Dependency: The descriptor list points into the mapped buffer so it must be freed before it - std::shared_ptr m_mapped_buffer; + std::shared_ptr m_mapped_buffer; std::shared_ptr m_desc_list; }; diff --git 
a/hailort/libhailort/src/vdma/memory/vdma_buffer.hpp b/hailort/libhailort/src/vdma/memory/vdma_buffer.hpp index 78171ab..d11393f 100644 --- a/hailort/libhailort/src/vdma/memory/vdma_buffer.hpp +++ b/hailort/libhailort/src/vdma/memory/vdma_buffer.hpp @@ -48,11 +48,11 @@ public: return static_cast(DIV_ROUND_UP(buffer_size, page_size)); } - virtual hailo_status read(void *buf_dst, size_t count, size_t offset, bool should_sync = true) = 0; + virtual hailo_status read(void *buf_dst, size_t count, size_t offset) = 0; virtual hailo_status write(const void *buf_src, size_t count, size_t offset) = 0; virtual Expected program_descriptors(size_t transfer_size, InterruptsDomain last_desc_interrupts_domain, - size_t desc_offset, bool is_circular) = 0; + size_t desc_offset) = 0; virtual hailo_status reprogram_device_interrupts_for_end_of_batch(size_t transfer_size, uint16_t batch_size, InterruptsDomain new_interrupts_domain) = 0; diff --git a/hailort/libhailort/src/vdma/pcie/pcie_device.cpp b/hailort/libhailort/src/vdma/pcie/pcie_device.cpp index 2fe9e93..88fe6b2 100644 --- a/hailort/libhailort/src/vdma/pcie/pcie_device.cpp +++ b/hailort/libhailort/src/vdma/pcie/pcie_device.cpp @@ -52,8 +52,10 @@ Expected> PcieDevice::create() // Take the first device auto scan_result = scan(); CHECK_EXPECTED(scan_result, "Failed scanning pcie devices"); - CHECK_AS_EXPECTED(scan_result->size() == 1, HAILO_INVALID_OPERATION, - "Expected only 1 PCIe device. 
Pass `hailo_pcie_device_info_t` to create a specific PCIe device"); + CHECK_AS_EXPECTED(scan_result->size() >= 1, HAILO_INVALID_OPERATION, + "There are no PCIe devices on the system"); + + // choose first device return create(scan_result->at(0)); } @@ -62,15 +64,11 @@ Expected> PcieDevice::create(const hailo_pcie_device auto device_info = find_device_info(pcie_device_info); CHECK_EXPECTED(device_info); - auto pcie_device_info_str = pcie_device_info_to_string(pcie_device_info); - CHECK_EXPECTED(pcie_device_info_str); - - auto driver = HailoRTDriver::create(device_info->dev_path); + auto driver = HailoRTDriver::create(*device_info); CHECK_EXPECTED(driver); hailo_status status = HAILO_UNINITIALIZED; - auto device = std::unique_ptr(new (std::nothrow) PcieDevice(driver.release(), pcie_device_info, status, - pcie_device_info_str.release())); + auto device = std::unique_ptr(new (std::nothrow) PcieDevice(driver.release(), status)); CHECK_AS_EXPECTED((nullptr != device), HAILO_OUT_OF_HOST_MEMORY); CHECK_SUCCESS_AS_EXPECTED(status, "Failed creating PcieDevice"); return device; @@ -130,10 +128,16 @@ Expected PcieDevice::pcie_device_info_to_string(const hailo_pcie_de return std::string(device_string); } -PcieDevice::PcieDevice(HailoRTDriver &&driver, const hailo_pcie_device_info_t &device_info, hailo_status &status, - const std::string &device_id) : - VdmaDevice::VdmaDevice(std::move(driver), Device::Type::PCIE, device_id), - m_device_info(device_info) +bool PcieDevice::pcie_device_infos_equal(const hailo_pcie_device_info_t &first, const hailo_pcie_device_info_t &second) +{ + const bool bdf_equal = (first.bus == second.bus) && (first.device == second.device) && (first.func == second.func); + const bool domain_equal = (HAILO_PCIE_ANY_DOMAIN == first.domain) || (HAILO_PCIE_ANY_DOMAIN == second.domain) || + (first.domain == second.domain); + return bdf_equal && domain_equal; +} + +PcieDevice::PcieDevice(HailoRTDriver &&driver, hailo_status &status) : + 
VdmaDevice::VdmaDevice(std::move(driver), Device::Type::PCIE) { if (driver.is_fw_loaded()) { status = update_fw_state(); @@ -146,8 +150,6 @@ PcieDevice::PcieDevice(HailoRTDriver &&driver, const hailo_pcie_device_info_t &d m_is_control_version_supported = false; } - m_device_id = device_id; - status = HAILO_SUCCESS; } @@ -176,11 +178,6 @@ hailo_status PcieDevice::direct_read_memory(uint32_t address, void *buffer, uint return m_driver.read_memory(HailoRTDriver::MemoryType::DIRECT_MEMORY, address, buffer, size); } -const char *PcieDevice::get_dev_id() const -{ - return m_device_id.c_str(); -} - hailo_status PcieDevice::reset_impl(CONTROL_PROTOCOL__reset_type_t reset_type) { hailo_status status = HAILO_UNINITIALIZED; @@ -210,7 +207,7 @@ hailo_status PcieDevice::reset_impl(CONTROL_PROTOCOL__reset_type_t reset_type) // TODO: fix logic with respect to is_expecting_response, implement wait_for_wakeup(); if (HAILO_SUCCESS == status) { status = Control::parse_and_validate_response(response_buffer, (uint32_t)(response_size), &header, - &payload, &request); + &payload, &request, *this); CHECK_SUCCESS(status); CHECK(is_expecting_response, HAILO_INTERNAL_FAILURE, "Recived valid response from FW for control who is not expecting one."); } else if ((HAILO_FW_CONTROL_FAILURE == status) && (!is_expecting_response)){ diff --git a/hailort/libhailort/src/vdma/pcie/pcie_device.hpp b/hailort/libhailort/src/vdma/pcie/pcie_device.hpp index ed909e0..6bca191 100644 --- a/hailort/libhailort/src/vdma/pcie/pcie_device.hpp +++ b/hailort/libhailort/src/vdma/pcie/pcie_device.hpp @@ -30,6 +30,7 @@ public: static Expected parse_pcie_device_info(const std::string &device_info_str, bool log_on_failure); static Expected pcie_device_info_to_string(const hailo_pcie_device_info_t &device_info); + static bool pcie_device_infos_equal(const hailo_pcie_device_info_t &first, const hailo_pcie_device_info_t &second); virtual ~PcieDevice() = default; @@ -55,20 +56,10 @@ public: void 
set_is_control_version_supported(bool value); virtual Expected get_architecture() const override; - const hailo_pcie_device_info_t get_device_info() const - { - return m_device_info; - } - virtual const char* get_dev_id() const override; - private: - PcieDevice(HailoRTDriver &&driver, const hailo_pcie_device_info_t &device_info, hailo_status &status, - const std::string &device_id); + PcieDevice(HailoRTDriver &&driver, hailo_status &status); static Expected find_device_info(const hailo_pcie_device_info_t &pcie_device_info); - - const hailo_pcie_device_info_t m_device_info; - std::string m_device_id; }; } /* namespace hailort */ diff --git a/hailort/libhailort/src/vdma/vdma_async_stream.cpp b/hailort/libhailort/src/vdma/vdma_async_stream.cpp index 1b55e27..22392f2 100644 --- a/hailort/libhailort/src/vdma/vdma_async_stream.cpp +++ b/hailort/libhailort/src/vdma/vdma_async_stream.cpp @@ -10,6 +10,7 @@ #include "hailo/hailort_common.hpp" #include "vdma/vdma_async_stream.hpp" +#include "common/os_utils.hpp" namespace hailort @@ -27,27 +28,47 @@ VdmaAsyncInputStream::VdmaAsyncInputStream(VdmaDevice &device, vdma::BoundaryCha return; } + if (channel->type() != vdma::BoundaryChannel::Type::ASYNC) { + LOGGER__ERROR("Can't create a async vdma stream with a non async channel. 
Received channel type {}", channel->type()); + status = HAILO_INVALID_ARGUMENT; + return; + } + status = HAILO_SUCCESS; } -Expected VdmaAsyncInputStream::sync_write_raw_buffer(const MemoryView &) +hailo_status VdmaAsyncInputStream::wait_for_async_ready(size_t transfer_size, std::chrono::milliseconds timeout) +{ + const bool STOP_IF_DEACTIVATED = true; + return m_channel->wait(transfer_size, timeout, STOP_IF_DEACTIVATED); +} + +Expected VdmaAsyncInputStream::get_async_max_queue_size() const { - return make_unexpected(HAILO_NOT_IMPLEMENTED); + return get_buffer_frames_size(); } -hailo_status VdmaAsyncInputStream::sync_write_all_raw_buffer_no_transform_impl(void *, size_t, size_t) +hailo_status VdmaAsyncInputStream::write_buffer_only(const MemoryView &, const std::function &) { - return HAILO_NOT_IMPLEMENTED; + LOGGER__ERROR("The write_buffer_only function is not supported by async streams"); + return HAILO_INVALID_OPERATION; } -hailo_status VdmaAsyncInputStream::wait_for_ready(size_t transfer_size, std::chrono::milliseconds timeout) +hailo_status VdmaAsyncInputStream::send_pending_buffer(const device_id_t &) { - return m_channel->wait(transfer_size, timeout); + LOGGER__ERROR("The send_pending_buffer function is not supported by async streams"); + return HAILO_INVALID_OPERATION; } -hailo_status VdmaAsyncInputStream::write_async(std::shared_ptr buffer, const TransferDoneCallback &user_callback, void *opaque) +hailo_status VdmaAsyncInputStream::write_async(TransferRequest &&transfer_request) { - return m_channel->transfer(buffer, user_callback, opaque); + return m_channel->transfer_async(std::move(transfer_request)); +} + +hailo_status VdmaAsyncInputStream::write_impl(const MemoryView &) +{ + LOGGER__ERROR("Sync write is not supported by async streams"); + return HAILO_INVALID_OPERATION; } /** Output stream **/ @@ -64,27 +85,246 @@ VdmaAsyncOutputStream::VdmaAsyncOutputStream(VdmaDevice &device, vdma::BoundaryC return; } + if (channel->type() != 
vdma::BoundaryChannel::Type::ASYNC) { + LOGGER__ERROR("Can't create an async vdma stream with a non async channel. Received channel type {}", channel->type()); + status = HAILO_INVALID_ARGUMENT; + return; + } + + status = HAILO_SUCCESS; +} + +hailo_status VdmaAsyncOutputStream::wait_for_async_ready(size_t transfer_size, std::chrono::milliseconds timeout) +{ + const bool STOP_IF_DEACTIVATED = true; + return m_channel->wait(transfer_size, timeout, STOP_IF_DEACTIVATED); +} + +Expected VdmaAsyncOutputStream::get_async_max_queue_size() const +{ + return get_buffer_frames_size(); +} + +hailo_status VdmaAsyncOutputStream::read_impl(MemoryView &) +{ + LOGGER__ERROR("Sync read is not supported by async streams"); + return HAILO_INVALID_OPERATION; +} + +hailo_status VdmaAsyncOutputStream::read_async(TransferRequest &&transfer_request) +{ + return m_channel->transfer_async(std::move(transfer_request)); +} + +/** Output nms stream **/ +VdmaAsyncOutputNmsStream::VdmaAsyncOutputNmsStream(VdmaDevice &device, vdma::BoundaryChannelPtr channel, + const LayerInfo &edge_layer, EventPtr core_op_activated_event, + uint16_t batch_size, std::chrono::milliseconds transfer_timeout, + hailo_stream_interface_t interface, hailo_status &status) : + VdmaOutputStreamBase(device, channel, edge_layer, core_op_activated_event, batch_size, + transfer_timeout, interface, status), + m_queue_max_size(channel->get_transfers_count_in_buffer(get_info().hw_frame_size)), + m_queue_mutex(), + m_abort_mutex(), + m_queue_cond(), + m_queue(), + m_stream_aborted(false), + m_should_quit(false), + m_worker_thread([this] { process_transfer_requests(); }) +{ + // Check status for base class c'tor + if (HAILO_SUCCESS != status) { + return; + } + + if (edge_layer.format.order != HAILO_FORMAT_ORDER_HAILO_NMS) { + // This shouldn't happen + LOGGER__ERROR("Can't create NMS vdma async output stream if edge layer order isn't NMS. 
Order received {}", + edge_layer.format.order); + status = HAILO_INTERNAL_FAILURE; + return; + } + + // TODO: after adding NMS single int, we can create an async channel for async nms output stream (HRT-10553) + if (channel->type() != vdma::BoundaryChannel::Type::BUFFERED) { + LOGGER__ERROR("Can't create an async nms vdma stream with a non buffered channel. Received channel type {}", channel->type()); + status = HAILO_INVALID_ARGUMENT; + return; + } + status = HAILO_SUCCESS; } -Expected VdmaAsyncOutputStream::sync_read_raw_buffer(MemoryView &) +VdmaAsyncOutputNmsStream::~VdmaAsyncOutputNmsStream() +{ + // VdmaAsyncOutputNmsStream::deactivate_stream() calls VdmaOutputStreamBase::deactivate_stream(). + // Because this dtor (i.e. ~VdmaAsyncOutputNmsStream()) is called before ~VdmaOutputStreamBase(), calling + // VdmaOutputStreamBase::deactivate_stream() inside VdmaAsyncOutputNmsStream::deactivate_stream() will work. + if (this->is_stream_activated) { + const auto status = deactivate_stream(); + if (HAILO_SUCCESS != status) { + LOGGER__ERROR("Failed to deactivate stream with error status {}", status); + } + } + + if (m_worker_thread.joinable()) { + signal_thread_quit(); + m_worker_thread.join(); + } +} + +hailo_status VdmaAsyncOutputNmsStream::wait_for_async_ready(size_t transfer_size, std::chrono::milliseconds timeout) { - return make_unexpected(HAILO_NOT_IMPLEMENTED); + CHECK(transfer_size == get_info().hw_frame_size, HAILO_INSUFFICIENT_BUFFER, + "On nms stream transfer_size should be {} (given size {})", get_info().hw_frame_size, transfer_size); + std::unique_lock lock(m_queue_mutex); + auto result = m_queue_cond.wait_for(lock, timeout, + [&]{ return m_should_quit || m_stream_aborted || (m_queue.size() < m_queue_max_size); }); + if (result) { + if (m_should_quit) { + return HAILO_STREAM_NOT_ACTIVATED; + } + return m_stream_aborted ? 
HAILO_STREAM_ABORTED_BY_USER : HAILO_SUCCESS; + } + return HAILO_TIMEOUT; +} + +Expected VdmaAsyncOutputNmsStream::get_async_max_queue_size() const +{ + return Expected(m_queue_max_size); +} + +hailo_status VdmaAsyncOutputNmsStream::read_async(TransferRequest &&transfer_request) +{ + { + std::lock_guard lock(m_queue_mutex); + CHECK(!m_stream_aborted, HAILO_STREAM_ABORTED_BY_USER); + CHECK(m_queue.size() < m_queue_max_size, HAILO_QUEUE_IS_FULL, "No space left in nms queue"); + + m_queue.emplace(std::move(transfer_request)); + } + m_queue_cond.notify_one(); + return HAILO_SUCCESS; +} + +hailo_status VdmaAsyncOutputNmsStream::read(MemoryView /* buffer */) +{ + // We need to override read() since VdmaAsyncOutputNmsStream impl's read_impl. This will cause read() to succeed, + // however this isn't desired for async streams. + LOGGER__ERROR("The read function is not supported by async streams"); + return HAILO_INVALID_OPERATION; +} + +hailo_status VdmaAsyncOutputNmsStream::abort() +{ + std::unique_lock lock(m_abort_mutex); + const auto status = VdmaOutputStreamBase::abort(); + CHECK_SUCCESS(status); + + m_stream_aborted = true; + + return HAILO_SUCCESS; +} + +hailo_status VdmaAsyncOutputNmsStream::clear_abort() +{ + std::unique_lock lock(m_abort_mutex); + const auto status = VdmaOutputStreamBase::clear_abort(); + CHECK_SUCCESS(status); + + m_stream_aborted = false; + + return HAILO_SUCCESS; +} + +hailo_status VdmaAsyncOutputNmsStream::read_impl(MemoryView &buffer) +{ + CHECK((buffer.size() % HailoRTCommon::HW_DATA_ALIGNMENT) == 0, HAILO_INVALID_ARGUMENT, + "Size must be aligned to {} (got {})", HailoRTCommon::HW_DATA_ALIGNMENT, buffer.size()); + + return m_channel->transfer_sync(buffer.data(), buffer.size(), m_transfer_timeout); +} + +hailo_status VdmaAsyncOutputNmsStream::deactivate_stream() +{ + std::unique_lock lock(m_queue_mutex); + + // abort is called because read_nms may block on a non-aborted channel + auto status = abort(); + CHECK_SUCCESS(status); + + // Now 
for every transfer processed in process_transfer_requests(), we'll pass HAILO_STREAM_ABORTED_BY_USER to the + // callback. + status = VdmaOutputStreamBase::deactivate_stream(); + CHECK_SUCCESS(status); + + // Block until all transfers have been emptied from the queue + auto result = m_queue_cond.wait_for(lock, m_transfer_timeout, [&]{ return m_queue.empty(); }); + CHECK(result, HAILO_TIMEOUT, "Timeout while deactivating async nms output stream"); + + return HAILO_SUCCESS; } -hailo_status VdmaAsyncOutputStream::read_all(MemoryView &) +hailo_status VdmaAsyncOutputNmsStream::activate_stream(uint16_t dynamic_batch_size, bool resume_pending_stream_transfers) { - return HAILO_NOT_IMPLEMENTED; + std::unique_lock lock(m_queue_mutex); + auto status = VdmaOutputStreamBase::activate_stream(dynamic_batch_size, resume_pending_stream_transfers); + CHECK_SUCCESS(status); + + status = clear_abort(); + CHECK_SUCCESS(status); + + return HAILO_SUCCESS; } -hailo_status VdmaAsyncOutputStream::wait_for_ready(size_t transfer_size, std::chrono::milliseconds timeout) +Expected VdmaAsyncOutputNmsStream::get_buffer_frames_size() const { - return m_channel->wait(transfer_size, timeout); + return Expected(m_queue_max_size); } -hailo_status VdmaAsyncOutputStream::read_async(std::shared_ptr buffer, const TransferDoneCallback &user_callback, void *opaque) +void VdmaAsyncOutputNmsStream::signal_thread_quit() { - return m_channel->transfer(buffer, user_callback, opaque); + { + std::unique_lock lock(m_queue_mutex); + m_should_quit = true; + } + m_queue_cond.notify_all(); +} + +void VdmaAsyncOutputNmsStream::process_transfer_requests() +{ + static const size_t FROM_START_OF_BUFFER = 0; + OsUtils::set_current_thread_name("ASYNC_NMS"); + + while (true) { + std::unique_lock lock(m_queue_mutex); + m_queue_cond.wait(lock, [&]{ return m_should_quit || !m_queue.empty(); }); + if (m_should_quit) { + break; + } + + auto transfer_request = m_queue.front(); + m_queue.pop(); + + lock.unlock(); + auto status = 
read_nms(transfer_request.buffer.data(), FROM_START_OF_BUFFER, transfer_request.buffer.size()); + lock.lock(); + + if (!this->is_stream_activated) { + LOGGER__TRACE("Stream is not active (previous status {})", status); + transfer_request.callback(HAILO_STREAM_ABORTED_BY_USER); + } else if (status != HAILO_SUCCESS) { + // TODO: timeout? stream aborted? (HRT-10513) + transfer_request.callback(status); + } else { + transfer_request.callback(HAILO_SUCCESS); + } + + lock.unlock(); + + // We notify after calling the callback, so that deactivate_stream() will block until the queue is empty + all callbacks have been called + m_queue_cond.notify_one(); + } } } /* namespace hailort */ diff --git a/hailort/libhailort/src/vdma/vdma_async_stream.hpp b/hailort/libhailort/src/vdma/vdma_async_stream.hpp index 5086c55..eb48c17 100644 --- a/hailort/libhailort/src/vdma/vdma_async_stream.hpp +++ b/hailort/libhailort/src/vdma/vdma_async_stream.hpp @@ -17,6 +17,12 @@ #include "vdma/vdma_stream_base.hpp" #include "vdma/vdma_device.hpp" #include "vdma/channel/async_channel.hpp" +#include "vdevice/scheduler/scheduled_core_op_state.hpp" + +#include +#include +#include +#include namespace hailort @@ -31,12 +37,16 @@ public: hailo_status &status); virtual ~VdmaAsyncInputStream() = default; - virtual hailo_status wait_for_ready(size_t transfer_size, std::chrono::milliseconds timeout) override; - virtual hailo_status write_async(std::shared_ptr buffer, const TransferDoneCallback &user_callback, void *opaque); + virtual hailo_status wait_for_async_ready(size_t transfer_size, std::chrono::milliseconds timeout) override; + virtual Expected get_async_max_queue_size() const override; -private: - virtual Expected sync_write_raw_buffer(const MemoryView &buffer) override; - virtual hailo_status sync_write_all_raw_buffer_no_transform_impl(void *buffer, size_t offset, size_t size) override; + virtual hailo_status write_buffer_only(const MemoryView &buffer, const std::function &should_cancel) override; + 
virtual hailo_status send_pending_buffer(const device_id_t &device_id) override; + + virtual hailo_status write_async(TransferRequest &&transfer_request) override; + +protected: + virtual hailo_status write_impl(const MemoryView &buffer) override; }; class VdmaAsyncOutputStream : public VdmaOutputStreamBase @@ -48,14 +58,53 @@ public: hailo_status &status); virtual ~VdmaAsyncOutputStream() = default; - virtual hailo_status wait_for_ready(size_t transfer_size, std::chrono::milliseconds timeout) override; - virtual hailo_status read_async(std::shared_ptr buffer, const TransferDoneCallback &user_callback, void *opaque = nullptr) override; + virtual hailo_status wait_for_async_ready(size_t transfer_size, std::chrono::milliseconds timeout) override; + virtual Expected get_async_max_queue_size() const override; -private: - virtual Expected sync_read_raw_buffer(MemoryView &buffer); - virtual hailo_status read_all(MemoryView &buffer) override; +protected: + virtual hailo_status read_impl(MemoryView &buffer) override; + virtual hailo_status read_async(TransferRequest &&transfer_request) override; }; +// NMS requires multiple reads from the device + parsing the output. Hence, a background thread is needed. +// This class opens a worker thread that processes nms transfers, signalling the user's callback upon completion. 
+// read_async adds transfer requests to a producer-consumer queue +class VdmaAsyncOutputNmsStream : public VdmaOutputStreamBase +{ +public: + VdmaAsyncOutputNmsStream(VdmaDevice &device, vdma::BoundaryChannelPtr channel, const LayerInfo &edge_layer, + EventPtr core_op_activated_event, uint16_t batch_size, + std::chrono::milliseconds transfer_timeout, hailo_stream_interface_t interface, + hailo_status &status); + virtual ~VdmaAsyncOutputNmsStream(); + + virtual hailo_status wait_for_async_ready(size_t transfer_size, std::chrono::milliseconds timeout) override; + virtual Expected get_async_max_queue_size() const override; + virtual hailo_status read(MemoryView buffer) override; + virtual hailo_status abort() override; + virtual hailo_status clear_abort() override; + +private: + virtual hailo_status read_impl(MemoryView &buffer) override; + virtual hailo_status read_async(TransferRequest &&transfer_request) override; + virtual hailo_status deactivate_stream() override; + virtual hailo_status activate_stream(uint16_t dynamic_batch_size, bool resume_pending_stream_transfers) override; + virtual Expected get_buffer_frames_size() const override; + + void signal_thread_quit(); + void process_transfer_requests(); + + // TODO: use SpscQueue (HRT-10554) + const size_t m_queue_max_size; + std::mutex m_queue_mutex; + std::mutex m_abort_mutex; + std::condition_variable m_queue_cond; + std::queue m_queue; + std::atomic_bool m_stream_aborted; + // m_should_quit is used to quit the thread (called on destruction) + bool m_should_quit; + std::thread m_worker_thread; +}; } /* namespace hailort */ diff --git a/hailort/libhailort/src/vdma/vdma_config_core_op.cpp b/hailort/libhailort/src/vdma/vdma_config_core_op.cpp index 88c0968..c0f8020 100644 --- a/hailort/libhailort/src/vdma/vdma_config_core_op.cpp +++ b/hailort/libhailort/src/vdma/vdma_config_core_op.cpp @@ -40,7 +40,7 @@ hailo_status VdmaConfigCoreOp::activate_impl(uint16_t dynamic_batch_size, bool r 
m_active_core_op_holder.set(*this); - status = m_resources_manager->set_inter_context_channels_dynamic_batch_size(dynamic_batch_size); + status = m_resources_manager->set_dynamic_batch_size(dynamic_batch_size); CHECK_SUCCESS(status, "Failed to set inter-context channels dynamic batch size."); status = m_resources_manager->enable_state_machine(dynamic_batch_size); @@ -165,4 +165,9 @@ Expected VdmaConfigCoreOp::get_boundary_vdma_channel_b return m_resources_manager->get_boundary_vdma_channel_by_stream_name(stream_name); } +Expected VdmaConfigCoreOp::run_hw_infer_estimator() +{ + return m_resources_manager->run_hw_only_infer(); +} + } /* namespace hailort */ diff --git a/hailort/libhailort/src/vdma/vdma_config_core_op.hpp b/hailort/libhailort/src/vdma/vdma_config_core_op.hpp index 740d4a3..821ff92 100644 --- a/hailort/libhailort/src/vdma/vdma_config_core_op.hpp +++ b/hailort/libhailort/src/vdma/vdma_config_core_op.hpp @@ -73,6 +73,7 @@ public: virtual hailo_status set_scheduler_timeout(const std::chrono::milliseconds &timeout, const std::string &network_name) override; virtual hailo_status set_scheduler_threshold(uint32_t threshold, const std::string &network_name) override; virtual hailo_status set_scheduler_priority(uint8_t priority, const std::string &network_name) override; + virtual Expected run_hw_infer_estimator() override; virtual ~VdmaConfigCoreOp() = default; VdmaConfigCoreOp(const VdmaConfigCoreOp &other) = delete; diff --git a/hailort/libhailort/src/vdma/vdma_config_manager.cpp b/hailort/libhailort/src/vdma/vdma_config_manager.cpp index 0bf8d4f..1d96c51 100644 --- a/hailort/libhailort/src/vdma/vdma_config_manager.cpp +++ b/hailort/libhailort/src/vdma/vdma_config_manager.cpp @@ -14,7 +14,7 @@ namespace hailort { hailo_status VdmaConfigManager::switch_core_op(std::shared_ptr current_active_core_op, - std::shared_ptr next_core_op, const uint16_t batch_size, bool resume_pending_stream_transfers) + std::shared_ptr next_core_op, const uint16_t batch_size, const 
bool resume_pending_stream_transfers) { static const auto RESET_NN_CONFIG = false; CHECK((nullptr != current_active_core_op) || (nullptr != next_core_op), HAILO_INVALID_ARGUMENT); @@ -33,6 +33,7 @@ hailo_status VdmaConfigManager::switch_core_op(std::shared_ptr auto status = current_active_core_op->deactivate_host_resources(); CHECK_SUCCESS(status, "Failed deactivating current core-op"); + // TODO HRT-10799 Fix when enabling batch switch flow for hailo15 // TODO: In mercury we need to reset after deactivate. This will be fixed in MSW-762 and the "if" will be removed // when we make the nn_manager responsible to reset the nn-core. if (Device::Type::INTEGRATED == current_active_core_op->get_resources_manager()->get_device().get_type()) { @@ -52,4 +53,12 @@ hailo_status VdmaConfigManager::switch_core_op(std::shared_ptr return HAILO_SUCCESS; } +hailo_status VdmaConfigManager::deactivate_core_op(std::shared_ptr current_active_core_op) +{ + static const auto RESUME_PENDING_STREAM_TRANSFERS = true; + static const uint16_t DEACTIVATE_BATCH_SIZE = 0; + const std::shared_ptr DEACTIVATE_NEXT_CORE_OP = nullptr; + return switch_core_op(current_active_core_op, DEACTIVATE_NEXT_CORE_OP, DEACTIVATE_BATCH_SIZE, RESUME_PENDING_STREAM_TRANSFERS); +} + } /* namespace hailort */ diff --git a/hailort/libhailort/src/vdma/vdma_config_manager.hpp b/hailort/libhailort/src/vdma/vdma_config_manager.hpp index c42b6a8..c20b1e0 100644 --- a/hailort/libhailort/src/vdma/vdma_config_manager.hpp +++ b/hailort/libhailort/src/vdma/vdma_config_manager.hpp @@ -27,7 +27,9 @@ public: VdmaConfigManager() = delete; static hailo_status switch_core_op(std::shared_ptr current_active_core_op, - std::shared_ptr next_core_op, const uint16_t batch_size, bool resume_pending_stream_transfers); + std::shared_ptr next_core_op, const uint16_t batch_size, const bool resume_pending_stream_transfers); + + static hailo_status deactivate_core_op(std::shared_ptr current_active_core_op); }; } /* namespace hailort */ diff --git 
a/hailort/libhailort/src/vdma/vdma_device.cpp b/hailort/libhailort/src/vdma/vdma_device.cpp index c560a65..a3791a5 100644 --- a/hailort/libhailort/src/vdma/vdma_device.cpp +++ b/hailort/libhailort/src/vdma/vdma_device.cpp @@ -31,11 +31,11 @@ static constexpr std::chrono::milliseconds DEFAULT_TIMEOUT(1000); static constexpr std::chrono::milliseconds DEFAULT_TIMEOUT(50000); #endif /* ifndef HAILO_EMULATOR */ -VdmaDevice::VdmaDevice(HailoRTDriver &&driver, Device::Type type, const std::string &device_id) : +VdmaDevice::VdmaDevice(HailoRTDriver &&driver, Device::Type type) : DeviceBase::DeviceBase(type), m_driver(std::move(driver)), m_is_configured(false) { - activate_notifications(device_id); + activate_notifications(get_dev_id()); } Expected> VdmaDevice::create(const std::string &device_id) @@ -109,6 +109,26 @@ hailo_status VdmaDevice::fw_interact_impl(uint8_t *request_buffer, size_t reques return HAILO_SUCCESS; } +hailo_status VdmaDevice::clear_configured_apps() +{ + static const auto DONT_KEEP_NN_CONFIG_DURING_RESET = false; + auto status = Control::reset_context_switch_state_machine(*this, DONT_KEEP_NN_CONFIG_DURING_RESET); + CHECK_SUCCESS(status); + + // In case of mercury need to reset nn core before activating network group to clear prior nn core state + if (Device::Type::INTEGRATED == get_type()) { + // On core device, the nn_manager is not responsible to reset the nn-core so + // we use the SCU control for that. + status = m_driver.reset_nn_core(); + CHECK_SUCCESS(status); + } + + status = Control::clear_configured_apps(*this); + CHECK_SUCCESS(status, "Failed to clear configured network groups with status {}", status); + + return HAILO_SUCCESS; +} + Expected VdmaDevice::add_hef(Hef &hef, const NetworkGroupsParamsMap &configure_params) { auto status = mark_as_used(); @@ -118,21 +138,9 @@ Expected VdmaDevice::add_hef(Hef &hef, const Netwo // TODO: Do we need this control after fixing HRT-7519? 
// Reset context_switch state machine - it may have been in an active state if a previous VdmaDevice // wasn't dtor'd (due to SIGKILL for example) - static const auto REMOVE_NN_CONFIG_DURING_RESET = false; - status = Control::reset_context_switch_state_machine(*this, REMOVE_NN_CONFIG_DURING_RESET); + status = clear_configured_apps(); CHECK_SUCCESS_AS_EXPECTED(status); - // In case of mercury need to reset nn core before activating network group to clear prior nn core state - if (Device::Type::INTEGRATED == get_type()) { - // On core device, the nn_manager is not responsible to reset the nn-core so - // we use the SCU control for that. - status = reset(HAILO_RESET_DEVICE_MODE_NN_CORE); - CHECK_SUCCESS_AS_EXPECTED(status); - } - - status = Control::clear_configured_apps(*this); - CHECK_SUCCESS_AS_EXPECTED(status, "Failed to clear configured network groups with status {}", status); - assert(nullptr == m_vdma_interrupts_dispatcher); auto interrupts_dispatcher = vdma::InterruptsDispatcher::create(std::ref(m_driver)); CHECK_EXPECTED(interrupts_dispatcher); @@ -185,8 +193,8 @@ Expected> VdmaDevice::create_configured_ m_core_ops.emplace_back(core_op_ptr); // TODO: HRT-8875 - auto net_flow_ops = hef.pimpl->post_process_ops(core_op_metadata->core_op_name()); - auto network_group_expected = ConfiguredNetworkGroupBase::create(config_params, std::move(core_ops), std::move(net_flow_ops)); + auto metadata = hef.pimpl->network_group_metadata(core_op_metadata->core_op_name()); + auto network_group_expected = ConfiguredNetworkGroupBase::create(config_params, std::move(core_ops), std::move(metadata)); CHECK_EXPECTED(network_group_expected); auto network_group_ptr = network_group_expected.release(); @@ -215,11 +223,6 @@ hailo_reset_device_mode_t VdmaDevice::get_default_reset_mode() return HAILO_RESET_DEVICE_MODE_SOFT; } -uint16_t VdmaDevice::get_default_desc_page_size() const -{ - return m_driver.calc_desc_page_size(vdma::DEFAULT_DESC_PAGE_SIZE); -} - hailo_status 
VdmaDevice::mark_as_used() { return m_driver.mark_as_used(); @@ -238,9 +241,9 @@ VdmaDevice::~VdmaDevice() LOGGER__WARNING("Stopping notification thread ungracefully"); } if (m_is_configured) { - status = Control::clear_configured_apps(*this); + status = clear_configured_apps(); if (HAILO_SUCCESS != status) { - LOGGER__ERROR("Failed to clear configured core-ops with status {}", status); + LOGGER__WARNING("clear configured apps ended with status {}", status); } } } @@ -334,10 +337,7 @@ Expected>> VdmaDevice::create_core_o // TODO: decide about core_op names - align with the Compiler auto core_op_metadata = hef.pimpl->get_core_op_metadata(network_group_name, partial_clusters_layout_bitmap); CHECK_EXPECTED(core_op_metadata); - - auto core_op_metadata_ptr = make_shared_nothrow(core_op_metadata.release()); - CHECK_AS_EXPECTED(nullptr != core_op_metadata_ptr, HAILO_OUT_OF_HOST_MEMORY); - core_ops_metadata_ptrs.emplace_back(core_op_metadata_ptr); + core_ops_metadata_ptrs.emplace_back(core_op_metadata.release()); } return core_ops_metadata_ptrs; diff --git a/hailort/libhailort/src/vdma/vdma_device.hpp b/hailort/libhailort/src/vdma/vdma_device.hpp index 5aea085..6d5eea7 100644 --- a/hailort/libhailort/src/vdma/vdma_device.hpp +++ b/hailort/libhailort/src/vdma/vdma_device.hpp @@ -32,19 +32,24 @@ public: virtual hailo_status wait_for_wakeup() override; virtual void increment_control_sequence() override; virtual hailo_reset_device_mode_t get_default_reset_mode() override; - uint16_t get_default_desc_page_size() const; - hailo_status mark_as_used(); virtual Expected read_log(MemoryView &buffer, hailo_cpu_id_t cpu_id) override; - HailoRTDriver &get_driver() { + HailoRTDriver &get_driver() + { return std::ref(m_driver); }; + virtual const char* get_dev_id() const override final + { + // m_driver.device_id() is reference. Hence, returning c_str is safe. 
+ return m_driver.device_id().c_str(); + }; + ExpectedRef get_vdma_interrupts_dispatcher(); protected: - VdmaDevice(HailoRTDriver &&driver, Type type, const std::string &device_id); + VdmaDevice(HailoRTDriver &&driver, Type type); virtual Expected read_notification() override; virtual hailo_status disable_notifications() override; @@ -55,7 +60,7 @@ protected: HailoRTDriver m_driver; std::vector> m_core_ops; std::vector> m_network_groups; // TODO: HRT-9547 - Remove when ConfiguredNetworkGroup will be kept in global context - + // The vdma interrupts dispatcher contains a callback with a reference to the current activated network group // (reference to the ResourcesManager). Hence, it must be destructed before the networks groups are destructed. std::unique_ptr m_vdma_interrupts_dispatcher; @@ -68,6 +73,7 @@ private: std::vector> &core_ops, Hef &hef, const ConfigureNetworkParams &config_params, uint8_t network_group_index); + hailo_status clear_configured_apps(); Expected create_networks_group_vector(Hef &hef, const NetworkGroupsParamsMap &configure_params); Expected>> create_core_ops_metadata(Hef &hef, const std::string &network_group_name, uint32_t partial_clusters_layout_bitmap); diff --git a/hailort/libhailort/src/vdma/vdma_stream.cpp b/hailort/libhailort/src/vdma/vdma_stream.cpp index 59ab85b..b3f387c 100644 --- a/hailort/libhailort/src/vdma/vdma_stream.cpp +++ b/hailort/libhailort/src/vdma/vdma_stream.cpp @@ -27,30 +27,18 @@ VdmaInputStream::VdmaInputStream(VdmaDevice &device, vdma::BoundaryChannelPtr ch return; } + if (channel->type() != vdma::BoundaryChannel::Type::BUFFERED) { + LOGGER__ERROR("Can't create a vdma stream with a non buffered channel. 
Received channel type {}", channel->type()); + status = HAILO_INVALID_ARGUMENT; + return; + } + status = HAILO_SUCCESS; } -Expected VdmaInputStream::sync_write_raw_buffer(const MemoryView &buffer) +hailo_status VdmaInputStream::write_impl(const MemoryView &buffer) { - hailo_status status = HAILO_UNINITIALIZED; - - status = m_channel->wait(buffer.size(), m_channel_timeout); - if ((status == HAILO_STREAM_ABORTED_BY_USER) || (status == HAILO_STREAM_NOT_ACTIVATED)) { - return make_unexpected(status); - } - CHECK_AS_EXPECTED(HAILO_TIMEOUT != status, HAILO_TIMEOUT, - "{} (H2D) failed with status={} (timeout={}ms)", name(), HAILO_TIMEOUT, m_channel_timeout.count()); - CHECK_SUCCESS_AS_EXPECTED(status); - - status = m_channel->transfer((void*)buffer.data(), buffer.size()); - if ((status == HAILO_STREAM_ABORTED_BY_USER) || (status == HAILO_STREAM_NOT_ACTIVATED)) { - return make_unexpected(status); - } - CHECK_AS_EXPECTED(HAILO_TIMEOUT != status, HAILO_TIMEOUT, - "{} (H2D) failed with status={} (timeout={}ms)", name(), HAILO_TIMEOUT, m_channel_timeout.count()); - CHECK_SUCCESS_AS_EXPECTED(status); - - return buffer.size(); + return m_channel->transfer_sync((void*)buffer.data(), buffer.size(), m_channel_timeout); } hailo_status VdmaInputStream::write_buffer_only(const MemoryView &buffer, @@ -60,10 +48,10 @@ hailo_status VdmaInputStream::write_buffer_only(const MemoryView &buffer, return m_channel->write_buffer(buffer, m_channel_timeout, should_cancel); } -hailo_status VdmaInputStream::send_pending_buffer(size_t device_index) +hailo_status VdmaInputStream::send_pending_buffer(const device_id_t &device_id) { + (void)device_id; std::unique_lock lock(m_send_pending_mutex); - CHECK(0 == device_index, HAILO_INVALID_OPERATION); hailo_status status = m_channel->wait(get_frame_size(), m_channel_timeout); if ((HAILO_STREAM_ABORTED_BY_USER == status) || (HAILO_STREAM_NOT_ACTIVATED == status)) { return status; @@ -75,60 +63,34 @@ hailo_status VdmaInputStream::send_pending_buffer(size_t 
device_index) return m_channel->send_pending_buffer(); } -hailo_status VdmaInputStream::sync_write_all_raw_buffer_no_transform_impl(void *buffer, size_t offset, size_t size) -{ - ASSERT(NULL != buffer); - - return sync_write_raw_buffer(MemoryView(static_cast(buffer) + offset, size)).status(); -} - /** Output stream **/ VdmaOutputStream::VdmaOutputStream(VdmaDevice &device, vdma::BoundaryChannelPtr channel, const LayerInfo &edge_layer, EventPtr core_op_activated_event, uint16_t batch_size, std::chrono::milliseconds transfer_timeout, hailo_stream_interface_t interface, hailo_status &status) : - VdmaOutputStreamBase(device, channel, edge_layer, core_op_activated_event, batch_size, transfer_timeout, interface, status), - m_read_mutex() + VdmaOutputStreamBase(device, channel, edge_layer, core_op_activated_event, batch_size, transfer_timeout, interface, status) { // Check status for base class c'tor if (HAILO_SUCCESS != status) { return; } - status = HAILO_SUCCESS; -} - -Expected VdmaOutputStream::sync_read_raw_buffer(MemoryView &buffer) -{ - hailo_status status = HAILO_UNINITIALIZED; - - status = m_channel->wait(buffer.size(), m_transfer_timeout); - if ((status == HAILO_STREAM_ABORTED_BY_USER) || (status == HAILO_STREAM_NOT_ACTIVATED)) { - return make_unexpected(status); - } - CHECK_AS_EXPECTED(HAILO_TIMEOUT != status, HAILO_TIMEOUT, - "{} (D2H) failed with status={} (timeout={}ms)", name(), HAILO_TIMEOUT, m_transfer_timeout.count()); - CHECK_SUCCESS_AS_EXPECTED(status); - - status = m_channel->transfer(buffer.data(), buffer.size()); - if ((status == HAILO_STREAM_NOT_ACTIVATED) || (status == HAILO_STREAM_ABORTED_BY_USER)) { - return make_unexpected(status); + if (channel->type() != vdma::BoundaryChannel::Type::BUFFERED) { + LOGGER__ERROR("Can't create a vdma stream with a non buffered channel. 
Received channel type {}", channel->type()); + status = HAILO_INVALID_ARGUMENT; + return; } - CHECK_AS_EXPECTED(HAILO_TIMEOUT != status, HAILO_TIMEOUT, - "{} (D2H) failed with status={} (timeout={}ms)", name(), HAILO_TIMEOUT, m_transfer_timeout.count()); - CHECK_SUCCESS_AS_EXPECTED(status); - return buffer.size(); + status = HAILO_SUCCESS; } -hailo_status VdmaOutputStream::read_all(MemoryView &buffer) +hailo_status VdmaOutputStream::read_impl(MemoryView &buffer) { - std::unique_lock lock(m_read_mutex); - CHECK((buffer.size() % HailoRTCommon::HW_DATA_ALIGNMENT) == 0, HAILO_INVALID_ARGUMENT, + CHECK((buffer.size() % HailoRTCommon::HW_DATA_ALIGNMENT) == 0, HAILO_INVALID_ARGUMENT, "Size must be aligned to {} (got {})", HailoRTCommon::HW_DATA_ALIGNMENT, buffer.size()); - return sync_read_raw_buffer(buffer).status(); + return m_channel->transfer_sync(buffer.data(), buffer.size(), m_transfer_timeout); } } /* namespace hailort */ diff --git a/hailort/libhailort/src/vdma/vdma_stream.hpp b/hailort/libhailort/src/vdma/vdma_stream.hpp index ec6f41e..bd4329f 100644 --- a/hailort/libhailort/src/vdma/vdma_stream.hpp +++ b/hailort/libhailort/src/vdma/vdma_stream.hpp @@ -17,6 +17,7 @@ #include "vdma/vdma_stream_base.hpp" #include "vdma/vdma_device.hpp" #include "vdma/channel/boundary_channel.hpp" +#include "vdevice/scheduler/scheduled_core_op_state.hpp" namespace hailort @@ -31,23 +32,14 @@ public: hailo_status &status); virtual ~VdmaInputStream() = default; - hailo_status write_buffer_only(const MemoryView &buffer, const std::function &should_cancel = []() { return false; }); - hailo_status send_pending_buffer(size_t device_index = 0); - - void notify_all() - { - return m_channel->notify_all(); - } + virtual hailo_status write_buffer_only(const MemoryView &buffer, const std::function &should_cancel = []() { return false; }) override; + virtual hailo_status send_pending_buffer(const device_id_t &device_id) override; private: - virtual Expected sync_write_raw_buffer(const MemoryView 
&buffer) override; - virtual hailo_status sync_write_all_raw_buffer_no_transform_impl(void *buffer, size_t offset, size_t size) override; + virtual hailo_status write_impl(const MemoryView &buffer) override; std::mutex m_write_only_mutex; std::mutex m_send_pending_mutex; - - friend class InputVDeviceBaseStream; - friend class InputVDeviceNativeStream; }; class VdmaOutputStream : public VdmaOutputStreamBase @@ -60,12 +52,7 @@ public: virtual ~VdmaOutputStream() = default; private: - virtual Expected sync_read_raw_buffer(MemoryView &buffer); - virtual hailo_status read_all(MemoryView &buffer) override; - - std::mutex m_read_mutex; - - friend class OutputVDeviceBaseStream; + virtual hailo_status read_impl(MemoryView &buffer) override; }; diff --git a/hailort/libhailort/src/vdma/vdma_stream_base.cpp b/hailort/libhailort/src/vdma/vdma_stream_base.cpp index 542c438..0f5189d 100644 --- a/hailort/libhailort/src/vdma/vdma_stream_base.cpp +++ b/hailort/libhailort/src/vdma/vdma_stream_base.cpp @@ -24,7 +24,7 @@ static bool validate_device_interface_compatibility(hailo_stream_interface_t int case Device::Type::PCIE: interface_valid = (HAILO_STREAM_INTERFACE_PCIE == interface); break; - + case Device::Type::INTEGRATED: interface_valid = (HAILO_STREAM_INTERFACE_INTEGRATED == interface); break; @@ -48,29 +48,20 @@ Expected> VdmaInputStreamBase::create(hailo { CHECK_AS_EXPECTED(validate_device_interface_compatibility(interface, device.get_type()), HAILO_INTERNAL_FAILURE); + hailo_status status = HAILO_UNINITIALIZED; + std::shared_ptr result = nullptr; if ((stream_params.flags & HAILO_STREAM_FLAGS_ASYNC) != 0) { - CHECK_AS_EXPECTED(channel->type() == vdma::BoundaryChannel::Type::ASYNC, HAILO_INVALID_ARGUMENT, - "Can't create a async vdma stream with a non async channel. 
Received channel type {}", channel->type()); - - hailo_status status = HAILO_UNINITIALIZED; - auto result = make_shared_nothrow(device, channel, edge_layer, core_op_activated_event, + result = make_shared_nothrow(device, channel, edge_layer, core_op_activated_event, batch_size, DEFAULT_TRANSFER_TIMEOUT, interface, status); - CHECK_SUCCESS_AS_EXPECTED(status); - CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY); - - return std::static_pointer_cast(result); } else { - CHECK_AS_EXPECTED(channel->type() == vdma::BoundaryChannel::Type::BUFFERED, HAILO_INVALID_ARGUMENT, - "Can't create a vdma stream with a non buffered channel. Received channel type {}", channel->type()); - - hailo_status status = HAILO_UNINITIALIZED; - auto result = make_shared_nothrow(device, channel, edge_layer, core_op_activated_event, + result = make_shared_nothrow(device, channel, edge_layer, core_op_activated_event, batch_size, DEFAULT_TRANSFER_TIMEOUT, interface, status); - CHECK_SUCCESS_AS_EXPECTED(status); - CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY); - - return std::static_pointer_cast(result); } + + // Check that the creation of the various subclasses succeeded + CHECK_SUCCESS_AS_EXPECTED(status); + CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY); + return result; } VdmaInputStreamBase::VdmaInputStreamBase(VdmaDevice &device, vdma::BoundaryChannelPtr channel, @@ -134,7 +125,7 @@ hailo_status VdmaInputStreamBase::clear_abort() hailo_status VdmaInputStreamBase::flush() { - const auto dynamic_batch_size = (CONTROL_PROTOCOL__IGNORE_DYNAMIC_BATCH_SIZE == m_dynamic_batch_size) ? + const auto dynamic_batch_size = (CONTROL_PROTOCOL__IGNORE_DYNAMIC_BATCH_SIZE == m_dynamic_batch_size) ? 
1 : m_dynamic_batch_size; return m_channel->flush(m_channel_timeout * dynamic_batch_size); } @@ -158,16 +149,8 @@ hailo_status VdmaInputStreamBase::deactivate_stream() return HAILO_SUCCESS; } - // Flush is best effort - auto status = m_channel->flush(VDMA_FLUSH_TIMEOUT); - if (HAILO_STREAM_ABORTED_BY_USER == status) { - LOGGER__INFO("Flush input_channel is not needed because channel was aborted. (channel {})", m_channel->get_channel_id()); - status = HAILO_SUCCESS; - } else if (HAILO_SUCCESS != status) { - LOGGER__ERROR("Failed to flush input_channel. (status {} channel {})", status, m_channel->get_channel_id()); - } - status = m_channel->deactivate(); + auto status = m_channel->deactivate(); if (HAILO_SUCCESS != status) { LOGGER__ERROR("Failed to stop channel with status {}", status); } @@ -201,11 +184,6 @@ Expected VdmaInputStreamBase::get_pending_frames_count() const return m_channel->get_h2d_pending_frames_count(); } -hailo_status VdmaInputStreamBase::register_interrupt_callback(const vdma::ProcessingCompleteCallback &callback) -{ - return m_channel->register_interrupt_callback(callback); -} - hailo_status VdmaInputStreamBase::set_dynamic_batch_size(uint16_t dynamic_batch_size) { // TODO: use std::max in the configure stage @@ -218,7 +196,7 @@ hailo_status VdmaInputStreamBase::set_dynamic_batch_size(uint16_t dynamic_batch_ CHECK(dynamic_batch_size <= m_max_batch_size, HAILO_INVALID_ARGUMENT, "Dynamic batch size ({}) must be <= than the configured batch size ({})", dynamic_batch_size, m_max_batch_size); - + if (CONTROL_PROTOCOL__IGNORE_DYNAMIC_BATCH_SIZE == dynamic_batch_size) { LOGGER__TRACE("Received CONTROL_PROTOCOL__IGNORE_DYNAMIC_BATCH_SIZE == dynamic_batch_size; " "Leaving previously set value of {}", m_dynamic_batch_size); @@ -240,36 +218,34 @@ Expected> VdmaOutputStreamBase::create(hai { CHECK_AS_EXPECTED(validate_device_interface_compatibility(interface, device.get_type()), HAILO_INTERNAL_FAILURE); + hailo_status status = HAILO_UNINITIALIZED; + 
std::shared_ptr result = nullptr; + // TODO: after adding NMS single int, we can create an async channel for async nms output stream (HRT-10553) if ((stream_params.flags & HAILO_STREAM_FLAGS_ASYNC) != 0) { - CHECK_AS_EXPECTED(channel->type() == vdma::BoundaryChannel::Type::ASYNC, HAILO_INVALID_ARGUMENT, - "Can't create a async vdma stream with a non async channel. Received channel type {}", channel->type()); - - hailo_status status = HAILO_UNINITIALIZED; - auto result = make_shared_nothrow(device, channel, edge_layer, core_op_activated_event, - batch_size, DEFAULT_TRANSFER_TIMEOUT, interface, status); - CHECK_SUCCESS_AS_EXPECTED(status); - CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY); - - return std::static_pointer_cast(result); + if (edge_layer.format.order == HAILO_FORMAT_ORDER_HAILO_NMS) { + result = make_shared_nothrow(device, channel, edge_layer, core_op_activated_event, + batch_size, DEFAULT_TRANSFER_TIMEOUT, interface, status); + } else { + result = make_shared_nothrow(device, channel, edge_layer, core_op_activated_event, + batch_size, DEFAULT_TRANSFER_TIMEOUT, interface, status); + } } else { - CHECK_AS_EXPECTED(channel->type() == vdma::BoundaryChannel::Type::BUFFERED, HAILO_INVALID_ARGUMENT, - "Can't create a vdma stream with a non buffered channel. 
Received channel type {}", channel->type()); + result = make_shared_nothrow(device, channel, edge_layer, core_op_activated_event, + batch_size, DEFAULT_TRANSFER_TIMEOUT, interface, status); + } - hailo_status status = HAILO_UNINITIALIZED; - auto result = make_shared_nothrow(device, channel, edge_layer, core_op_activated_event, - batch_size, DEFAULT_TRANSFER_TIMEOUT, interface, status); - CHECK_SUCCESS_AS_EXPECTED(status); - CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY); + // Check that the creation of the various subclasses succeeded + CHECK_SUCCESS_AS_EXPECTED(status); + CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY); - return std::static_pointer_cast(result); - } + return result; } VdmaOutputStreamBase::VdmaOutputStreamBase(VdmaDevice &device, vdma::BoundaryChannelPtr channel, const LayerInfo &edge_layer, EventPtr core_op_activated_event, uint16_t batch_size, std::chrono::milliseconds transfer_timeout, hailo_stream_interface_t interface, hailo_status &status) : - OutputStreamBase(edge_layer, std::move(core_op_activated_event), status), + OutputStreamBase(edge_layer, interface, std::move(core_op_activated_event), status), m_device(&device), m_channel(std::move(channel)), m_interface(interface), @@ -277,7 +253,7 @@ VdmaOutputStreamBase::VdmaOutputStreamBase(VdmaDevice &device, vdma::BoundaryCha m_transfer_timeout(transfer_timeout), m_max_batch_size(batch_size), m_dynamic_batch_size(batch_size), - m_transfer_size(get_transfer_size(m_stream_info)) + m_transfer_size(get_transfer_size(m_stream_info, get_layer_info())) { // Check status for base class c'tor if (HAILO_SUCCESS != status) { @@ -353,9 +329,9 @@ hailo_status VdmaOutputStreamBase::activate_stream(uint16_t dynamic_batch_size, return HAILO_SUCCESS; } -hailo_status VdmaOutputStreamBase::register_interrupt_callback(const vdma::ProcessingCompleteCallback &callback) +void VdmaOutputStreamBase::register_interrupt_callback(const vdma::ProcessingCompleteCallback &callback) { - return 
m_channel->register_interrupt_callback(callback); + m_channel->register_interrupt_callback(callback); } hailo_status VdmaOutputStreamBase::deactivate_stream() @@ -373,11 +349,9 @@ hailo_status VdmaOutputStreamBase::deactivate_stream() return HAILO_SUCCESS; } -uint32_t VdmaOutputStreamBase::get_transfer_size(const hailo_stream_info_t &stream_info) +uint32_t VdmaOutputStreamBase::get_transfer_size(const hailo_stream_info_t &stream_info, const LayerInfo &layer_info) { - // The ppu outputs one bbox per vdma buffer in the case of nms - return (HAILO_FORMAT_ORDER_HAILO_NMS == stream_info.format.order) ? - stream_info.nms_info.bbox_size : stream_info.hw_frame_size; + return LayerInfoUtils::get_stream_transfer_size(stream_info, layer_info); } hailo_status VdmaOutputStreamBase::set_dynamic_batch_size(uint16_t dynamic_batch_size) @@ -392,7 +366,7 @@ hailo_status VdmaOutputStreamBase::set_dynamic_batch_size(uint16_t dynamic_batch CHECK(dynamic_batch_size <= m_max_batch_size, HAILO_INVALID_ARGUMENT, "Dynamic batch size ({}) must be <= than the configured batch size ({})", dynamic_batch_size, m_max_batch_size); - + if (CONTROL_PROTOCOL__IGNORE_DYNAMIC_BATCH_SIZE == dynamic_batch_size) { LOGGER__TRACE("Received CONTROL_PROTOCOL__IGNORE_DYNAMIC_BATCH_SIZE == dynamic_batch_size; " "Leaving previously set value of {}", m_dynamic_batch_size); diff --git a/hailort/libhailort/src/vdma/vdma_stream_base.hpp b/hailort/libhailort/src/vdma/vdma_stream_base.hpp index a7cde98..9569f24 100644 --- a/hailort/libhailort/src/vdma/vdma_stream_base.hpp +++ b/hailort/libhailort/src/vdma/vdma_stream_base.hpp @@ -20,7 +20,6 @@ namespace hailort { -constexpr std::chrono::seconds VDMA_FLUSH_TIMEOUT(10); class VdmaInputStreamBase : public InputStreamBase { public: @@ -41,7 +40,14 @@ public: Expected get_buffer_state(); virtual Expected get_buffer_frames_size() const override; virtual Expected get_pending_frames_count() const override; - virtual hailo_status register_interrupt_callback(const 
vdma::ProcessingCompleteCallback &callback) override; + + virtual hailo_status write_buffer_only(const MemoryView &buffer, const std::function &should_cancel = []() { return false; }) = 0; + virtual hailo_status send_pending_buffer(const device_id_t &device_id) = 0; + + void notify_all() + { + m_channel->notify_all(); + } protected: VdmaInputStreamBase(VdmaDevice &device, vdma::BoundaryChannelPtr channel, const LayerInfo &edge_layer, @@ -53,6 +59,9 @@ protected: virtual hailo_status deactivate_stream() override; hailo_status set_dynamic_batch_size(uint16_t dynamic_batch_size); + friend class VDeviceInputStreamBase; + friend class VDeviceNativeInputStream; + VdmaDevice *m_device; vdma::BoundaryChannelPtr m_channel; const hailo_stream_interface_t m_interface; @@ -81,7 +90,7 @@ public: virtual Expected get_buffer_frames_size() const override; virtual Expected get_pending_frames_count() const override; - virtual hailo_status register_interrupt_callback(const vdma::ProcessingCompleteCallback &callback); + void register_interrupt_callback(const vdma::ProcessingCompleteCallback &callback); protected: VdmaOutputStreamBase(VdmaDevice &device, vdma::BoundaryChannelPtr channel, const LayerInfo &edge_layer, @@ -91,9 +100,11 @@ protected: virtual hailo_status activate_stream(uint16_t dynamic_batch_size, bool resume_pending_stream_transfers) override; virtual hailo_status deactivate_stream() override; - static uint32_t get_transfer_size(const hailo_stream_info_t &stream_info); + static uint32_t get_transfer_size(const hailo_stream_info_t &stream_info, const LayerInfo &layer_info); hailo_status set_dynamic_batch_size(uint16_t dynamic_batch_size); + friend class VDeviceOutputStreamBase; + VdmaDevice *m_device; vdma::BoundaryChannelPtr m_channel; const hailo_stream_interface_t m_interface; @@ -102,7 +113,6 @@ protected: const uint16_t m_max_batch_size; uint16_t m_dynamic_batch_size; const uint32_t m_transfer_size; - std::mutex m_read_mutex; }; diff --git 
a/hailort/pre_build/external/CMakeLists.txt b/hailort/pre_build/external/CMakeLists.txt index fd34495..1c7438e 100644 --- a/hailort/pre_build/external/CMakeLists.txt +++ b/hailort/pre_build/external/CMakeLists.txt @@ -16,7 +16,7 @@ function(git_clone proj repo tag) ) endfunction() -git_clone(pybind11 https://github.com/pybind/pybind11.git 80dc998efced8ceb2be59756668a7e90e8bef917) +include(${CMAKE_CURRENT_LIST_DIR}/../../libhailort/bindings/python/externals/pybind11.cmake) git_clone(Catch2 https://github.com/catchorg/Catch2.git c4e3767e265808590986d5db6ca1b5532a7f3d13) git_clone(CLI11 https://github.com/hailo-ai/CLI11.git f1644f15f219303b7ad670732c21018a1e6f0e11) git_clone(spdlog https://github.com/gabime/spdlog.git e2789531912a5c6ab28a90387f97c52963eec08a) diff --git a/hailort/rpc/hailort_rpc.proto b/hailort/rpc/hailort_rpc.proto index 3df851f..873c9ba 100644 --- a/hailort/rpc/hailort_rpc.proto +++ b/hailort/rpc/hailort_rpc.proto @@ -33,6 +33,9 @@ service ProtoHailoRtRpc { rpc ConfiguredNetworkGroup_get_latency_measurement (ConfiguredNetworkGroup_get_latency_measurement_Request) returns (ConfiguredNetworkGroup_get_latency_measurement_Reply) {} rpc ConfiguredNetworkGroup_is_multi_context (ConfiguredNetworkGroup_is_multi_context_Request) returns (ConfiguredNetworkGroup_is_multi_context_Reply) {} rpc ConfiguredNetworkGroup_get_config_params(ConfiguredNetworkGroup_get_config_params_Request) returns (ConfiguredNetworkGroup_get_config_params_Reply) {} + rpc ConfiguredNetworkGroup_get_sorted_output_names(ConfiguredNetworkGroup_get_sorted_output_names_Request) returns (ConfiguredNetworkGroup_get_sorted_output_names_Reply) {} + rpc ConfiguredNetworkGroup_get_stream_names_from_vstream_name(ConfiguredNetworkGroup_get_stream_names_from_vstream_name_Request) returns (ConfiguredNetworkGroup_get_stream_names_from_vstream_name_Reply) {} + rpc ConfiguredNetworkGroup_get_vstream_names_from_stream_name(ConfiguredNetworkGroup_get_vstream_names_from_stream_name_Request) returns 
(ConfiguredNetworkGroup_get_vstream_names_from_stream_name_Reply) {} rpc InputVStreams_create (VStream_create_Request) returns (VStreams_create_Reply) {} rpc InputVStream_dup_handle (dup_handle_Request) returns (dup_handle_Reply) {} @@ -51,12 +54,18 @@ service ProtoHailoRtRpc { rpc OutputVStream_network_name (VStream_network_name_Request) returns (VStream_network_name_Reply) {} rpc InputVStream_abort (VStream_abort_Request) returns (VStream_abort_Reply) {} rpc OutputVStream_abort (VStream_abort_Request) returns (VStream_abort_Reply) {} + rpc InputVStream_stop_and_clear (VStream_stop_and_clear_Request) returns (VStream_stop_and_clear_Reply) {} + rpc InputVStream_start_vstream (VStream_start_vstream_Request) returns (VStream_start_vstream_Reply) {} + rpc OutputVStream_stop_and_clear (VStream_stop_and_clear_Request) returns (VStream_stop_and_clear_Reply) {} + rpc OutputVStream_start_vstream (VStream_start_vstream_Request) returns (VStream_start_vstream_Reply) {} rpc InputVStream_resume (VStream_resume_Request) returns (VStream_resume_Reply) {} rpc OutputVStream_resume (VStream_resume_Request) returns (VStream_resume_Reply) {} rpc InputVStream_get_user_buffer_format (VStream_get_user_buffer_format_Request) returns (VStream_get_user_buffer_format_Reply) {} rpc OutputVStream_get_user_buffer_format (VStream_get_user_buffer_format_Request) returns (VStream_get_user_buffer_format_Reply) {} rpc InputVStream_get_info (VStream_get_info_Request) returns (VStream_get_info_Reply) {} rpc OutputVStream_get_info (VStream_get_info_Request) returns (VStream_get_info_Reply) {} + rpc InputVStream_is_aborted (VStream_is_aborted_Request) returns (VStream_is_aborted_Reply) {} + rpc OutputVStream_is_aborted (VStream_is_aborted_Request) returns (VStream_is_aborted_Reply) {} } message empty {} @@ -107,6 +116,7 @@ message VDevice_create_Reply { message Release_Request { uint32 handle = 1; + uint32 pid = 2; } message Release_Reply { @@ -159,6 +169,17 @@ message ProtoNmsDefuseInfo { string 
original_name = 2; } +enum ProtoNmsBurstType { + // No burst + PROTO_NMS_BURST_TYPE_NO_BURST = 0; + // No image delimiter, burst per class + PROTO_NMS_BURST_TYPE_H8_PER_CLASS = 1; + // Image delimiter and burst per class + PROTO_NMS_BURST_TYPE_H15_PER_CLASS = 2; + // Image delimiter and burst per image + PROTO_NMS_BURST_TYPE_H15_PER_FRAME = 3; +} + message ProtoNmsInfo { uint32 number_of_classes = 1; uint32 max_bboxes_per_class = 2; @@ -166,6 +187,8 @@ message ProtoNmsInfo { uint32 chunks_per_frame = 4; bool is_defused = 5; ProtoNmsDefuseInfo defuse_info = 6; + uint32 burst_size = 7; + ProtoNmsBurstType burst_type = 8; } message ProtoQuantInfo { @@ -438,6 +461,35 @@ message ConfiguredNetworkGroup_get_config_params_Reply { ProtoConfigureNetworkParams params = 2; } +message ConfiguredNetworkGroup_get_sorted_output_names_Request { + uint32 handle = 1; +} + +message ConfiguredNetworkGroup_get_sorted_output_names_Reply { + uint32 status = 1; + repeated string sorted_output_names = 2; +} + +message ConfiguredNetworkGroup_get_stream_names_from_vstream_name_Request { + uint32 handle = 1; + string vstream_name = 2; +} + +message ConfiguredNetworkGroup_get_stream_names_from_vstream_name_Reply { + uint32 status = 1; + repeated string streams_names = 2; +} + +message ConfiguredNetworkGroup_get_vstream_names_from_stream_name_Request { + uint32 handle = 1; + string stream_name = 2; +} + +message ConfiguredNetworkGroup_get_vstream_names_from_stream_name_Reply { + uint32 status = 1; + repeated string vstreams_names = 2; +} + message InputVStream_write_Request { uint32 handle = 1; bytes data = 2; @@ -500,6 +552,22 @@ message VStream_abort_Reply { uint32 status = 1; } +message VStream_stop_and_clear_Request { + uint32 handle = 1; +} + +message VStream_stop_and_clear_Reply { + uint32 status = 1; +} + +message VStream_start_vstream_Request { + uint32 handle = 1; +} + +message VStream_start_vstream_Reply { + uint32 status = 1; +} + message VStream_resume_Request { uint32 handle = 1; } 
@@ -524,4 +592,13 @@ message VStream_get_info_Request { message VStream_get_info_Reply { uint32 status = 1; ProtoVStreamInfo vstream_info = 2; +} + +message VStream_is_aborted_Request { + uint32 handle = 1; +} + +message VStream_is_aborted_Reply { + uint32 status = 1; + bool is_aborted = 2; } \ No newline at end of file diff --git a/hailort/scripts/download_firmware_eth.cmd b/hailort/scripts/download_firmware_eth.cmd index 043b312..db9c3ff 100644 --- a/hailort/scripts/download_firmware_eth.cmd +++ b/hailort/scripts/download_firmware_eth.cmd @@ -2,7 +2,7 @@ @ECHO OFF set BASE_URI=https://hailo-hailort.s3.eu-west-2.amazonaws.com -set HRT_VERSION=4.13.0 +set HRT_VERSION=4.14.0 set FW_DIR=Hailo8/%HRT_VERSION%/FW set FW=hailo8_fw.%HRT_VERSION%_eth.bin diff --git a/hailort/scripts/download_firmware_eth.sh b/hailort/scripts/download_firmware_eth.sh index 70c8886..d02e0c5 100755 --- a/hailort/scripts/download_firmware_eth.sh +++ b/hailort/scripts/download_firmware_eth.sh @@ -2,7 +2,7 @@ set -e readonly BASE_URI="https://hailo-hailort.s3.eu-west-2.amazonaws.com" -readonly HRT_VERSION=4.13.0 +readonly HRT_VERSION=4.14.0 readonly FW_AWS_DIR="Hailo8/${HRT_VERSION}/FW" readonly FW="hailo8_fw.${HRT_VERSION}_eth.bin" diff --git a/hailort/scripts/download_hefs.cmd b/hailort/scripts/download_hefs.cmd index 332c9f1..944367e 100644 --- a/hailort/scripts/download_hefs.cmd +++ b/hailort/scripts/download_hefs.cmd @@ -1,12 +1,12 @@ :: cmd @ECHO OFF set BASE_URI=https://hailo-hailort.s3.eu-west-2.amazonaws.com -set HRT_VERSION=4.13.0 +set HRT_VERSION=4.14.0 set REMOTE_HEF_DIR=Hailo8/%HRT_VERSION%/HEFS set LOCAL_EXAMPLES_HEF_DIR=..\libhailort\examples\hefs -set LOCAL_TUTORIALS_HEF_DIR=..\libhailort\bindings\python\platform\tutorials\hefs +set LOCAL_TUTORIALS_HEF_DIR=..\libhailort\bindings\python\platform\hailo_tutorials\hefs set EXAMPLES_HEFS=(multi_network_shortcut_net.hef shortcut_net.hef) -set TUTORIALS_HEFS=(resnet_v1_18.hef) +set TUTORIALS_HEFS=(resnet_v1_18.hef shortcut_net.hef) if 
not exist %LOCAL_EXAMPLES_HEF_DIR% mkdir %LOCAL_EXAMPLES_HEF_DIR% if not exist %LOCAL_TUTORIALS_HEF_DIR% mkdir %LOCAL_TUTORIALS_HEF_DIR% diff --git a/hailort/scripts/download_hefs.sh b/hailort/scripts/download_hefs.sh index 489367f..ff56bd0 100755 --- a/hailort/scripts/download_hefs.sh +++ b/hailort/scripts/download_hefs.sh @@ -2,16 +2,17 @@ set -e readonly BASE_URI="https://hailo-hailort.s3.eu-west-2.amazonaws.com" -readonly HRT_VERSION=4.13.0 +readonly HRT_VERSION=4.14.0 readonly REMOTE_HEF_DIR="Hailo8/${HRT_VERSION}/HEFS" readonly LOCAL_EXAMPLES_HEF_DIR="../libhailort/examples/hefs" -readonly LOCAL_TUTORIALS_HEF_DIR="../libhailort/bindings/python/platform/tutorials/hefs/" +readonly LOCAL_TUTORIALS_HEF_DIR="../libhailort/bindings/python/platform/hailo_tutorials/hefs" readonly EXAMPLES_HEFS=( "shortcut_net.hef" "multi_network_shortcut_net.hef" ) readonly TUTORIALS_HEFS=( "resnet_v1_18.hef" + "shortcut_net.hef" ) function create_hef_dir(){ diff --git a/hailort/tools/hailo15-scripts/hailo15_env_vars.sh b/hailort/tools/hailo15-scripts/hailo15_env_vars.sh new file mode 100644 index 0000000..2756ad1 --- /dev/null +++ b/hailort/tools/hailo15-scripts/hailo15_env_vars.sh @@ -0,0 +1,8 @@ +#! /bin/bash +set -e + +# Environment declarations +script_directory=$(cd $(dirname "${BASH_SOURCE[0]}") && pwd) +local_platform_sw_path="$script_directory"/../../../ +h15="10.0.0.1" +ssh-copy-id root@$h15 \ No newline at end of file diff --git a/hailort/tools/hailo15-scripts/load_driver.sh b/hailort/tools/hailo15-scripts/load_driver.sh new file mode 100755 index 0000000..0e4e414 --- /dev/null +++ b/hailort/tools/hailo15-scripts/load_driver.sh @@ -0,0 +1,13 @@ +#! 
/bin/bash +set -e + +# Include Environment declarations +script_directory=$(cd $(dirname "${BASH_SOURCE[0]}") && pwd) +source "$script_directory"/hailo15_env_vars.sh + +cd $local_platform_sw_path +./install.sh comp build_integrated_nnc_driver --image-path /local/bkc/v0.29-build-2023-05-07 +path="$local_platform_sw_path"/hailort/drivers/linux/integrated_nnc/hailo_integrated_nnc.ko +scp $path root@$h15:/lib/modules/5.15.32-yocto-standard/kernel/drivers/misc/hailo_integrated_nnc.ko + +ssh root@$h15 "modprobe -r hailo_integrated_nnc && modprobe hailo_integrated_nnc" diff --git a/hailort/tools/hailo15-scripts/load_firmware.sh b/hailort/tools/hailo15-scripts/load_firmware.sh new file mode 100755 index 0000000..c568630 --- /dev/null +++ b/hailort/tools/hailo15-scripts/load_firmware.sh @@ -0,0 +1,11 @@ +#! /bin/bash +set -e + +# Include Environment declarations +script_directory=$(cd $(dirname "${BASH_SOURCE[0]}") && pwd) +source "$script_directory"/hailo15_env_vars.sh + +cd $local_platform_sw_path +./install.sh comp build_fw --fw vpu --hw-arch hailo15 +scp firmware/vpu_firmware/build/hailo15_nnc_fw.bin root@$h15:/lib/firmware/hailo/hailo15_nnc_fw.bin +ssh root@$h15 "modprobe -r hailo_integrated_nnc && modprobe hailo_integrated_nnc" diff --git a/hailort/tools/hailo15-scripts/load_hrt.sh b/hailort/tools/hailo15-scripts/load_hrt.sh new file mode 100755 index 0000000..8c6947d --- /dev/null +++ b/hailort/tools/hailo15-scripts/load_hrt.sh @@ -0,0 +1,14 @@ +#! 
/bin/bash +set -e + +# Include Environment declarations +script_directory=$(cd $(dirname "${BASH_SOURCE[0]}") && pwd) +source "$script_directory"/hailo15_env_vars.sh + +cd $local_platform_sw_path +./build.sh -aaarch64 -brelease install + +scp lib/linux.aarch64.release/libhailort.* root@$h15:/usr/lib/ +scp bin/linux.aarch64.release/hailortcli root@$h15:/usr/bin/ +scp bin/linux.aarch64.release/debalex root@$h15:/usr/bin/ +scp bin/linux.aarch64.release/board_tests root@$h15:/usr/bin/ diff --git a/hailort/tools/hailo15-scripts/load_pcr.sh b/hailort/tools/hailo15-scripts/load_pcr.sh new file mode 100755 index 0000000..4123852 --- /dev/null +++ b/hailort/tools/hailo15-scripts/load_pcr.sh @@ -0,0 +1,12 @@ +#! /bin/bash +set -e + +# Include Environment declarations +script_directory=$(cd $(dirname "${BASH_SOURCE[0]}") && pwd) +source "$script_directory"/hailo15_env_vars.sh + +cd $local_platform_sw_path +# Compile PCR +./install.sh comp build_infra_tools --arch aarch64 --build-hailort --build-type release + +scp platform_internals/hailo_platform_internals/low_level_tools/build/linux.aarch64.release/pcr/pcr root@$h15:/usr/bin/ diff --git a/hailort/tools/hailo15-scripts/read_log.sh b/hailort/tools/hailo15-scripts/read_log.sh new file mode 100755 index 0000000..5c05a0f --- /dev/null +++ b/hailort/tools/hailo15-scripts/read_log.sh @@ -0,0 +1,15 @@ +#! 
/bin/bash +set -e + +# Include Environment declarations +script_directory=$(cd $(dirname "${BASH_SOURCE[0]}") && pwd) +source "$script_directory"/hailo15_env_vars.sh + +cd $local_platform_sw_path +source hailo_platform_venv/bin/activate +ssh root@$h15 "hailortcli fw-logger /tmp/fw_log.dat" +scp root@$h15:/tmp/fw_log.dat /tmp +ssh root@$h15 "rm /tmp/fw_log.dat" + +python ./platform_internals/hailo_platform_internals/tools/firmware/parse_tracelog.py --fw vpu --core-log-entries firmware/vpu_firmware/build/hailo15_nnc_fw_*_log_entries.csv --core-only --raw-input-file /tmp/fw_log.dat + diff --git a/hailort/tools/hailo15-scripts/sanity_infer.sh b/hailort/tools/hailo15-scripts/sanity_infer.sh new file mode 100755 index 0000000..0393549 --- /dev/null +++ b/hailort/tools/hailo15-scripts/sanity_infer.sh @@ -0,0 +1,8 @@ +#! /bin/bash +set -e + +# Include Environment declarations +script_directory=$(cd $(dirname "${BASH_SOURCE[0]}") && pwd) +source "$script_directory"/hailo15_env_vars.sh + +ssh root@$h15 "hailortcli run /etc/hailo/hefs/hailo15/shortcut_net/28_28_3/shortcut_net.hef -c 1" diff --git a/hailort/tools/hailo15-scripts/update_hrt_and_infer.sh b/hailort/tools/hailo15-scripts/update_hrt_and_infer.sh new file mode 100755 index 0000000..4e8c93d --- /dev/null +++ b/hailort/tools/hailo15-scripts/update_hrt_and_infer.sh @@ -0,0 +1,23 @@ +#! 
/bin/bash +set -e + +# Include Environment declarations +script_directory=$(cd $(dirname "${BASH_SOURCE[0]}") && pwd) +source "$script_directory"/hailo15_env_vars.sh + +# Build hailo15 artifacts +/bin/bash "$script_directory"/load_hrt.sh + +# Build hailo15 PCR +/bin/bash "$script_directory"/load_pcr.sh + +# Build hailo15 fw +cd $local_platform_sw_path +./install.sh comp build_fw --fw vpu --hw-arch hailo15 +scp firmware/vpu_firmware/build/hailo15_nnc_fw.bin root@$h15:/lib/firmware/hailo/hailo15_nnc_fw.bin + +# Build integrated_nnc (hailo15) driver +/bin/bash "$script_directory"/load_driver.sh + +# Run sanity infer +/bin/bash "$script_directory"/sanity_infer.sh diff --git a/hailort/tools/hw_debug/CMakeLists.txt b/hailort/tools/hw_debug/CMakeLists.txt index 5fd1d77..eeea604 100644 --- a/hailort/tools/hw_debug/CMakeLists.txt +++ b/hailort/tools/hw_debug/CMakeLists.txt @@ -12,8 +12,6 @@ set(FILES ${HAILO_OS_DIR}/hailort_driver.cpp ${HAILO_OS_DIR}/file_descriptor.cpp ${HAILO_FULL_OS_DIR}/driver_scan.cpp - # TODO: HRT-3816 remove mmap header - ${HAILO_OS_DIR}/mmap_buffer.cpp ) if(WIN32) @@ -51,3 +49,8 @@ if(READLINE_INCLUDE_DIR AND READLINE_LIBRARY) else() message(WARNING "Could not find readline library. 
To better UI, please install it by calling `sudo apt install libreadline6-dev`") endif() + +install(TARGETS debalex + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} +) +cli11_install_completion_file(debalex) \ No newline at end of file diff --git a/hailort/tools/hw_debug/main.cpp b/hailort/tools/hw_debug/main.cpp index 74d33c4..0d0bfac 100644 --- a/hailort/tools/hw_debug/main.cpp +++ b/hailort/tools/hw_debug/main.cpp @@ -92,7 +92,7 @@ static std::vector get_available_device_ids() return device_ids; } -std::string get_device_filepath(const std::string &device_id) +HailoRTDriver::DeviceInfo get_device_info(const std::string &device_id) { auto scan_results = HailoRTDriver::scan_devices(); if (!scan_results) { @@ -107,13 +107,13 @@ std::string get_device_filepath(const std::string &device_id) throw std::runtime_error("Requested device not found"); } - return device_found->dev_path; + return *device_found; } std::shared_ptr create_driver_object(const std::string &device_id) { - auto device_path = get_device_filepath(device_id); - auto hailort_driver = HailoRTDriver::create(device_path); + auto device_info = get_device_info(device_id); + auto hailort_driver = HailoRTDriver::create(device_info); if (!hailort_driver) { throw std::runtime_error("Failed create hailort driver object"); }