From 35d9282f36ac4a6c6765ba69a01d0f4f83da9011 Mon Sep 17 00:00:00 2001 From: HailoRT-Automation <98901220+HailoRT-Automation@users.noreply.github.com> Date: Thu, 28 Dec 2023 16:14:01 +0200 Subject: [PATCH] v4.16.0 (#11) * v4.16.0 --- common/include/context_switch_defs.h | 3 +- common/include/control_protocol.h | 3 +- common/include/firmware_status.h | 1 + hailort/.gitignore | 13 +- hailort/CMakeLists.txt | 56 +- hailort/LICENSE-3RD-PARTY.md | 4 +- hailort/cmake/execute_cmake.cmake | 12 +- hailort/cmake/external/benchmark.cmake | 16 +- hailort/cmake/external/catch2.cmake | 16 +- hailort/cmake/external/cli11.cmake | 21 + hailort/cmake/external/dotwriter.cmake | 16 +- hailort/cmake/external/grpc.cmake | 50 + hailort/cmake/external/json.cmake | 16 +- hailort/cmake/external/pevents.cmake | 23 +- hailort/cmake/external/protobuf.cmake | 67 + hailort/cmake/external/pybind11.cmake | 39 +- .../cmake/external/readerwriterqueue.cmake | 23 +- hailort/cmake/external/spdlog.cmake | 18 +- hailort/common/device_measurements.cpp | 29 +- hailort/common/file_utils.cpp | 4 +- hailort/common/file_utils.hpp | 3 +- hailort/common/os/posix/os_utils.cpp | 32 +- hailort/common/os/windows/os_utils.cpp | 41 + hailort/common/os_utils.hpp | 3 + hailort/common/utils.hpp | 14 +- hailort/hailort_service/CMakeLists.txt | 2 + .../hailort_service/hailort_rpc_service.cpp | 1032 ++++++++--- .../hailort_service/hailort_rpc_service.hpp | 40 +- .../service_resource_manager.hpp | 26 +- .../vdevice_callbacks_queue.hpp | 88 + hailort/hailortcli/CMakeLists.txt | 3 +- hailort/hailortcli/fw_control_command.cpp | 4 +- .../hailortcli/run2/network_live_track.cpp | 39 +- .../hailortcli/run2/network_live_track.hpp | 5 +- hailort/hailortcli/run2/network_runner.cpp | 484 +++-- hailort/hailortcli/run2/network_runner.hpp | 74 +- hailort/hailortcli/run2/run2_command.cpp | 150 +- hailort/hailortcli/run_command.cpp | 24 +- hailort/libhailort/CMakeLists.txt | 2 +- .../bindings/gstreamer/CMakeLists.txt | 3 +- 
.../bindings/gstreamer/gst-hailo/common.cpp | 62 + .../bindings/gstreamer/gst-hailo/common.hpp | 76 + .../gstreamer/gst-hailo/gsthailonet.cpp | 108 +- .../gstreamer/gst-hailo/gsthailonet.hpp | 4 +- .../gstreamer/gst-hailo/gsthailonet2.cpp | 1313 ++++++++++++++ .../gstreamer/gst-hailo/gsthailonet2.hpp | 170 ++ .../gstreamer/gst-hailo/gsthailoplugin.cpp | 4 +- .../gstreamer/gst-hailo/gsthailosend.cpp | 7 - .../gst-hailo/metadata/tensor_meta.hpp | 6 +- .../gst-hailo/network_group_handle.cpp | 8 +- .../gst-hailo/network_group_handle.hpp | 4 +- .../hailo_platform/pyhailort/pyhailort.py | 235 ++- .../notebooks/HRT_0_Inference_Tutorial.ipynb | 8 +- ...rence_Tutorial_Multi_Process_Service.ipynb | 8 +- .../bindings/python/platform/setup.py | 2 +- .../bindings/python/src/CMakeLists.txt | 2 +- .../bindings/python/src/hef_api.cpp | 18 - .../bindings/python/src/hef_api.hpp | 4 - .../bindings/python/src/network_group_api.hpp | 8 +- .../bindings/python/src/pyhailort.cpp | 37 +- .../cmake/toolchains/toolchains.yaml | 44 - hailort/libhailort/examples/README.md | 3 + .../data_quantization_example/CMakeLists.txt | 2 +- .../c/infer_pipeline_example/CMakeLists.txt | 2 +- .../infer_pipeline_example.c | 5 +- .../c/multi_device_example/CMakeLists.txt | 2 +- .../multi_device_example.c | 5 +- .../CMakeLists.txt | 2 +- .../multi_network_vstream_example.c | 5 +- .../CMakeLists.txt | 2 +- .../power_measurement_example/CMakeLists.txt | 2 +- .../CMakeLists.txt | 2 +- .../raw_async_streams_single_thread_example.c | 34 +- .../c/raw_streams_example/CMakeLists.txt | 2 +- .../CMakeLists.txt | 2 +- .../switch_network_groups_example.c | 5 +- .../CMakeLists.txt | 2 +- .../switch_network_groups_manually_example.c | 5 +- .../c/vstreams_example/CMakeLists.txt | 2 +- .../c/vstreams_example/vstreams_example.c | 12 +- .../libhailort/examples/cpp/CMakeLists.txt | 4 + .../cpp/async_infer_example/CMakeLists.txt | 16 + .../async_infer_example.cpp | 93 + .../CMakeLists.txt | 16 + 
.../async_infer_functionality_example.cpp | 129 ++ .../cpp/infer_pipeline_example/CMakeLists.txt | 2 +- .../infer_pipeline_example.cpp | 4 +- .../cpp/multi_device_example/CMakeLists.txt | 2 +- .../multi_device_example.cpp | 3 +- .../CMakeLists.txt | 2 +- .../multi_network_vstream_example.cpp | 3 +- .../cpp/multi_process_example/CMakeLists.txt | 2 +- .../multi_process_example.cpp | 3 +- .../CMakeLists.txt | 2 +- .../power_measurement_example/CMakeLists.txt | 2 +- .../CMakeLists.txt | 2 +- ...raw_async_streams_multi_thread_example.cpp | 75 +- .../CMakeLists.txt | 2 +- ...aw_async_streams_single_thread_example.cpp | 52 +- .../cpp/raw_streams_example/CMakeLists.txt | 2 +- .../raw_streams_example.cpp | 5 +- .../CMakeLists.txt | 2 +- .../switch_network_groups_example.cpp | 3 +- .../CMakeLists.txt | 2 +- ...switch_network_groups_manually_example.cpp | 3 +- .../cpp/vstreams_example/CMakeLists.txt | 2 +- .../cpp/vstreams_example/vstreams_example.cpp | 10 +- hailort/libhailort/hef.proto | 30 +- .../include/hailo/buffer_storage.hpp | 75 +- hailort/libhailort/include/hailo/device.hpp | 48 +- hailort/libhailort/include/hailo/hailort.h | 105 +- .../include/hailo/hailort_common.hpp | 24 +- .../include/hailo/hailort_defaults.hpp | 6 +- hailort/libhailort/include/hailo/hef.hpp | 62 +- .../libhailort/include/hailo/infer_model.hpp | 46 +- .../include/hailo/network_group.hpp | 70 +- hailort/libhailort/include/hailo/stream.hpp | 24 +- .../libhailort/include/hailo/transform.hpp | 21 +- hailort/libhailort/include/hailo/vdevice.hpp | 40 +- hailort/libhailort/include/hailo/vstream.hpp | 10 +- hailort/libhailort/src/core_op/CMakeLists.txt | 1 + hailort/libhailort/src/core_op/core_op.cpp | 127 +- hailort/libhailort/src/core_op/core_op.hpp | 49 +- .../resource_manager/intermediate_buffer.cpp | 12 +- .../resource_manager/periph_calculator.cpp | 160 ++ .../resource_manager/periph_calculator.hpp | 43 + .../resource_manager/resource_manager.cpp | 20 +- .../resource_manager/resource_manager.hpp | 
1 - .../resource_manager_builder.cpp | 130 +- .../libhailort/src/device_common/control.cpp | 23 +- .../libhailort/src/device_common/device.cpp | 23 + .../src/device_common/device_internal.cpp | 8 +- hailort/libhailort/src/eth/eth_stream.cpp | 12 +- hailort/libhailort/src/eth/eth_stream.hpp | 12 +- .../libhailort/src/eth/hcp_config_core_op.cpp | 5 + .../libhailort/src/eth/hcp_config_core_op.hpp | 1 + hailort/libhailort/src/hailort.cpp | 85 +- hailort/libhailort/src/hailort_defaults.cpp | 21 +- .../src/hef/context_switch_actions.cpp | 1 + .../libhailort/src/hef/core_op_metadata.cpp | 11 +- .../libhailort/src/hef/core_op_metadata.hpp | 1 + hailort/libhailort/src/hef/hef.cpp | 552 +++--- hailort/libhailort/src/hef/hef_internal.hpp | 84 +- hailort/libhailort/src/hef/layer_info.hpp | 5 +- hailort/libhailort/src/hw_consts.hpp | 15 +- hailort/libhailort/src/mipi/mipi_stream.cpp | 11 +- hailort/libhailort/src/mipi/mipi_stream.hpp | 4 +- .../libhailort/src/net_flow/CMakeLists.txt | 3 + .../src/net_flow/ops/nms_post_process.cpp | 14 +- .../src/net_flow/ops/nms_post_process.hpp | 52 +- hailort/libhailort/src/net_flow/ops/op.hpp | 1 + .../src/net_flow/ops/op_metadata.hpp | 5 +- .../src/net_flow/ops/softmax_post_process.cpp | 26 +- .../src/net_flow/ops/softmax_post_process.hpp | 43 +- .../src/net_flow/ops/ssd_post_process.cpp | 21 +- .../src/net_flow/ops/ssd_post_process.hpp | 25 +- .../src/net_flow/ops/yolov5_op_metadata.hpp | 70 + .../src/net_flow/ops/yolov5_post_process.cpp | 11 +- .../src/net_flow/ops/yolov5_post_process.hpp | 78 +- .../net_flow/ops/yolov5_seg_op_metadata.hpp | 60 + .../net_flow/ops/yolov5_seg_post_process.cpp | 146 +- .../net_flow/ops/yolov5_seg_post_process.hpp | 48 +- .../src/net_flow/ops/yolov8_post_process.cpp | 193 ++ .../src/net_flow/ops/yolov8_post_process.hpp | 182 ++ .../src/net_flow/ops/yolox_post_process.cpp | 12 +- .../src/net_flow/ops/yolox_post_process.hpp | 16 +- .../net_flow/pipeline/async_infer_runner.cpp | 1048 ++--------- 
.../net_flow/pipeline/async_infer_runner.hpp | 105 ++ .../pipeline/async_infer_runner_internal.hpp | 184 -- .../src/net_flow/pipeline/infer_model.cpp | 440 ++++- .../pipeline/infer_model_internal.hpp | 66 +- .../src/net_flow/pipeline/pipeline.cpp | 1584 ++--------------- .../src/net_flow/pipeline/pipeline.hpp | 405 +---- .../net_flow/pipeline/pipeline_builder.cpp | 964 ++++++++++ .../net_flow/pipeline/pipeline_builder.hpp | 101 ++ .../net_flow/pipeline/pipeline_internal.cpp | 1569 ++++++++++++++++ .../net_flow/pipeline/pipeline_internal.hpp | 374 ++++ .../src/net_flow/pipeline/vstream.cpp | 954 ++++++---- .../net_flow/pipeline/vstream_internal.hpp | 275 ++- .../src/network_group/network_group.cpp | 225 ++- .../network_group/network_group_internal.hpp | 62 +- hailort/libhailort/src/os/hailort_driver.hpp | 16 +- .../src/os/posix/hailort_driver.cpp | 93 +- .../src/os/windows/hailort_driver.cpp | 6 +- .../src/service/hailort_rpc_client.cpp | 574 +++++- .../src/service/hailort_rpc_client.hpp | 19 +- .../src/service/network_group_client.cpp | 171 +- .../src/stream_common/async_stream_base.cpp | 125 +- .../src/stream_common/async_stream_base.hpp | 39 +- .../src/stream_common/nms_stream.cpp | 45 +- .../src/stream_common/nms_stream.hpp | 8 +- .../stream_common/remote_process_stream.cpp | 53 +- .../stream_common/remote_process_stream.hpp | 14 +- .../src/stream_common/stream_internal.cpp | 104 +- .../src/stream_common/stream_internal.hpp | 119 +- .../src/stream_common/transfer_common.cpp | 13 +- .../src/stream_common/transfer_common.hpp | 42 +- .../libhailort/src/transform/transform.cpp | 139 +- .../src/transform/transform_internal.hpp | 14 +- hailort/libhailort/src/utils/CMakeLists.txt | 1 + .../libhailort/src/utils/buffer_storage.cpp | 138 +- .../libhailort/src/utils/hailort_common.cpp | 2 + .../libhailort/src/utils/hailort_logger.cpp | 2 +- .../libhailort/src/utils/profiler/handler.hpp | 68 +- .../src/utils/profiler/monitor_handler.cpp | 79 +- 
.../src/utils/profiler/monitor_handler.hpp | 87 +- .../src/utils/profiler/profiler_utils.hpp | 3 + .../profiler/scheduler_profiler_handler.cpp | 23 +- .../profiler/scheduler_profiler_handler.hpp | 16 +- .../libhailort/src/utils/profiler/tracer.hpp | 18 +- .../soc_utils/partial_cluster_reader.cpp | 78 + .../soc_utils/partial_cluster_reader.hpp | 50 + .../src/utils/thread_safe_queue.hpp | 64 +- hailort/libhailort/src/vdevice/CMakeLists.txt | 3 +- .../src/vdevice/callback_reorder_queue.cpp | 2 +- .../src/vdevice/callback_reorder_queue.hpp | 2 +- .../src/vdevice/pipeline_multiplexer.cpp | 456 ----- .../src/vdevice/pipeline_multiplexer.hpp | 121 -- .../scheduler/infer_request_accumulator.cpp | 99 ++ .../scheduler/infer_request_accumulator.hpp | 61 + .../scheduler/scheduled_core_op_state.cpp | 176 +- .../scheduler/scheduled_core_op_state.hpp | 61 +- .../vdevice/scheduler/scheduled_stream.cpp | 209 +-- .../vdevice/scheduler/scheduled_stream.hpp | 80 +- .../src/vdevice/scheduler/scheduler.cpp | 372 ++-- .../src/vdevice/scheduler/scheduler.hpp | 31 +- .../src/vdevice/scheduler/scheduler_base.hpp | 84 +- .../vdevice/scheduler/scheduler_counter.hpp | 99 -- .../vdevice/scheduler/scheduler_oracle.cpp | 29 +- hailort/libhailort/src/vdevice/vdevice.cpp | 293 +-- .../src/vdevice/vdevice_core_op.cpp | 311 ++-- .../src/vdevice/vdevice_core_op.hpp | 38 +- .../src/vdevice/vdevice_internal.hpp | 56 +- .../src/vdevice/vdevice_native_stream.cpp | 98 +- .../src/vdevice/vdevice_native_stream.hpp | 33 +- .../vdevice_stream_multiplexer_wrapper.cpp | 384 ---- .../vdevice_stream_multiplexer_wrapper.hpp | 118 -- hailort/libhailort/src/vdma/CMakeLists.txt | 1 + .../src/vdma/channel/boundary_channel.cpp | 128 +- .../src/vdma/channel/boundary_channel.hpp | 13 +- .../vdma/channel/interrupts_dispatcher.cpp | 6 +- .../src/vdma/circular_stream_buffer_pool.cpp | 10 +- .../src/vdma/circular_stream_buffer_pool.hpp | 5 +- .../src/vdma/memory/buffer_requirements.cpp | 24 +- 
.../src/vdma/memory/buffer_requirements.hpp | 5 +- .../src/vdma/memory/continuous_buffer.cpp | 50 +- .../src/vdma/memory/continuous_buffer.hpp | 15 +- .../src/vdma/memory/dma_able_buffer.cpp | 46 +- .../src/vdma/memory/dma_able_buffer.hpp | 15 +- .../src/vdma/memory/mapped_buffer.cpp | 39 +- .../src/vdma/memory/mapped_buffer.hpp | 24 +- .../src/vdma/memory/mapping_manager.cpp | 99 ++ .../src/vdma/memory/mapping_manager.hpp | 54 + .../libhailort/src/vdma/memory/sg_buffer.cpp | 2 +- .../src/vdma/vdma_config_core_op.cpp | 47 +- .../src/vdma/vdma_config_core_op.hpp | 2 + .../src/vdma/vdma_config_manager.cpp | 2 +- hailort/libhailort/src/vdma/vdma_device.cpp | 21 +- hailort/libhailort/src/vdma/vdma_device.hpp | 11 +- hailort/libhailort/src/vdma/vdma_stream.cpp | 168 +- hailort/libhailort/src/vdma/vdma_stream.hpp | 24 +- hailort/libhailort/tracer_profiler.proto | 1 + hailort/pre_build/CMakeLists.txt | 20 - hailort/pre_build/external/CMakeLists.txt | 35 - hailort/pre_build/tools/CMakeLists.txt | 28 - hailort/prepare_externals.cmake | 27 +- hailort/prepare_externals/CMakeLists.txt | 22 + hailort/rpc/CMakeLists.txt | 9 + hailort/rpc/hailort_rpc.proto | 281 ++- hailort/rpc/rpc_definitions.hpp | 5 + hailort/scripts/download_firmware_eth.cmd | 2 +- hailort/scripts/download_firmware_eth.sh | 2 +- hailort/scripts/download_hefs.cmd | 2 +- hailort/scripts/download_hefs.sh | 2 +- hailort/tools/hailo15-scripts/load_hrt.sh | 11 +- hailort/tools/hw_debug/CMakeLists.txt | 1 + 276 files changed, 14496 insertions(+), 8708 deletions(-) create mode 100644 hailort/cmake/external/cli11.cmake create mode 100644 hailort/cmake/external/grpc.cmake create mode 100644 hailort/cmake/external/protobuf.cmake create mode 100644 hailort/hailort_service/vdevice_callbacks_queue.hpp create mode 100644 hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailonet2.cpp create mode 100644 hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailonet2.hpp delete mode 100644 
hailort/libhailort/cmake/toolchains/toolchains.yaml create mode 100644 hailort/libhailort/examples/cpp/async_infer_example/CMakeLists.txt create mode 100644 hailort/libhailort/examples/cpp/async_infer_example/async_infer_example.cpp create mode 100644 hailort/libhailort/examples/cpp/async_infer_functionality_example/CMakeLists.txt create mode 100644 hailort/libhailort/examples/cpp/async_infer_functionality_example/async_infer_functionality_example.cpp create mode 100644 hailort/libhailort/src/core_op/resource_manager/periph_calculator.cpp create mode 100644 hailort/libhailort/src/core_op/resource_manager/periph_calculator.hpp create mode 100644 hailort/libhailort/src/net_flow/ops/yolov5_op_metadata.hpp create mode 100644 hailort/libhailort/src/net_flow/ops/yolov5_seg_op_metadata.hpp create mode 100644 hailort/libhailort/src/net_flow/ops/yolov8_post_process.cpp create mode 100644 hailort/libhailort/src/net_flow/ops/yolov8_post_process.hpp create mode 100644 hailort/libhailort/src/net_flow/pipeline/async_infer_runner.hpp delete mode 100644 hailort/libhailort/src/net_flow/pipeline/async_infer_runner_internal.hpp create mode 100644 hailort/libhailort/src/net_flow/pipeline/pipeline_builder.cpp create mode 100644 hailort/libhailort/src/net_flow/pipeline/pipeline_builder.hpp create mode 100644 hailort/libhailort/src/net_flow/pipeline/pipeline_internal.cpp create mode 100644 hailort/libhailort/src/net_flow/pipeline/pipeline_internal.hpp create mode 100644 hailort/libhailort/src/utils/soc_utils/partial_cluster_reader.cpp create mode 100644 hailort/libhailort/src/utils/soc_utils/partial_cluster_reader.hpp delete mode 100644 hailort/libhailort/src/vdevice/pipeline_multiplexer.cpp delete mode 100644 hailort/libhailort/src/vdevice/pipeline_multiplexer.hpp create mode 100644 hailort/libhailort/src/vdevice/scheduler/infer_request_accumulator.cpp create mode 100644 hailort/libhailort/src/vdevice/scheduler/infer_request_accumulator.hpp delete mode 100644 
hailort/libhailort/src/vdevice/scheduler/scheduler_counter.hpp delete mode 100644 hailort/libhailort/src/vdevice/vdevice_stream_multiplexer_wrapper.cpp delete mode 100644 hailort/libhailort/src/vdevice/vdevice_stream_multiplexer_wrapper.hpp create mode 100644 hailort/libhailort/src/vdma/memory/mapping_manager.cpp create mode 100644 hailort/libhailort/src/vdma/memory/mapping_manager.hpp delete mode 100644 hailort/pre_build/CMakeLists.txt delete mode 100644 hailort/pre_build/external/CMakeLists.txt delete mode 100644 hailort/pre_build/tools/CMakeLists.txt create mode 100644 hailort/prepare_externals/CMakeLists.txt diff --git a/common/include/context_switch_defs.h b/common/include/context_switch_defs.h index 7a88bd6..d7fe8f2 100644 --- a/common/include/context_switch_defs.h +++ b/common/include/context_switch_defs.h @@ -64,9 +64,10 @@ typedef struct { uint16_t periph_bytes_per_buffer; uint16_t periph_buffers_per_frame; uint16_t feature_padding_payload; - uint16_t buffer_padding_payload; + uint32_t buffer_padding_payload; uint16_t buffer_padding; bool is_periph_calculated_in_hailort; + bool is_core_hw_padding_config_in_dfc; } CONTEXT_SWITCH_DEFS__stream_reg_info_t; #if defined(_MSC_VER) diff --git a/common/include/control_protocol.h b/common/include/control_protocol.h index 28ad41c..9e022bd 100644 --- a/common/include/control_protocol.h +++ b/common/include/control_protocol.h @@ -439,9 +439,10 @@ typedef struct { uint16_t periph_bytes_per_buffer; uint16_t periph_buffers_per_frame; uint16_t feature_padding_payload; - uint16_t buffer_padding_payload; + uint32_t buffer_padding_payload; uint16_t buffer_padding; bool is_periph_calculated_in_hailort; + bool is_core_hw_padding_config_in_dfc; } CONTROL_PROTOCOL__nn_stream_config_t; typedef struct { diff --git a/common/include/firmware_status.h b/common/include/firmware_status.h index fe9d8e3..b33f5a1 100644 --- a/common/include/firmware_status.h +++ b/common/include/firmware_status.h @@ -919,6 +919,7 @@ Updating rules: 
FIRMWARE_STATUS__X(DATAFLOW_COMMON_STATUS_INVALID_EDGE_LAYER_INDEX)\ FIRMWARE_STATUS__X(DATAFLOW_COMMON_STATUS_INVALID_PARAMETER)\ FIRMWARE_STATUS__X(DATAFLOW_COMMON_STATUS_PADDING_NOT_SUPPORTED_FOR_ARCH)\ + FIRMWARE_STATUS__X(DATAFLOW_COMMON_STATUS_INVALID_MAX_BUFFER_PADDING_VALUE)\ \ FIRMWARE_MODULE__X(FIRMWARE_MODULE__RESET_HANDLER)\ FIRMWARE_STATUS__X(RESET_HANDLER_CHIP_RESET_FAILED)\ diff --git a/hailort/.gitignore b/hailort/.gitignore index 71367b8..e544d4c 100644 --- a/hailort/.gitignore +++ b/hailort/.gitignore @@ -1,12 +1,3 @@ /external/ -cmake/external/benchmark/ -cmake/external/catch2/ -cmake/external/dotwriter/ -cmake/external/json/ -cmake/external/pybind11/ -cmake/external/readerwriterqueue/ -cmake/external/spdlog/ - -pre_build/external/build/ -pre_build/tools/build_protoc/ -pre_build/install/ +cmake/external/*/ +prepare_externals/build/ diff --git a/hailort/CMakeLists.txt b/hailort/CMakeLists.txt index 4fb18b2..eb90050 100644 --- a/hailort/CMakeLists.txt +++ b/hailort/CMakeLists.txt @@ -30,55 +30,25 @@ endif() # Set firmware version add_definitions( -DFIRMWARE_VERSION_MAJOR=4 ) -add_definitions( -DFIRMWARE_VERSION_MINOR=15 ) +add_definitions( -DFIRMWARE_VERSION_MINOR=16 ) add_definitions( -DFIRMWARE_VERSION_REVISION=0 ) if(HAILO_BUILD_SERVICE) add_definitions( -DHAILO_SUPPORT_MULTI_PROCESS ) endif() -# The logic of prepare_externals is executed in a sperate module so that it can be run externally (via cmake -P prepare_externals.cmake) -include(prepare_externals.cmake) - -# Include host protobuf for protoc (https://stackoverflow.com/questions/53651181/cmake-find-protobuf-package-in-custom-directory) -if(CMAKE_HOST_UNIX) - include(${CMAKE_CURRENT_LIST_DIR}/pre_build/install/lib/cmake/protobuf/protobuf-config.cmake) - include(${CMAKE_CURRENT_LIST_DIR}/pre_build/install/lib/cmake/protobuf/protobuf-module.cmake) -else() - include(${CMAKE_CURRENT_LIST_DIR}/pre_build/install/cmake/protobuf-config.cmake) - 
include(${CMAKE_CURRENT_LIST_DIR}/pre_build/install/cmake/protobuf-module.cmake) +# TODO: temporary hack to support offline builds. Remove HAILO_OFFLINE_COMPILATION and use FETCHCONTENT_FULLY_DISCONNECTED +if(HAILO_OFFLINE_COMPILATION) + set(FETCHCONTENT_FULLY_DISCONNECTED ON CACHE INTERNAL "") + set(HAILO_OFFLINE_COMPILATION OFF CACHE INTERNAL "") endif() +# TODO: move protobuf and grpc to inner cmake files +set(HAILO_EXTERNAL_DIR ${CMAKE_CURRENT_LIST_DIR}/external) set(HAILO_EXTERNALS_CMAKE_SCRIPTS ${CMAKE_CURRENT_LIST_DIR}/cmake/external/) - -# Add target protobuf directory and exclude its targets from all -# Disable protobuf tests, protoc and MSVC static runtime unless they are already defined -# NOTE: we can also force - set(protobuf_BUILD_TESTS OFF CACHE BOOL "Build protobuf tests" FORCE) -if(NOT protobuf_BUILD_TESTS) - set(protobuf_BUILD_TESTS OFF CACHE BOOL "Build protobuf tests") -endif() -if(NOT protobuf_BUILD_PROTOC_BINARIES) - set(protobuf_BUILD_PROTOC_BINARIES OFF CACHE BOOL "Build libprotoc and protoc compiler") -endif() -if(MSVC AND NOT protobuf_MSVC_STATIC_RUNTIME) - set(protobuf_MSVC_STATIC_RUNTIME OFF CACHE BOOL "Protobuf MSVC static runtime") -endif() -if(NOT protobuf_WITH_ZLIB) - set(protobuf_WITH_ZLIB OFF CACHE BOOL "Compile protobuf with zlib") -endif() -add_subdirectory(external/protobuf/cmake EXCLUDE_FROM_ALL) -if(NOT MSVC) - set_target_properties(libprotobuf PROPERTIES POSITION_INDEPENDENT_CODE ON) - set_target_properties(libprotobuf-lite PROPERTIES POSITION_INDEPENDENT_CODE ON) -endif() - +include(${HAILO_EXTERNALS_CMAKE_SCRIPTS}/protobuf.cmake) if(HAILO_BUILD_SERVICE) - if(CMAKE_HOST_UNIX) - set(HAILO_GRPC_CPP_PLUGIN_EXECUTABLE "${HAILO_PRE_BUILD_BUILD_TOOLS}/build_grpc/grpc_cpp_plugin") - else() - set(HAILO_GRPC_CPP_PLUGIN_EXECUTABLE "${HAILO_PRE_BUILD_BUILD_TOOLS}/build_grpc/${PRE_BUILD_BUILD_TYPE}/grpc_cpp_plugin.exe") - endif() + include(${HAILO_EXTERNALS_CMAKE_SCRIPTS}/grpc.cmake) endif() -set(HAILO_PROTOBUF_PROTOC $) 
set(HAILORT_INC_DIR ${PROJECT_SOURCE_DIR}/hailort/libhailort/include) set(HAILORT_SRC_DIR ${PROJECT_SOURCE_DIR}/hailort/libhailort/src) @@ -87,19 +57,11 @@ set(COMMON_INC_DIR ${PROJECT_SOURCE_DIR}/common/include) set(DRIVER_INC_DIR ${PROJECT_SOURCE_DIR}/hailort/drivers/common) set(RPC_DIR ${PROJECT_SOURCE_DIR}/hailort/rpc) -add_subdirectory(external/CLI11 EXCLUDE_FROM_ALL) if(CMAKE_SYSTEM_NAME STREQUAL QNX) include(${HAILO_EXTERNALS_CMAKE_SCRIPTS}/pevents.cmake) endif() if(HAILO_BUILD_SERVICE) - set(BUILD_TESTING OFF) # disabe abseil tests - set(gRPC_ZLIB_PROVIDER "module" CACHE STRING "Provider of zlib library") - # The following is an awful hack needed in order to force grpc to use our libprotobuf+liborotoc targets - # ('formal' options are to let grpc recompile it which causes a name conflict, - # or let it use find_package and take the risk it will use a different installed lib) - set(gRPC_PROTOBUF_PROVIDER "hack" CACHE STRING "Provider of protobuf library") - add_subdirectory(external/grpc EXCLUDE_FROM_ALL) add_subdirectory(rpc) endif() diff --git a/hailort/LICENSE-3RD-PARTY.md b/hailort/LICENSE-3RD-PARTY.md index 375f5d9..4868b78 100644 --- a/hailort/LICENSE-3RD-PARTY.md +++ b/hailort/LICENSE-3RD-PARTY.md @@ -2,7 +2,7 @@ |:---------------------------------|:----------------------------------|:-------------------|:---------------|:----------------------------------------------|:------------------------------------------------------------------------------| | CLI11 | University of Cincinnati | 3-Clause BSD | 2.2.0 | Fork | https://github.com/hailo-ai/CLI11 | | Catch2 | Catch2 Authors | BSL-1.0 | 2.13.7 | Cloned entire package | https://github.com/catchorg/Catch2 | -| protobuf | Google Inc. | BSD | 3.19.4 | Cloned entire package | https://github.com/protocolbuffers/protobuf | +| protobuf | Google Inc. 
| BSD | 21.12 | Cloned entire package | https://github.com/protocolbuffers/protobuf | | pybind11 | Wenzel Jakob | BSD | 2.10.1 | Cloned entire package | https://github.com/pybind/pybind11 | | spdlog | Gabi Melman | MIT | 1.6.1 | Cloned entire package | https://github.com/gabime/spdlog | | folly | Facebook, Inc. and its affiliates | Apache License 2.0 | v2020.08.17.00 | Copied only the file `folly/TokenBucket.h` | https://github.com/facebook/folly | @@ -12,5 +12,5 @@ | benchmark | Google Inc. | Apache License 2.0 | 1.6.0 | Cloned entire package | https://github.com/google/benchmark.git | | md5 | Alexander Peslyak | cut-down BSD | - | Copied code from website | http://openwall.info/wiki/people/solar/software/public-domain-source-code/md5 | | pevents | Mahmoud Al-Qudsi | MIT License | master | Cloned entire package | https://github.com/neosmart/pevents.git | -| grpc | Google Inc. | Apache License 2.0 | 1.46.0 | Cloned entire package | https://github.com/grpc/grpc | +| grpc | Google Inc. | Apache License 2.0 | 1.46.3 | Cloned entire package | https://github.com/grpc/grpc | | stb | Sean Barrett | MIT License | 0.97 | Copied only the file `stb/stb_image_resize.h` | https://github.com/nothings/stb | \ No newline at end of file diff --git a/hailort/cmake/execute_cmake.cmake b/hailort/cmake/execute_cmake.cmake index b833f30..12ed41c 100644 --- a/hailort/cmake/execute_cmake.cmake +++ b/hailort/cmake/execute_cmake.cmake @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0) function(execute_process_in_clean_env) cmake_parse_arguments(execute_process_in_clean_env "" "RESULT_VARIABLE" "" ${ARGN}) if(CMAKE_HOST_UNIX) - string(REPLACE ";" " " cmdline "${execute_process_in_clean_env_UNPARSED_ARGUMENTS}") + string(REPLACE ";" "' '" cmdline "'${execute_process_in_clean_env_UNPARSED_ARGUMENTS}'") execute_process(COMMAND env -i HOME=$ENV{HOME} PATH=/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin bash -l -c "${cmdline}" OUTPUT_QUIET RESULT_VARIABLE result) else() # TODO: 
make it clean env for cross compile @@ -22,10 +22,11 @@ function(execute_cmake) cmake_parse_arguments(execute_cmake "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) execute_process_in_clean_env( - "${CMAKE_COMMAND}" - "${execute_cmake_SOURCE_DIR}" - "-B${execute_cmake_BUILD_DIR}" - "${execute_cmake_CONFIGURE_ARGS}" + ${CMAKE_COMMAND} + ${execute_cmake_SOURCE_DIR} + -B ${execute_cmake_BUILD_DIR} + -G "${CMAKE_GENERATOR}" + ${execute_cmake_CONFIGURE_ARGS} RESULT_VARIABLE result ) if(result) @@ -34,6 +35,7 @@ function(execute_cmake) if(${execute_cmake_PARALLEL_BUILD} AND (CMAKE_GENERATOR MATCHES "Unix Makefiles")) execute_process(COMMAND grep -c ^processor /proc/cpuinfo OUTPUT_VARIABLE cores_count RESULT_VARIABLE result) + string(STRIP ${cores_count} cores_count) if(result) message(FATAL_ERROR "Failed getting the amount of cores") endif() diff --git a/hailort/cmake/external/benchmark.cmake b/hailort/cmake/external/benchmark.cmake index d012936..18e00be 100644 --- a/hailort/cmake/external/benchmark.cmake +++ b/hailort/cmake/external/benchmark.cmake @@ -11,17 +11,15 @@ FetchContent_Declare( GIT_REPOSITORY https://github.com/google/benchmark.git GIT_TAG f91b6b42b1b9854772a90ae9501464a161707d1e # Version 1.6.0 GIT_SHALLOW TRUE - SOURCE_DIR "${CMAKE_CURRENT_LIST_DIR}/benchmark" - BINARY_DIR "${CMAKE_CURRENT_LIST_DIR}/benchmark" + SOURCE_DIR ${HAILO_EXTERNAL_DIR}/benchmark-src + SUBBUILD_DIR ${HAILO_EXTERNAL_DIR}/benchmark-subbuild ) -if(NOT HAILO_OFFLINE_COMPILATION) - # https://stackoverflow.com/questions/65527126/disable-install-for-fetchcontent - FetchContent_GetProperties(benchmark) - if(NOT benchmark_POPULATED) - FetchContent_Populate(benchmark) +# https://stackoverflow.com/questions/65527126/disable-install-for-fetchcontent +FetchContent_GetProperties(benchmark) +if(NOT benchmark_POPULATED) + FetchContent_Populate(benchmark) + if (NOT HAILO_EXTERNALS_EXCLUDE_TARGETS) add_subdirectory(${benchmark_SOURCE_DIR} ${benchmark_BINARY_DIR} EXCLUDE_FROM_ALL) endif() 
-else() - add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/benchmark EXCLUDE_FROM_ALL) endif() \ No newline at end of file diff --git a/hailort/cmake/external/catch2.cmake b/hailort/cmake/external/catch2.cmake index f8f5c2a..c7a76a1 100644 --- a/hailort/cmake/external/catch2.cmake +++ b/hailort/cmake/external/catch2.cmake @@ -7,17 +7,15 @@ FetchContent_Declare( GIT_REPOSITORY https://github.com/catchorg/Catch2.git GIT_TAG c4e3767e265808590986d5db6ca1b5532a7f3d13 # Version 2.13.7 GIT_SHALLOW TRUE - SOURCE_DIR "${CMAKE_CURRENT_LIST_DIR}/catch2" - BINARY_DIR "${CMAKE_CURRENT_LIST_DIR}/catch2" + SOURCE_DIR ${HAILO_EXTERNAL_DIR}/catch2-src + SUBBUILD_DIR ${HAILO_EXTERNAL_DIR}/catch2-subbuild ) -if(NOT HAILO_OFFLINE_COMPILATION) - # https://stackoverflow.com/questions/65527126/disable-install-for-fetchcontent - FetchContent_GetProperties(catch2) - if(NOT catch2_POPULATED) - FetchContent_Populate(catch2) +# https://stackoverflow.com/questions/65527126/disable-install-for-fetchcontent +FetchContent_GetProperties(catch2) +if(NOT catch2_POPULATED) + FetchContent_Populate(catch2) + if (NOT HAILO_EXTERNALS_EXCLUDE_TARGETS) add_subdirectory(${catch2_SOURCE_DIR} ${catch2_BINARY_DIR} EXCLUDE_FROM_ALL) endif() -else() - add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/catch2 EXCLUDE_FROM_ALL) endif() \ No newline at end of file diff --git a/hailort/cmake/external/cli11.cmake b/hailort/cmake/external/cli11.cmake new file mode 100644 index 0000000..f0b90b4 --- /dev/null +++ b/hailort/cmake/external/cli11.cmake @@ -0,0 +1,21 @@ +cmake_minimum_required(VERSION 3.11.0) + +include(FetchContent) + +FetchContent_Declare( + cli11 + GIT_REPOSITORY https://github.com/hailo-ai/CLI11.git + GIT_TAG ae78ac41cf225706e83f57da45117e3e90d4a5b4 # Version 2.2.0 + hailo completion + GIT_SHALLOW TRUE + SOURCE_DIR ${HAILO_EXTERNAL_DIR}/cli11-src + SUBBUILD_DIR ${HAILO_EXTERNAL_DIR}/cli11-subbuild +) + +# https://stackoverflow.com/questions/65527126/disable-install-for-fetchcontent +FetchContent_GetProperties(cli11) 
+if(NOT cli11_POPULATED) + FetchContent_Populate(cli11) + if (NOT HAILO_EXTERNALS_EXCLUDE_TARGETS) + add_subdirectory(${cli11_SOURCE_DIR} ${cli11_BINARY_DIR} EXCLUDE_FROM_ALL) + endif() +endif() \ No newline at end of file diff --git a/hailort/cmake/external/dotwriter.cmake b/hailort/cmake/external/dotwriter.cmake index ef7d799..5ee8da9 100644 --- a/hailort/cmake/external/dotwriter.cmake +++ b/hailort/cmake/external/dotwriter.cmake @@ -7,17 +7,15 @@ FetchContent_Declare( GIT_REPOSITORY https://github.com/hailo-ai/DotWriter GIT_TAG e5fa8f281adca10dd342b1d32e981499b8681daf # Version master GIT_SHALLOW TRUE - SOURCE_DIR "${CMAKE_CURRENT_LIST_DIR}/dotwriter" - BINARY_DIR "${CMAKE_CURRENT_LIST_DIR}/dotwriter" + SOURCE_DIR ${HAILO_EXTERNAL_DIR}/dotwriter-src + SUBBUILD_DIR ${HAILO_EXTERNAL_DIR}/dotwriter-subbuild ) -if(NOT HAILO_OFFLINE_COMPILATION) - # https://stackoverflow.com/questions/65527126/disable-install-for-fetchcontent - FetchContent_GetProperties(dotwriter) - if(NOT dotwriter_POPULATED) - FetchContent_Populate(dotwriter) +# https://stackoverflow.com/questions/65527126/disable-install-for-fetchcontent +FetchContent_GetProperties(dotwriter) +if(NOT dotwriter_POPULATED) + FetchContent_Populate(dotwriter) + if (NOT HAILO_EXTERNALS_EXCLUDE_TARGETS) add_subdirectory(${dotwriter_SOURCE_DIR} ${dotwriter_BINARY_DIR} EXCLUDE_FROM_ALL) endif() -else() - add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/dotwriter EXCLUDE_FROM_ALL) endif() \ No newline at end of file diff --git a/hailort/cmake/external/grpc.cmake b/hailort/cmake/external/grpc.cmake new file mode 100644 index 0000000..d93c1a6 --- /dev/null +++ b/hailort/cmake/external/grpc.cmake @@ -0,0 +1,50 @@ +cmake_minimum_required(VERSION 3.11.0) + +include(${CMAKE_CURRENT_LIST_DIR}/protobuf.cmake) +include(FetchContent) + +FetchContent_Declare( + grpc + GIT_REPOSITORY https://github.com/grpc/grpc + GIT_TAG 53d69cc581c5b7305708587f4f1939278477c28a # v1.46.3 + GIT_SHALLOW TRUE + SOURCE_DIR ${HAILO_EXTERNAL_DIR}/grpc-src + 
SUBBUILD_DIR ${HAILO_EXTERNAL_DIR}/grpc-subbuild +) + +FetchContent_GetProperties(grpc) +if(NOT grpc_POPULATED) + FetchContent_Populate(grpc) + if (NOT HAILO_EXTERNALS_EXCLUDE_TARGETS) + message(STATUS "Building grpc...") + include(${CMAKE_CURRENT_LIST_DIR}/../execute_cmake.cmake) + set(TOOL_BUILD_TYPE "Release") + execute_cmake( + SOURCE_DIR ${HAILO_EXTERNAL_DIR}/grpc-src + BUILD_DIR ${HAILO_EXTERNAL_DIR}/grpc-build + CONFIGURE_ARGS + -DCMAKE_BUILD_TYPE=${TOOL_BUILD_TYPE} + + -DgRPC_BUILD_TESTS:BOOL=OFF + # TODO: check flag on Windows + # -DgRPC_BUILD_MSVC_MP_COUNT:STRING=-1 + -DgRPC_PROTOBUF_PROVIDER:STRING=package + -DgRPC_PROTOBUF_PACKAGE_TYPE:STRING=CONFIG + -DProtobuf_DIR:PATH=${PROTOBUF_CONFIG_DIR} + BUILD_ARGS + --config ${TOOL_BUILD_TYPE} --target grpc_cpp_plugin ${CMAKE_EXTRA_BUILD_ARGS} + PARALLEL_BUILD + ) + + if(HAILO_BUILD_SERVICE) + # TODO: go over BUILD_TESTING vs gRPC_BUILD_TESTS. what about avoiding the hack the same way we did for grpc_cpp_plugin? + set(BUILD_TESTING OFF) # disabe abseil tests + set(gRPC_ZLIB_PROVIDER "module" CACHE STRING "Provider of zlib library") + # The following is an awful hack needed in order to force grpc to use our libprotobuf+liborotoc targets + # ('formal' options are to let grpc recompile it which causes a name conflict, + # or let it use find_package and take the risk it will use a different installed lib) + set(gRPC_PROTOBUF_PROVIDER "hack" CACHE STRING "Provider of protobuf library") + add_subdirectory(${grpc_SOURCE_DIR} ${grpc_BINARY_DIR} EXCLUDE_FROM_ALL) + endif() + endif() +endif() \ No newline at end of file diff --git a/hailort/cmake/external/json.cmake b/hailort/cmake/external/json.cmake index b3a30ab..8c1d4b3 100644 --- a/hailort/cmake/external/json.cmake +++ b/hailort/cmake/external/json.cmake @@ -7,17 +7,15 @@ FetchContent_Declare( GIT_REPOSITORY https://github.com/ArthurSonzogni/nlohmann_json_cmake_fetchcontent.git GIT_TAG 391786c6c3abdd3eeb993a3154f1f2a4cfe137a0 # Version 3.9.1 GIT_SHALLOW TRUE - 
SOURCE_DIR "${CMAKE_CURRENT_LIST_DIR}/json" - BINARY_DIR "${CMAKE_CURRENT_LIST_DIR}/json" + SOURCE_DIR ${HAILO_EXTERNAL_DIR}/json-src + SUBBUILD_DIR ${HAILO_EXTERNAL_DIR}/json-subbuild ) -if(NOT HAILO_OFFLINE_COMPILATION) - # https://stackoverflow.com/questions/65527126/disable-install-for-fetchcontent - FetchContent_GetProperties(json) - if(NOT json_POPULATED) - FetchContent_Populate(json) +# https://stackoverflow.com/questions/65527126/disable-install-for-fetchcontent +FetchContent_GetProperties(json) +if(NOT json_POPULATED) + FetchContent_Populate(json) + if (NOT HAILO_EXTERNALS_EXCLUDE_TARGETS) add_subdirectory(${json_SOURCE_DIR} ${json_BINARY_DIR} EXCLUDE_FROM_ALL) endif() -else() - add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/json EXCLUDE_FROM_ALL) endif() \ No newline at end of file diff --git a/hailort/cmake/external/pevents.cmake b/hailort/cmake/external/pevents.cmake index 65c3659..a04e54b 100644 --- a/hailort/cmake/external/pevents.cmake +++ b/hailort/cmake/external/pevents.cmake @@ -7,20 +7,17 @@ FetchContent_Declare( GIT_REPOSITORY https://github.com/neosmart/pevents.git GIT_TAG 1209b1fd1bd2e75daab4380cf43d280b90b45366 # Master #GIT_SHALLOW TRUE - SOURCE_DIR "${CMAKE_CURRENT_LIST_DIR}/pevents" - BINARY_DIR "${CMAKE_CURRENT_LIST_DIR}/pevents" + SOURCE_DIR ${HAILO_EXTERNAL_DIR}/pevents-src + SUBBUILD_DIR ${HAILO_EXTERNAL_DIR}/pevents-subbuild ) -if(NOT HAILO_OFFLINE_COMPILATION) - # https://stackoverflow.com/questions/65527126/disable-install-for-fetchcontent - FetchContent_GetProperties(pevents) - if(NOT pevents_POPULATED) - FetchContent_Populate(pevents) +# https://stackoverflow.com/questions/65527126/disable-install-for-fetchcontent +FetchContent_GetProperties(pevents) +if(NOT pevents_POPULATED) + FetchContent_Populate(pevents) + if (NOT HAILO_EXTERNALS_EXCLUDE_TARGETS) + add_library(pevents STATIC EXCLUDE_FROM_ALL ${pevents_SOURCE_DIR}/src/pevents.cpp) + target_include_directories(pevents PUBLIC ${pevents_SOURCE_DIR}/src) + 
target_compile_definitions(pevents PRIVATE -DWFMO) endif() -endif() - -if(NOT TARGET pevents) - add_library(pevents STATIC EXCLUDE_FROM_ALL ${pevents_SOURCE_DIR}/src/pevents.cpp) - target_include_directories(pevents PUBLIC ${pevents_SOURCE_DIR}/src) - target_compile_definitions(pevents PRIVATE -DWFMO) endif() \ No newline at end of file diff --git a/hailort/cmake/external/protobuf.cmake b/hailort/cmake/external/protobuf.cmake new file mode 100644 index 0000000..1503920 --- /dev/null +++ b/hailort/cmake/external/protobuf.cmake @@ -0,0 +1,67 @@ +cmake_minimum_required(VERSION 3.11.0) + +include(FetchContent) + +# TODO: support cross generators - https://gitlab.kitware.com/cmake/cmake/-/issues/20536 +FetchContent_Declare( + protobuf + GIT_REPOSITORY https://github.com/protocolbuffers/protobuf.git + GIT_TAG f0dc78d7e6e331b8c6bb2d5283e06aa26883ca7c # v21.12 + GIT_SHALLOW TRUE + SOURCE_DIR ${HAILO_EXTERNAL_DIR}/protobuf-src + SUBBUILD_DIR ${HAILO_EXTERNAL_DIR}/protobuf-subbuild +) + +FetchContent_GetProperties(protobuf) +if(NOT protobuf_POPULATED) + FetchContent_Populate(protobuf) + if (NOT HAILO_EXTERNALS_EXCLUDE_TARGETS) + message(STATUS "Building protobuf::protoc...") + include(${CMAKE_CURRENT_LIST_DIR}/../execute_cmake.cmake) + set(TOOL_BUILD_TYPE "Release") + set(PROTOBUF_INSTALL_DIR ${HAILO_EXTERNAL_DIR}/protobuf-install) + + execute_cmake( + SOURCE_DIR ${HAILO_EXTERNAL_DIR}/protobuf-src + BUILD_DIR ${HAILO_EXTERNAL_DIR}/protobuf-build + CONFIGURE_ARGS + -DCMAKE_BUILD_TYPE=${TOOL_BUILD_TYPE} + -DCMAKE_INSTALL_PREFIX=${PROTOBUF_INSTALL_DIR} + + -Dprotobuf_BUILD_TESTS:BOOL=OFF + -Dprotobuf_WITH_ZLIB:BOOL=OFF + -Dprotobuf_MSVC_STATIC_RUNTIME:BOOL=OFF + BUILD_ARGS + # NOTE: We are installing instead of building protoc because "hailort\external\protobuf-build\cmake\protobuf-targets.cmake" (in Windows) is based on config type. + # TODO: consider importing protobuf_generate_cpp instead? will it solve it? 
+ --config ${TOOL_BUILD_TYPE} --target install ${CMAKE_EXTRA_BUILD_ARGS} + PARALLEL_BUILD + ) + + if(WIN32) + set(PROTOBUF_CONFIG_DIR ${PROTOBUF_INSTALL_DIR}/cmake) + else() + set(PROTOBUF_CONFIG_DIR ${PROTOBUF_INSTALL_DIR}/lib/cmake/protobuf) + endif() + + # Include host protobuf for protoc (https://stackoverflow.com/questions/53651181/cmake-find-protobuf-package-in-custom-directory) + include(${PROTOBUF_CONFIG_DIR}/protobuf-config.cmake) + include(${PROTOBUF_CONFIG_DIR}/protobuf-module.cmake) + + set(protobuf_BUILD_TESTS OFF CACHE BOOL "Build protobuf tests" FORCE) + set(protobuf_BUILD_PROTOC_BINARIES OFF CACHE BOOL "Build libprotoc and protoc compiler" FORCE) + set(protobuf_MSVC_STATIC_RUNTIME OFF CACHE BOOL "Protobuf MSVC static runtime" FORCE) + set(protobuf_WITH_ZLIB OFF CACHE BOOL "Compile protobuf with zlib" FORCE) + add_subdirectory(${protobuf_SOURCE_DIR} ${protobuf_BINARY_DIR} EXCLUDE_FROM_ALL) + + if(NOT MSVC) + set_target_properties(libprotobuf PROPERTIES POSITION_INDEPENDENT_CODE ON) + set_target_properties(libprotobuf-lite PROPERTIES POSITION_INDEPENDENT_CODE ON) + endif() + endif() +endif() + + + + + diff --git a/hailort/cmake/external/pybind11.cmake b/hailort/cmake/external/pybind11.cmake index 0b3df2d..3e6c729 100644 --- a/hailort/cmake/external/pybind11.cmake +++ b/hailort/cmake/external/pybind11.cmake @@ -2,34 +2,31 @@ cmake_minimum_required(VERSION 3.11.0) include(FetchContent) -if(NOT PYTHON_EXECUTABLE AND PYBIND11_PYTHON_VERSION) - # venv version is prioritized (instead of PYBIND11_PYTHON_VERSION) if PYTHON_EXECUTABLE is not set. 
- # See https://pybind11.readthedocs.io/en/stable/changelog.html#v2-6-0-oct-21-2020 - if((${CMAKE_VERSION} VERSION_LESS "3.22.0") AND (NOT WIN32)) - find_package(PythonInterp ${PYBIND11_PYTHON_VERSION} REQUIRED) - set(PYTHON_EXECUTABLE ${Python_EXECUTABLE}) - else() - find_package(Python3 ${PYBIND11_PYTHON_VERSION} REQUIRED EXACT COMPONENTS Interpreter Development) - set(PYTHON_EXECUTABLE ${Python3_EXECUTABLE}) - endif() -endif() - FetchContent_Declare( pybind11 GIT_REPOSITORY https://github.com/pybind/pybind11.git GIT_TAG 80dc998efced8ceb2be59756668a7e90e8bef917 # Version 2.10.1 GIT_SHALLOW TRUE - SOURCE_DIR "${CMAKE_CURRENT_LIST_DIR}/pybind11" - BINARY_DIR "${CMAKE_CURRENT_LIST_DIR}/pybind11" + SOURCE_DIR ${HAILO_EXTERNAL_DIR}/pybind11-src + SUBBUILD_DIR ${HAILO_EXTERNAL_DIR}/pybind11-subbuild ) -if(NOT HAILO_OFFLINE_COMPILATION) - # https://stackoverflow.com/questions/65527126/disable-install-for-fetchcontent - FetchContent_GetProperties(pybind11) - if(NOT pybind11_POPULATED) - FetchContent_Populate(pybind11) +# https://stackoverflow.com/questions/65527126/disable-install-for-fetchcontent +FetchContent_GetProperties(pybind11) +if(NOT pybind11_POPULATED) + FetchContent_Populate(pybind11) + if (NOT HAILO_EXTERNALS_EXCLUDE_TARGETS) + if(NOT PYTHON_EXECUTABLE AND PYBIND11_PYTHON_VERSION) + # venv version is prioritized (instead of PYBIND11_PYTHON_VERSION) if PYTHON_EXECUTABLE is not set. 
+ # See https://pybind11.readthedocs.io/en/stable/changelog.html#v2-6-0-oct-21-2020 + if((${CMAKE_VERSION} VERSION_LESS "3.22.0") AND (NOT WIN32)) + find_package(PythonInterp ${PYBIND11_PYTHON_VERSION} REQUIRED) + set(PYTHON_EXECUTABLE ${Python_EXECUTABLE}) + else() + find_package(Python3 ${PYBIND11_PYTHON_VERSION} REQUIRED EXACT COMPONENTS Interpreter Development) + set(PYTHON_EXECUTABLE ${Python3_EXECUTABLE}) + endif() + endif() add_subdirectory(${pybind11_SOURCE_DIR} ${pybind11_BINARY_DIR} EXCLUDE_FROM_ALL) endif() -else() - add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/pybind11 EXCLUDE_FROM_ALL) endif() \ No newline at end of file diff --git a/hailort/cmake/external/readerwriterqueue.cmake b/hailort/cmake/external/readerwriterqueue.cmake index ba5b3f4..403b1bc 100644 --- a/hailort/cmake/external/readerwriterqueue.cmake +++ b/hailort/cmake/external/readerwriterqueue.cmake @@ -7,20 +7,17 @@ FetchContent_Declare( GIT_REPOSITORY https://github.com/cameron314/readerwriterqueue GIT_TAG 435e36540e306cac40fcfeab8cc0a22d48464509 # Version 1.0.3 GIT_SHALLOW TRUE - SOURCE_DIR "${CMAKE_CURRENT_LIST_DIR}/readerwriterqueue" - BINARY_DIR "${CMAKE_CURRENT_LIST_DIR}/readerwriterqueue" + SOURCE_DIR ${HAILO_EXTERNAL_DIR}/readerwriterqueue-src + SUBBUILD_DIR ${HAILO_EXTERNAL_DIR}/readerwriterqueue-subbuild ) -if(NOT HAILO_OFFLINE_COMPILATION) - # https://stackoverflow.com/questions/65527126/disable-install-for-fetchcontent - FetchContent_GetProperties(readerwriterqueue) - if(NOT readerwriterqueue_POPULATED) - FetchContent_Populate(readerwriterqueue) +# https://stackoverflow.com/questions/65527126/disable-install-for-fetchcontent +FetchContent_GetProperties(readerwriterqueue) +if(NOT readerwriterqueue_POPULATED) + FetchContent_Populate(readerwriterqueue) + if (NOT HAILO_EXTERNALS_EXCLUDE_TARGETS) + # Add readerwriterqueue as a header-only library + add_library(readerwriterqueue INTERFACE) + target_include_directories(readerwriterqueue INTERFACE ${readerwriterqueue_SOURCE_DIR}) endif() 
-endif() - -if(NOT TARGET readerwriterqueue) - # Add readerwriterqueue as a header-only library - add_library(readerwriterqueue INTERFACE) - target_include_directories(readerwriterqueue INTERFACE ${readerwriterqueue_SOURCE_DIR}) endif() \ No newline at end of file diff --git a/hailort/cmake/external/spdlog.cmake b/hailort/cmake/external/spdlog.cmake index 1cf80df..1e5502c 100644 --- a/hailort/cmake/external/spdlog.cmake +++ b/hailort/cmake/external/spdlog.cmake @@ -7,18 +7,16 @@ FetchContent_Declare( GIT_REPOSITORY https://github.com/gabime/spdlog GIT_TAG 22a169bc319ac06948e7ee0be6b9b0ac81386604 GIT_SHALLOW TRUE - SOURCE_DIR "${CMAKE_CURRENT_LIST_DIR}/spdlog" - BINARY_DIR "${CMAKE_CURRENT_LIST_DIR}/spdlog" + SOURCE_DIR ${HAILO_EXTERNAL_DIR}/spdlog-src + SUBBUILD_DIR ${HAILO_EXTERNAL_DIR}/spdlog-subbuild ) -if(NOT HAILO_OFFLINE_COMPILATION) - # https://stackoverflow.com/questions/65527126/disable-install-for-fetchcontent - FetchContent_GetProperties(spdlog) - if(NOT spdlog_POPULATED) - FetchContent_Populate(spdlog) +# https://stackoverflow.com/questions/65527126/disable-install-for-fetchcontent +FetchContent_GetProperties(spdlog) +if(NOT spdlog_POPULATED) + FetchContent_Populate(spdlog) + if (NOT HAILO_EXTERNALS_EXCLUDE_TARGETS) add_subdirectory(${spdlog_SOURCE_DIR} ${spdlog_BINARY_DIR} EXCLUDE_FROM_ALL) + set_target_properties(spdlog PROPERTIES POSITION_INDEPENDENT_CODE ON) endif() -else() - add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/spdlog EXCLUDE_FROM_ALL) endif() -set_target_properties(spdlog PROPERTIES POSITION_INDEPENDENT_CODE ON) diff --git a/hailort/common/device_measurements.cpp b/hailort/common/device_measurements.cpp index b498def..d27966a 100644 --- a/hailort/common/device_measurements.cpp +++ b/hailort/common/device_measurements.cpp @@ -123,23 +123,36 @@ hailo_status PowerMeasurement::sanity_check() hailo_status PowerMeasurement::start_measurement() { + auto status = m_device.stop_power_measurement(); + CHECK_SUCCESS(status, "Failed to stop power 
measurement"); + + status = m_device.set_power_measurement(HAILO_MEASUREMENT_BUFFER_INDEX_0, HAILO_DVM_OPTIONS_AUTO, m_measurement_type); + CHECK_SUCCESS(status, "Failed to start power measurement"); + + //Note: important to keep the chip sampling period lower than the interval between measurements (DEFAULT_MEASUREMENTS_INTERVAL) + status = m_device.start_power_measurement(HAILO_AVERAGE_FACTOR_1, HAILO_SAMPLING_PERIOD_140US); + CHECK_SUCCESS(status, "Failed to start power measurement"); + m_is_thread_running = true; m_thread = std::thread([this] () { - while (m_is_thread_running.load()) { - auto power_info = m_device.power_measurement(HAILO_DVM_OPTIONS_AUTO, m_measurement_type); - if (HAILO_SUCCESS != power_info.status()) { - LOGGER__ERROR("Failed to get chip's power, status = {}", power_info.status()); + const bool clear_power_measurement_history = true; + while (m_is_thread_running.load()) { + std::this_thread::sleep_for(DEFAULT_MEASUREMENTS_INTERVAL); + auto power_data = m_device.get_power_measurement(HAILO_MEASUREMENT_BUFFER_INDEX_0, clear_power_measurement_history); + if (HAILO_SUCCESS != power_data.status()) { + LOGGER__ERROR("Failed to get chip's power, status = {}", power_data.status()); m_is_thread_running = false; break; } { std::unique_lock lock(m_mutex); - m_acc->add_data_point(*power_info); - } - - std::this_thread::sleep_for(DEFAULT_MEASUREMENTS_INTERVAL); + m_acc->add_data_point(power_data->average_value); + } } + auto status = m_device.stop_power_measurement(); + CHECK_SUCCESS(status, "Failed to start power measurement"); + return HAILO_SUCCESS; }); return HAILO_SUCCESS; diff --git a/hailort/common/file_utils.cpp b/hailort/common/file_utils.cpp index 4d3faea..5360484 100644 --- a/hailort/common/file_utils.cpp +++ b/hailort/common/file_utils.cpp @@ -34,7 +34,7 @@ Expected get_istream_size(std::ifstream &s) return Expected(static_cast(total_size)); } -Expected read_binary_file(const std::string &file_path) +Expected read_binary_file(const std::string 
&file_path, const BufferStorageParams &output_buffer_params) { std::ifstream file(file_path, std::ios::in | std::ios::binary); CHECK_AS_EXPECTED(file.good(), HAILO_OPEN_FILE_FAILURE, "Error opening file {}", file_path); @@ -42,7 +42,7 @@ Expected read_binary_file(const std::string &file_path) auto file_size = get_istream_size(file); CHECK_EXPECTED(file_size, "Failed to get file size"); - auto buffer = Buffer::create(file_size.value()); + auto buffer = Buffer::create(file_size.value(), output_buffer_params); CHECK_EXPECTED(buffer, "Failed to allocate file buffer ({} bytes}", file_size.value()); // Read the data diff --git a/hailort/common/file_utils.hpp b/hailort/common/file_utils.hpp index 0673774..028c888 100644 --- a/hailort/common/file_utils.hpp +++ b/hailort/common/file_utils.hpp @@ -24,7 +24,8 @@ Expected get_istream_size(std::ifstream &s); /** * Reads full file content into a `Buffer` */ -Expected read_binary_file(const std::string &file_path); +Expected read_binary_file(const std::string &file_path, + const BufferStorageParams &output_buffer_params = {}); } /* namespace hailort */ diff --git a/hailort/common/os/posix/os_utils.cpp b/hailort/common/os/posix/os_utils.cpp index c9b0e98..6ea226e 100644 --- a/hailort/common/os/posix/os_utils.cpp +++ b/hailort/common/os/posix/os_utils.cpp @@ -16,7 +16,9 @@ #include #include - +#if defined(__QNX__) +#define OS_UTILS__QNX_PAGE_SIZE (4096) +#endif /* defined(__QNX__) */ namespace hailort { @@ -74,6 +76,34 @@ size_t OsUtils::get_page_size() return page_size; } +size_t OsUtils::get_dma_able_alignment() +{ +#if defined(__linux__) + // TODO: HRT-12494 after supporting in linux, restore this code + // Return value if was saved already + // if (0 != DMA_ABLE_ALIGNMENT) { + // return Expected(DMA_ABLE_ALIGNMENT); + // } + // static const auto cacheline_size = sysconf(_SC_LEVEL1_DCACHE_LINESIZE); + // if (-1 == cacheline_size) { + // return make_unexpected(HAILO_INTERNAL_FAILURE); + // } + + // // Set static variable to value 
- so dont need to fetch actual value every function call + // // TODO HRT-12459: Currently use DMA_ABLE_ALIGNMENT_WRITE_HW_LIMITATION as minimum until after debug - seeing as all + // // Funtions currently calling this function are for write + // DMA_ABLE_ALIGNMENT = std::max(HailoRTCommon::DMA_ABLE_ALIGNMENT_WRITE_HW_LIMITATION, static_cast(cacheline_size)); + // return Expected(DMA_ABLE_ALIGNMENT); + + return get_page_size(); + +// TODO: implement on qnx (HRT-12356) - only needed when async api is implemented on qnx +// TODO - URT-13534 - use sys call for QNX OS to get page size +#elif defined(__QNX__) + return OS_UTILS__QNX_PAGE_SIZE +#endif +} + CursorAdjustment::CursorAdjustment(){} CursorAdjustment::~CursorAdjustment(){} diff --git a/hailort/common/os/windows/os_utils.cpp b/hailort/common/os/windows/os_utils.cpp index 3d4022f..b1d2d8c 100644 --- a/hailort/common/os/windows/os_utils.cpp +++ b/hailort/common/os/windows/os_utils.cpp @@ -14,6 +14,8 @@ #include #include "spdlog/sinks/win_eventlog_sink.h" +#define CACHE_LEVEL_INDEX (1) + namespace hailort { @@ -78,6 +80,45 @@ size_t OsUtils::get_page_size() return page_size; } +size_t OsUtils::get_dma_able_alignment() +{ + // // Return value if was saved already + // if (0 != DMA_ABLE_ALIGNMENT) { + // return Expected(DMA_ABLE_ALIGNMENT); + // } + + // size_t cacheline_size = 0; + // DWORD proc_info_struct_size = 0; + + // // We call this function to fail and get the size needed for SYSTEM_LOGICAL_PROCESSOR_INFORMATION struct + // BOOL ret_val = GetLogicalProcessorInformation(0, &proc_info_struct_size); + // CHECK_AS_EXPECTED((FALSE == ret_val) && (ERROR_INSUFFICIENT_BUFFER == GetLastError()), HAILO_INTERNAL_FAILURE, + // "GetLogicalProcessorInformation Failed with error {}", GetLastError()); + + // std::shared_ptr proc_info( + // static_cast(malloc(proc_info_struct_size)), free); + // ret_val = GetLogicalProcessorInformation(static_cast(proc_info.get()), + // &proc_info_struct_size); + // 
CHECK_AS_EXPECTED(ret_val, HAILO_INTERNAL_FAILURE, "GetLogicalProcessorInformation Failed with error {}", + // GetLastError()); + + // for (DWORD i = 0; i < proc_info_struct_size; i++) { + // // Assume same cache line for all processors + // if ((RelationCache == proc_info.get()[i].Relationship) && (CACHE_LEVEL_INDEX == proc_info.get()[i].Cache.Level)) { + // cacheline_size = proc_info.get()[i].Cache.LineSize; + // break; + // } + // } + + // // Set static variable to value - so dont need to fetch actual value every function call + // // TODO HRT-12459: Currently use DMA_ABLE_ALIGNMENT_WRITE_HW_LIMITATION as minimum until after debug - seeing as all + // // Funtions currently calling this function are for write + // DMA_ABLE_ALIGNMENT = std::max(HailoRTCommon::DMA_ABLE_ALIGNMENT_WRITE_HW_LIMITATION, static_cast(cacheline_size)); + // return Expected(DMA_ABLE_ALIGNMENT); + // TODO: HRT-12495 support page-aligned address on windows + return get_page_size(); +} + CursorAdjustment::CursorAdjustment() { // Enables Vitual Terminal Processing - enables ANSI Escape Sequences on Windows diff --git a/hailort/common/os_utils.hpp b/hailort/common/os_utils.hpp index 025ef1d..0766f90 100644 --- a/hailort/common/os_utils.hpp +++ b/hailort/common/os_utils.hpp @@ -11,6 +11,8 @@ #define _HAILO_OS_UTILS_HPP_ #include "hailo/hailort.h" +#include "hailo/expected.hpp" +#include "hailo/hailort_common.hpp" #include "common/logger_macros.hpp" @@ -63,6 +65,7 @@ public: static void set_current_thread_name(const std::string &name); static hailo_status set_current_thread_affinity(uint8_t cpu_index); static size_t get_page_size(); + static size_t get_dma_able_alignment(); }; } /* namespace hailort */ diff --git a/hailort/common/utils.hpp b/hailort/common/utils.hpp index 7e23b12..57046ed 100644 --- a/hailort/common/utils.hpp +++ b/hailort/common/utils.hpp @@ -244,13 +244,13 @@ _ISEMPTY( \ } while(0) #define CHECK_AS_RPC_STATUS(cond, reply, ret_val, ...) 
_CHECK_AS_RPC_STATUS((cond), (reply), (ret_val), ISEMPTY(__VA_ARGS__), "" __VA_ARGS__) -#define _CHECK_GRPC_STATUS(status, ret_val, warning_msg) \ - do { \ - if (!status.ok()) { \ - LOGGER__ERROR("CHECK_GRPC_STATUS failed with error massage: {}.", status.error_message()); \ - LOGGER__WARNING(warning_msg); \ - return ret_val; \ - } \ +#define _CHECK_GRPC_STATUS(status, ret_val, warning_msg) \ + do { \ + if (!status.ok()) { \ + LOGGER__ERROR("CHECK_GRPC_STATUS failed with error code: {}.", status.error_code()); \ + LOGGER__WARNING(warning_msg); \ + return ret_val; \ + } \ } while(0) #define SERVICE_WARNING_MSG ("Make sure HailoRT service is enabled and active!") diff --git a/hailort/hailort_service/CMakeLists.txt b/hailort/hailort_service/CMakeLists.txt index c2f0cbe..d302b2e 100644 --- a/hailort/hailort_service/CMakeLists.txt +++ b/hailort/hailort_service/CMakeLists.txt @@ -24,6 +24,7 @@ target_link_libraries(hailort_service spdlog::spdlog grpc++_unsecure hailort_rpc_grpc_proto + readerwriterqueue ) if(WIN32) # Needed in order to compile eth utils (we compile here ${HAILORT_COMMON_CPP_SOURCES}, consider removing) @@ -35,6 +36,7 @@ target_include_directories(hailort_service ${CMAKE_CURRENT_SOURCE_DIR} ${HAILORT_INC_DIR} ${HAILORT_COMMON_DIR} + ${HAILORT_SRC_DIR} ${COMMON_INC_DIR} ${RPC_DIR} ) diff --git a/hailort/hailort_service/hailort_rpc_service.cpp b/hailort/hailort_service/hailort_rpc_service.cpp index b401895..482c93a 100644 --- a/hailort/hailort_service/hailort_rpc_service.cpp +++ b/hailort/hailort_service/hailort_rpc_service.cpp @@ -18,6 +18,15 @@ #include "hailort_rpc_service.hpp" #include "rpc/rpc_definitions.hpp" #include "service_resource_manager.hpp" +#include "net_flow/ops/op_metadata.hpp" +#include "net_flow/ops/nms_post_process.hpp" +#include "net_flow/ops/yolov8_post_process.hpp" +#include "net_flow/ops/ssd_post_process.hpp" +#include "net_flow/ops/yolox_post_process.hpp" +#include "net_flow/ops/yolov5_op_metadata.hpp" +#include 
"net_flow/ops/yolov5_seg_op_metadata.hpp" + +#include "hef/layer_info.hpp" #include @@ -34,106 +43,39 @@ HailoRtRpcService::HailoRtRpcService() hailo_status HailoRtRpcService::flush_input_vstream(uint32_t handle) { - if (is_input_vstream_aborted(handle)) { - return HAILO_SUCCESS; - } - auto lambda = [](std::shared_ptr input_vstream) { return input_vstream->flush(); }; auto &manager = ServiceResourceManager::get_instance(); - auto status = manager.execute(handle, lambda); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("Failed to flush input vstream with status {}", status); - } - return status; + auto status = manager.execute(handle, lambda); + CHECK_SUCCESS(status, "Failed to flush input vstream with status {}", status); + + return HAILO_SUCCESS; } hailo_status HailoRtRpcService::abort_input_vstream(uint32_t handle) { - if (is_input_vstream_aborted(handle)) { - return HAILO_SUCCESS; - } - auto lambda = [](std::shared_ptr input_vstream) { return input_vstream->abort(); }; auto &manager = ServiceResourceManager::get_instance(); - auto status = manager.execute(handle, lambda); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("Failed to abort input vstream with status {}", status); - } - return status; + auto status = manager.execute(handle, lambda); + CHECK_SUCCESS(status, "Failed to abort input vstream with status {}", status); + + return HAILO_SUCCESS; } hailo_status HailoRtRpcService::abort_output_vstream(uint32_t handle) { - if (is_output_vstream_aborted(handle)) { - return HAILO_SUCCESS; - } - auto lambda = [](std::shared_ptr output_vstream) { return output_vstream->abort(); }; auto &manager = ServiceResourceManager::get_instance(); - auto status = manager.execute(handle, lambda); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("Failed to abort output vstream with status {}", status); - } - return status; -} - -bool HailoRtRpcService::is_input_vstream_aborted(uint32_t handle) -{ - auto lambda = [](std::shared_ptr input_vstream) { - return 
input_vstream->is_aborted(); - }; - auto &manager = ServiceResourceManager::get_instance(); - return manager.execute(handle, lambda); -} - -bool HailoRtRpcService::is_output_vstream_aborted(uint32_t handle) -{ - auto lambda = [](std::shared_ptr output_vstream) { - return output_vstream->is_aborted(); - }; - auto &manager = ServiceResourceManager::get_instance(); - return manager.execute(handle, lambda); -} - -hailo_status HailoRtRpcService::resume_input_vstream(uint32_t handle) -{ - if (!is_input_vstream_aborted(handle)) { - return HAILO_SUCCESS; - } - - auto lambda = [](std::shared_ptr input_vstream) { - return input_vstream->resume(); - }; - auto &manager = ServiceResourceManager::get_instance(); - auto status = manager.execute(handle, lambda); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("Failed to resume input vstream with status {}", status); - } - return status; -} - -hailo_status HailoRtRpcService::resume_output_vstream(uint32_t handle) -{ - if (!is_output_vstream_aborted(handle)) { - return HAILO_SUCCESS; - } + auto status = manager.execute(handle, lambda); + CHECK_SUCCESS(status, "Failed to abort output vstream with status {}", status); - auto lambda = [](std::shared_ptr output_vstream) { - return output_vstream->resume(); - }; - auto &manager = ServiceResourceManager::get_instance(); - auto status = manager.execute(handle, lambda); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("Failed to resume output vstream with status {}", status); - } - return status; + return HAILO_SUCCESS; } // TODO: Add a named templated release functions for InputVStream and OutputVStream to call abort before release. 
@@ -244,9 +186,22 @@ grpc::Status HailoRtRpcService::VDevice_create(grpc::ServerContext *, const VDev CHECK_EXPECTED_AS_RPC_STATUS(vdevice, reply); update_client_id_timestamp(request->pid()); - auto &manager = ServiceResourceManager::get_instance(); - auto handle = manager.register_resource(request->pid(), std::move(vdevice.release())); - reply->set_handle(handle); + std::unique_lock lock(m_vdevice_creation_mutex); + auto &vdevice_manager = ServiceResourceManager::get_instance(); + auto vdevice_handle = vdevice_manager.register_resource(request->pid(), std::move(vdevice.release())); + + auto cb_queue = VDeviceCallbacksQueue::create(MAX_QUEUE_SIZE); + CHECK_EXPECTED_AS_RPC_STATUS(cb_queue, reply); + + auto &cb_queue_manager = ServiceResourceManager::get_instance(); + auto cb_queue_handle = cb_queue_manager.register_resource(request->pid(), std::move(cb_queue.release())); + if (cb_queue_handle != vdevice_handle) { + LOGGER__ERROR("cb_queue_handle = {} must be equal to vdevice_handle ={}", cb_queue_handle, vdevice_handle); + reply->set_status(HAILO_INTERNAL_FAILURE); + return grpc::Status::OK; + } + + reply->set_handle(vdevice_handle); reply->set_status(static_cast(HAILO_SUCCESS)); return grpc::Status::OK; } @@ -254,6 +209,16 @@ grpc::Status HailoRtRpcService::VDevice_create(grpc::ServerContext *, const VDev grpc::Status HailoRtRpcService::VDevice_release(grpc::ServerContext*, const Release_Request *request, Release_Reply *reply) { + auto lambda = [](std::shared_ptr cb_queue) { + return cb_queue->shutdown(); + }; + + auto &cb_queue_manager = ServiceResourceManager::get_instance(); + auto status = cb_queue_manager.execute(request->vdevice_identifier().vdevice_handle(), lambda); + CHECK_SUCCESS_AS_RPC_STATUS(status, reply); + + cb_queue_manager.release_resource(request->vdevice_identifier().vdevice_handle(), request->pid()); + auto &manager = ServiceResourceManager::get_instance(); manager.release_resource(request->vdevice_identifier().vdevice_handle(), request->pid()); 
reply->set_status(static_cast(HAILO_SUCCESS)); @@ -312,7 +277,7 @@ grpc::Status HailoRtRpcService::VDevice_configure(grpc::ServerContext*, const VD auto &vdevice_manager = ServiceResourceManager::get_instance(); auto networks = vdevice_manager.execute>(request->identifier().vdevice_handle(), lambda, hef.release(), configure_params_map); - CHECK_SUCCESS_AS_RPC_STATUS(networks.status(), reply); + CHECK_EXPECTED_AS_RPC_STATUS(networks, reply); auto &networks_manager = ServiceResourceManager::get_instance(); for (auto network : networks.value()) { @@ -355,6 +320,42 @@ grpc::Status HailoRtRpcService::VDevice_get_default_streams_interface(grpc::Serv return grpc::Status::OK; } +grpc::Status HailoRtRpcService::VDevice_get_callback_id(grpc::ServerContext*, + const VDevice_get_callback_id_Request* request, VDevice_get_callback_id_Reply* reply) +{ + auto lambda = [](std::shared_ptr cb_queue) { + // TODO: HRT-12360 - Add a `dequeue_all` function that returns all the cb_ids currently in the queue. + // (Need to think on the shutdown case) + return cb_queue->dequeue(); + }; + + auto &cb_queue_manager = ServiceResourceManager::get_instance(); + auto cb_id_expected = cb_queue_manager.execute>(request->identifier().vdevice_handle(), lambda); + if (cb_id_expected.status() == HAILO_SHUTDOWN_EVENT_SIGNALED) { + reply->set_status(static_cast(HAILO_SHUTDOWN_EVENT_SIGNALED)); + return grpc::Status::OK; + } + CHECK_EXPECTED_AS_RPC_STATUS(cb_id_expected, reply); + auto proto_callback_id = reply->mutable_callback_id(); + *proto_callback_id = cb_id_expected.release(); + reply->set_status(static_cast(HAILO_SUCCESS)); + return grpc::Status::OK; +} + +grpc::Status HailoRtRpcService::VDevice_finish_callback_listener(grpc::ServerContext*, + const VDevice_finish_callback_listener_Request* request, VDevice_finish_callback_listener_Reply* reply) +{ + auto lambda = [](std::shared_ptr cb_queue) { + return cb_queue->shutdown(); + }; + + auto &cb_queue_manager = ServiceResourceManager::get_instance(); + 
auto status = cb_queue_manager.execute(request->identifier().vdevice_handle(), lambda); + CHECK_SUCCESS_AS_RPC_STATUS(status, reply); + reply->set_status(static_cast(HAILO_SUCCESS)); + return grpc::Status::OK; +} + grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_dup_handle(grpc::ServerContext*, const ConfiguredNetworkGroup_dup_handle_Request *request, ConfiguredNetworkGroup_dup_handle_Reply* reply) { @@ -363,7 +364,9 @@ grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_dup_handle(grpc::ServerCo auto &ng_manager = ServiceResourceManager::get_instance(); auto handle = ng_manager.dup_handle(request->identifier().network_group_handle(), request->pid()); - reply->set_handle(handle); + CHECK_EXPECTED_AS_RPC_STATUS(handle, reply); + + reply->set_handle(handle.release()); return grpc::Status::OK; } @@ -376,6 +379,95 @@ grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_release(grpc::ServerConte return grpc::Status::OK; } +grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_infer_async(grpc::ServerContext*, + const ConfiguredNetworkGroup_infer_async_Request *request, ConfiguredNetworkGroup_infer_async_Reply *reply) +{ + auto vdevice_handle = request->identifier().vdevice_handle(); + auto ng_handle = request->identifier().network_group_handle(); + auto infer_request_done_cb_idx = request->infer_request_done_cb_idx(); + + NamedBuffersCallbacks named_buffers_callbacks; + for (const auto &proto_transfer_request : request->transfer_requests()) { + auto &stream_name = proto_transfer_request.stream_name(); + auto direction = proto_transfer_request.direction(); + auto cb_idx = proto_transfer_request.cb_idx(); + BufferPtr buffer; + if (direction == HAILO_H2D_STREAM) { + // TODO: Remove memcpy after HRT-12238 + auto buffer_exp = Buffer::create_shared(reinterpret_cast(proto_transfer_request.data().c_str()), + proto_transfer_request.size(), BufferStorageParams::create_dma()); + CHECK_EXPECTED_AS_RPC_STATUS(buffer_exp, reply); + buffer = buffer_exp.release(); + } else 
{ + // TODO: HRT-12360 - Use buffer pool for the service reads + auto buffer_exp = Buffer::create_shared(proto_transfer_request.size(), BufferStorageParams::create_dma()); + CHECK_EXPECTED_AS_RPC_STATUS(buffer_exp, reply); + buffer = buffer_exp.release(); + } + + std::function transfer_done = [vdevice_handle, ng_handle, cb_idx, stream_name, direction, buffer] + (hailo_status status) + { + ProtoCallbackIdentifier cb_identifier; + cb_identifier.set_vdevice_handle(vdevice_handle); + cb_identifier.set_network_group_handle(ng_handle); + cb_identifier.set_cb_type(CALLBACK_TYPE_TRANSFER); + cb_identifier.set_stream_name(stream_name); + cb_identifier.set_cb_idx(cb_idx); + cb_identifier.set_status(status); + + auto lambda = [direction](std::shared_ptr cb_queue, ProtoCallbackIdentifier &cb_identifier, BufferPtr buffer) { + if (direction == HAILO_D2H_STREAM) { + cb_identifier.set_data(buffer->data(), buffer->size()); + } + return cb_queue->enqueue(std::move(cb_identifier)); + }; + + auto &cb_queue_manager = ServiceResourceManager::get_instance(); + auto exc_status = cb_queue_manager.execute(vdevice_handle, lambda, std::move(cb_identifier), buffer); + if (exc_status != HAILO_SUCCESS) { + LOGGER__ERROR("Failed to enqueue callback to VDeviceCallbacksQueue with status={}", status); + } + }; + named_buffers_callbacks.emplace(stream_name, std::make_pair(MemoryView(*buffer), transfer_done)); + } + + auto infer_request_done_cb = [vdevice_handle, ng_handle, infer_request_done_cb_idx](hailo_status status){ + ProtoCallbackIdentifier cb_identifier; + cb_identifier.set_vdevice_handle(vdevice_handle); + cb_identifier.set_network_group_handle(ng_handle); + cb_identifier.set_cb_type(CALLBACK_TYPE_INFER_REQUEST); + cb_identifier.set_cb_idx(infer_request_done_cb_idx); + + auto lambda = [](std::shared_ptr cb_queue, ProtoCallbackIdentifier &cb_identifier) { + return cb_queue->enqueue(std::move(cb_identifier)); + }; + + auto &cb_queue_manager = ServiceResourceManager::get_instance(); + auto 
exc_status = cb_queue_manager.execute(vdevice_handle, lambda, std::move(cb_identifier)); + if (exc_status != HAILO_SUCCESS) { + LOGGER__ERROR("Failed to enqueue callback to VDeviceCallbacksQueue with status={}", status); + } + }; + + auto lambda = [](std::shared_ptr cng, NamedBuffersCallbacks &named_buffers_callbacks, + const std::function &infer_request_done_cb) { + return cng->infer_async(named_buffers_callbacks, infer_request_done_cb); + }; + + auto &manager = ServiceResourceManager::get_instance(); + auto status = manager.execute(request->identifier().network_group_handle(), lambda, named_buffers_callbacks, infer_request_done_cb); + if (HAILO_STREAM_ABORTED_BY_USER == status) { + LOGGER__INFO("User aborted inference"); + reply->set_status(static_cast(HAILO_STREAM_ABORTED_BY_USER)); + return grpc::Status::OK; + } + CHECK_SUCCESS_AS_RPC_STATUS(status, reply); + + reply->set_status(status); + return grpc::Status::OK; +} + ProtoNamedVStreamParams get_named_params(const std::string &name, const hailo_vstream_params_t ¶ms) { ProtoNamedVStreamParams named_params; @@ -396,15 +488,15 @@ grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_make_input_vstream_params const ConfiguredNetworkGroup_make_input_vstream_params_Request *request, ConfiguredNetworkGroup_make_input_vstream_params_Reply *reply) { - auto lambda = [](std::shared_ptr cng, bool quantized, + auto lambda = [](std::shared_ptr cng, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size, std::string network_name) { - return cng->make_input_vstream_params(quantized, format_type, timeout_ms, queue_size, network_name); + return cng->make_input_vstream_params({}, format_type, timeout_ms, queue_size, network_name); }; auto &manager = ServiceResourceManager::get_instance(); - auto expected_params = manager.execute>>( - request->identifier().network_group_handle(), lambda, request->quantized(), static_cast(request->format_type()), - request->timeout_ms(), request->queue_size(), 
request->network_name()); + auto expected_params = manager.execute>>(request->identifier().network_group_handle(), + lambda, static_cast(request->format_type()), request->timeout_ms(), request->queue_size(), request->network_name()); CHECK_EXPECTED_AS_RPC_STATUS(expected_params, reply); + auto params_map = reply->mutable_vstream_params_map(); auto params_map_impl = params_map->mutable_vstream_params_map(); for (auto& name_to_params : expected_params.value()) { @@ -419,15 +511,15 @@ grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_make_output_vstream_param const ConfiguredNetworkGroup_make_output_vstream_params_Request *request, ConfiguredNetworkGroup_make_output_vstream_params_Reply *reply) { - auto lambda = [](std::shared_ptr cng, bool quantized, + auto lambda = [](std::shared_ptr cng, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size, std::string network_name) { - return cng->make_output_vstream_params(quantized, format_type, timeout_ms, queue_size, network_name); + return cng->make_output_vstream_params({}, format_type, timeout_ms, queue_size, network_name); }; auto &manager = ServiceResourceManager::get_instance(); auto expected_params = manager.execute>>(request->identifier().network_group_handle(), - lambda, request->quantized(), static_cast(request->format_type()), - request->timeout_ms(), request->queue_size(), request->network_name()); + lambda, static_cast(request->format_type()), request->timeout_ms(), request->queue_size(), request->network_name()); CHECK_EXPECTED_AS_RPC_STATUS(expected_params, reply); + auto params_map = reply->mutable_vstream_params_map(); auto params_map_impl = params_map->mutable_vstream_params_map(); for (auto& name_to_params : expected_params.value()) { @@ -442,15 +534,16 @@ grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_make_output_vstream_param const ConfiguredNetworkGroup_make_output_vstream_params_groups_Request *request, ConfiguredNetworkGroup_make_output_vstream_params_groups_Reply 
*reply) { - auto lambda = [](std::shared_ptr cng, bool quantized, + auto lambda = [](std::shared_ptr cng, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size) { - return cng->make_output_vstream_params_groups(quantized, format_type, timeout_ms, queue_size); + return cng->make_output_vstream_params_groups({}, format_type, timeout_ms, queue_size); }; auto &manager = ServiceResourceManager::get_instance(); auto expected_params = manager.execute>>>( - request->identifier().network_group_handle(), lambda, request->quantized(), static_cast(request->format_type()), + request->identifier().network_group_handle(), lambda, static_cast(request->format_type()), request->timeout_ms(), request->queue_size()); CHECK_EXPECTED_AS_RPC_STATUS(expected_params, reply); + auto params_map_vector = reply->mutable_vstream_params_groups(); for (auto ¶ms_map : expected_params.value()) { ProtoNamedVStreamParamsMap params_map_proto; @@ -470,28 +563,46 @@ grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_get_default_stream_interf ConfiguredNetworkGroup_get_default_stream_interface_Reply *reply) { auto lambda = [](std::shared_ptr cng) { - return cng->get_default_streams_interface(); + return cng->get_default_streams_interface(); }; auto &net_group_manager = ServiceResourceManager::get_instance(); - auto expected_stream_interface = net_group_manager.execute>(request->identifier().network_group_handle(), lambda); + auto expected_stream_interface = net_group_manager.execute>(request->identifier().network_group_handle(), + lambda); CHECK_EXPECTED_AS_RPC_STATUS(expected_stream_interface, reply); + reply->set_stream_interface(static_cast(expected_stream_interface.value())); reply->set_status(static_cast(HAILO_SUCCESS)); return grpc::Status::OK; } +grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_shutdown(grpc::ServerContext*, + const ConfiguredNetworkGroup_shutdown_Request *request, ConfiguredNetworkGroup_shutdown_Reply *reply) +{ + auto lambda = [](std::shared_ptr 
cng) { + return cng->shutdown(); + }; + auto &net_group_manager = ServiceResourceManager::get_instance(); + auto status = net_group_manager.execute( + request->identifier().network_group_handle(), lambda); + CHECK_SUCCESS_AS_RPC_STATUS(status, reply); + + reply->set_status(status); + return grpc::Status::OK; +} + grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_get_output_vstream_groups(grpc::ServerContext*, const ConfiguredNetworkGroup_get_output_vstream_groups_Request *request, ConfiguredNetworkGroup_get_output_vstream_groups_Reply *reply) { auto lambda = [](std::shared_ptr cng) { - return cng->get_output_vstream_groups(); + return cng->get_output_vstream_groups(); }; auto &net_group_manager = ServiceResourceManager::get_instance(); auto expected_output_vstream_groups = net_group_manager.execute>>>( request->identifier().network_group_handle(), lambda); CHECK_EXPECTED_AS_RPC_STATUS(expected_output_vstream_groups, reply); - auto output_vstream_groups = expected_output_vstream_groups.value(); + + auto output_vstream_groups = expected_output_vstream_groups.release(); auto groups_proto = reply->mutable_output_vstream_groups(); for (auto& group : output_vstream_groups) { ProtoVStreamGroup group_proto; @@ -543,17 +654,352 @@ void serialize_vstream_infos(ConfiguredNetworkGroup_get_vstream_infos_Reply *rep } } +void serialize_layer_info(const LayerInfo &layer_info, ProtoLayerInfo *layer_info_proto) +{ + layer_info_proto->set_type(static_cast(layer_info.type)); + layer_info_proto->set_direction(static_cast(layer_info.direction)); + layer_info_proto->set_stream_index(layer_info.stream_index); + layer_info_proto->set_dma_engine_index(layer_info.dma_engine_index); + layer_info_proto->set_name(std::string(layer_info.name)); + layer_info_proto->set_network_name(std::string(layer_info.network_name)); + layer_info_proto->set_network_index(layer_info.network_index); + layer_info_proto->set_max_shmifo_size(layer_info.max_shmifo_size); + 
layer_info_proto->set_context_index(layer_info.context_index); + layer_info_proto->set_pad_index(layer_info.pad_index); + + // Transformation and shape info + auto shape_proto = layer_info_proto->mutable_shape(); + shape_proto->set_height(layer_info.shape.height); + shape_proto->set_width(layer_info.shape.width); + shape_proto->set_features(layer_info.shape.features); + + auto hw_shape_proto = layer_info_proto->mutable_hw_shape(); + hw_shape_proto->set_height(layer_info.hw_shape.height); + hw_shape_proto->set_width(layer_info.hw_shape.width); + hw_shape_proto->set_features(layer_info.hw_shape.features); + + layer_info_proto->set_hw_data_bytes(layer_info.hw_data_bytes); + + auto format_proto = layer_info_proto->mutable_format(); + format_proto->set_flags(layer_info.format.flags); + format_proto->set_order(layer_info.format.order); + format_proto->set_type(layer_info.format.type); + + auto single_quant_info_proto = layer_info_proto->mutable_quant_info(); + single_quant_info_proto->set_qp_zp(layer_info.quant_info.qp_zp); + single_quant_info_proto->set_qp_scale(layer_info.quant_info.qp_scale); + single_quant_info_proto->set_limvals_min(layer_info.quant_info.limvals_min); + single_quant_info_proto->set_limvals_max(layer_info.quant_info.limvals_max); + + auto quant_infos_proto = layer_info_proto->mutable_quant_infos(); + for (const auto &quant_info : layer_info.quant_infos) { + ProtoQuantInfo proto_quant_info; + proto_quant_info.set_qp_zp(quant_info.qp_zp); + proto_quant_info.set_qp_scale(quant_info.qp_scale); + proto_quant_info.set_limvals_min(quant_info.limvals_min); + proto_quant_info.set_limvals_max(quant_info.limvals_max); + quant_infos_proto->Add(std::move(proto_quant_info)); + } + + auto proto_nms_info = layer_info_proto->mutable_nms_info(); + proto_nms_info->set_number_of_classes(layer_info.nms_info.number_of_classes); + proto_nms_info->set_max_bboxes_per_class(layer_info.nms_info.max_bboxes_per_class); + 
proto_nms_info->set_bbox_size(layer_info.nms_info.bbox_size); + proto_nms_info->set_chunks_per_frame(layer_info.nms_info.chunks_per_frame); + proto_nms_info->set_is_defused(layer_info.nms_info.is_defused); + auto proto_nms_info_defuse_info = proto_nms_info->mutable_defuse_info(); + proto_nms_info_defuse_info->set_class_group_index(layer_info.nms_info.defuse_info.class_group_index); + proto_nms_info_defuse_info->set_original_name(std::string(layer_info.nms_info.defuse_info.original_name)); + proto_nms_info->set_burst_size(layer_info.nms_info.burst_size); + proto_nms_info->set_burst_type(static_cast(layer_info.nms_info.burst_type)); + + // Mux info + layer_info_proto->set_is_mux(layer_info.is_mux); + + auto predecessor_proto = layer_info_proto->mutable_predecessor(); + for (const auto &pred : layer_info.predecessor) { + ProtoLayerInfo proto_pred; + serialize_layer_info(pred, &proto_pred); + predecessor_proto->Add(std::move(proto_pred)); + } + + layer_info_proto->set_height_gcd(layer_info.height_gcd); + + auto ratios_proto = layer_info_proto->mutable_height_ratios(); + for (const auto &height_ratio : layer_info.height_ratios) { + ratios_proto->Add(height_ratio); + } + + // Multi planes info + layer_info_proto->set_is_multi_planar(layer_info.is_multi_planar); + + auto planes_proto = layer_info_proto->mutable_planes(); + for (const auto &pred : layer_info.planes) { + ProtoLayerInfo proto_pred; + serialize_layer_info(pred, &proto_pred); + planes_proto->Add(std::move(proto_pred)); + } + + layer_info_proto->set_plane_index(layer_info.plane_index); + + // Defused nms info + layer_info_proto->set_is_defused_nms(layer_info.is_defused_nms); + + auto fused_proto = layer_info_proto->mutable_fused_nms_layer(); + for (const auto &fused : layer_info.fused_nms_layer) { + ProtoLayerInfo proto_fused_layer; + serialize_layer_info(fused, &proto_fused_layer); + fused_proto->Add(std::move(proto_fused_layer)); + } +} + +void serialize_buffer_metadata(const std::pair &pair, 
ProtoNamedMetadata *named_metadata_proto) +{ + named_metadata_proto->set_name(pair.first); + + auto metadata_params_proto = named_metadata_proto->mutable_params(); + + auto shape_proto = metadata_params_proto->mutable_shape(); + shape_proto->set_height(pair.second.shape.height); + shape_proto->set_width(pair.second.shape.width); + shape_proto->set_features(pair.second.shape.features); + + auto padded_shape_proto = metadata_params_proto->mutable_padded_shape(); + padded_shape_proto->set_height(pair.second.padded_shape.height); + padded_shape_proto->set_width(pair.second.padded_shape.width); + padded_shape_proto->set_features(pair.second.padded_shape.features); + + auto format_proto = metadata_params_proto->mutable_format(); + format_proto->set_type(pair.second.format.type); + format_proto->set_order(pair.second.format.order); + format_proto->set_flags(pair.second.format.flags); + + auto quant_info_proto = metadata_params_proto->mutable_quant_info(); + quant_info_proto->set_qp_zp(pair.second.quant_info.qp_zp); + quant_info_proto->set_qp_scale(pair.second.quant_info.qp_scale); + quant_info_proto->set_limvals_min(pair.second.quant_info.limvals_min); + quant_info_proto->set_limvals_max(pair.second.quant_info.limvals_max); +} + +void serialize_input_metadata(const std::unordered_map &inputs_metadata, ProtoOpMetadata *op_metadata_proto) +{ + for (const auto &pair : inputs_metadata) { + auto input_metadata_proto = op_metadata_proto->add_inputs_metadata(); + serialize_buffer_metadata(pair, input_metadata_proto); + } +} + +void serialize_output_metadata(const std::unordered_map &outputs_metadata, ProtoOpMetadata *op_metadata_proto) +{ + for (const auto &pair : outputs_metadata) { + auto output_metadata_proto = op_metadata_proto->add_outputs_metadata(); + serialize_buffer_metadata(pair, output_metadata_proto); + } +} + +void serialize_yolov5_op_metadata(hailort::net_flow::OpMetadata &op_metadata, ProtoOpMetadata *op_metadata_proto) +{ + hailort::net_flow::Yolov5OpMetadata* 
yolov5_op_metadata = static_cast(&op_metadata); + auto &yolov5_config = yolov5_op_metadata->yolov5_config(); + auto yolov5_config_proto = op_metadata_proto->mutable_yolov5_config(); + + yolov5_config_proto->set_image_height(yolov5_config.image_height); + yolov5_config_proto->set_image_width(yolov5_config.image_width); + + auto yolov5_config_anchors_list_proto = yolov5_config_proto->mutable_yolov5_anchors(); + for (auto &layer_anchors_pair : yolov5_config.anchors) { + ProtoYolov5Anchors yolov5_anchors_proto; + yolov5_anchors_proto.set_layer(layer_anchors_pair.first); + auto yolov5_anchors_list_proto = yolov5_anchors_proto.mutable_anchors(); + for (auto &anchor : layer_anchors_pair.second) { + yolov5_anchors_list_proto->Add(anchor); + } + yolov5_config_anchors_list_proto->Add(std::move(yolov5_anchors_proto)); + } +} + +void serialize_ssd_op_metadata(hailort::net_flow::OpMetadata &op_metadata, ProtoOpMetadata *op_metadata_proto) +{ + hailort::net_flow::SSDOpMetadata* ssd_op_metadata = static_cast(&op_metadata); + auto &ssd_config = ssd_op_metadata->ssd_config(); + auto ssd_config_proto = op_metadata_proto->mutable_ssd_config(); + + ssd_config_proto->set_image_height(ssd_config.image_height); + ssd_config_proto->set_image_width(ssd_config.image_width); + ssd_config_proto->set_centers_scale_factor(ssd_config.centers_scale_factor); + ssd_config_proto->set_bbox_dimensions_scale_factor(ssd_config.bbox_dimensions_scale_factor); + ssd_config_proto->set_ty_index(ssd_config.ty_index); + ssd_config_proto->set_tx_index(ssd_config.tx_index); + ssd_config_proto->set_th_index(ssd_config.th_index); + ssd_config_proto->set_tw_index(ssd_config.tw_index); + ssd_config_proto->set_normalize_boxes(ssd_config.normalize_boxes); + + auto ssd_reg_to_cls_list_proto = ssd_config_proto->mutable_reg_to_cls_inputs(); + for (auto ®_to_cls_input : ssd_config.reg_to_cls_inputs) { + ProtoSSDRegToClsInputs ssd_reg_to_cls_proto; + ssd_reg_to_cls_proto.set_reg(reg_to_cls_input.first); + 
ssd_reg_to_cls_proto.set_cls(reg_to_cls_input.second); + ssd_reg_to_cls_list_proto->Add(std::move(ssd_reg_to_cls_proto)); + } + + auto ssd_anchors_list_proto = ssd_config_proto->mutable_anchors(); + for (auto &anchors : ssd_config.anchors) { + ProtoSSDAnchors ssd_anchors_proto; + ssd_anchors_proto.set_layer(anchors.first); + auto ssd_anchors_per_layer_proto = ssd_anchors_proto.mutable_anchors_per_layer(); + for (auto anchor : anchors.second) { + ssd_anchors_per_layer_proto->Add(anchor); + } + ssd_anchors_list_proto->Add(std::move(ssd_anchors_proto)); + } +} + +void serialize_yolov8_op_metadata(hailort::net_flow::OpMetadata &op_metadata, ProtoOpMetadata *op_metadata_proto) +{ + hailort::net_flow::Yolov8OpMetadata* yolov8_op_metadata = static_cast(&op_metadata); + auto &yolov8_config = yolov8_op_metadata->yolov8_config(); + auto yolov8_config_proto = op_metadata_proto->mutable_yolov8_config(); + + yolov8_config_proto->set_image_height(yolov8_config.image_height); + yolov8_config_proto->set_image_width(yolov8_config.image_width); + + auto yolov8_reg_to_cls_list_proto = yolov8_config_proto->mutable_reg_to_cls_inputs(); + for (auto ®_to_cls_input : yolov8_config.reg_to_cls_inputs) { + ProtoYolov8MatchingLayersNames yolov8_matching_later_names_proto; + yolov8_matching_later_names_proto.set_reg(reg_to_cls_input.reg); + yolov8_matching_later_names_proto.set_cls(reg_to_cls_input.cls); + yolov8_matching_later_names_proto.set_stride(reg_to_cls_input.stride); + yolov8_reg_to_cls_list_proto->Add(std::move(yolov8_matching_later_names_proto)); + } +} + +void serialize_yolox_op_metadata(hailort::net_flow::OpMetadata &op_metadata, ProtoOpMetadata *op_metadata_proto) +{ + hailort::net_flow::YoloxOpMetadata* yolox_op_metadata = static_cast(&op_metadata); + auto &yolox_config = yolox_op_metadata->yolox_config(); + auto yolox_config_proto = op_metadata_proto->mutable_yolox_config(); + + yolox_config_proto->set_image_height(yolox_config.image_height); + 
yolox_config_proto->set_image_width(yolox_config.image_width); + + auto yolox_reg_to_cls_list_proto = yolox_config_proto->mutable_input_names(); + for (auto &input_name : yolox_config.input_names) { + ProtoYoloxMatchingLayersNames yolox_input_name_proto; + yolox_input_name_proto.set_reg(input_name.reg); + yolox_input_name_proto.set_obj(input_name.obj); + yolox_input_name_proto.set_cls(input_name.cls); + yolox_reg_to_cls_list_proto->Add(std::move(yolox_input_name_proto)); + } +} + +void serialize_yolov5seg_op_metadata(hailort::net_flow::OpMetadata &op_metadata, ProtoOpMetadata *op_metadata_proto) +{ + hailort::net_flow::Yolov5SegOpMetadata* yolov5seg_op_metadata = static_cast(&op_metadata); + + auto &yolov5_config = yolov5seg_op_metadata->yolov5_config(); + auto yolov5_config_proto = op_metadata_proto->mutable_yolov5_config(); + + yolov5_config_proto->set_image_height(yolov5_config.image_height); + yolov5_config_proto->set_image_width(yolov5_config.image_width); + + auto yolov5_config_anchors_list_proto = yolov5_config_proto->mutable_yolov5_anchors(); + for (auto &layer_anchors_pair : yolov5_config.anchors) { + ProtoYolov5Anchors yolov5_anchors_proto; + yolov5_anchors_proto.set_layer(layer_anchors_pair.first); + auto yolov5_anchors_list_proto = yolov5_anchors_proto.mutable_anchors(); + for (auto &anchor : layer_anchors_pair.second) { + yolov5_anchors_list_proto->Add(anchor); + } + yolov5_config_anchors_list_proto->Add(std::move(yolov5_anchors_proto)); + } + + auto &yolov5seg_config = yolov5seg_op_metadata->yolov5seg_config(); + auto yolov5seg_config_proto = op_metadata_proto->mutable_yolov5seg_config(); + + yolov5seg_config_proto->set_mask_threshold(yolov5seg_config.mask_threshold); + yolov5seg_config_proto->set_layer_name(yolov5seg_config.proto_layer_name); +} + +void serialize_op_matadata(hailort::net_flow::OpMetadata &op_metadata, ProtoOpMetadata *op_metadata_proto) +{ + op_metadata_proto->set_name(std::string(op_metadata.get_name())); + 
op_metadata_proto->set_type(static_cast(op_metadata.type())); + + // Init + set values for inputs_metadata + auto &inputs_metadata = op_metadata.inputs_metadata(); + serialize_input_metadata(inputs_metadata, op_metadata_proto); + + // Init + set values for outputs_metadata + auto &outputs_metadata = op_metadata.outputs_metadata(); + serialize_output_metadata(outputs_metadata, op_metadata_proto); + + if ((op_metadata.type() == net_flow::OperationType::YOLOX) | (op_metadata.type() == net_flow::OperationType::YOLOV5) | + (op_metadata.type() == net_flow::OperationType::YOLOV8) | (op_metadata.type() == net_flow::OperationType::SSD) | + (op_metadata.type() == net_flow::OperationType::YOLOV5SEG) | (op_metadata.type() == net_flow::OperationType::IOU)) { + // NMS fields + hailort::net_flow::NmsOpMetadata* nms_op_metadata = static_cast(&op_metadata); + auto &nms_config = nms_op_metadata->nms_config(); + auto nms_config_proto = op_metadata_proto->mutable_nms_post_process_config(); + nms_config_proto->set_nms_score_th(nms_config.nms_score_th); + nms_config_proto->set_nms_iou_th(nms_config.nms_iou_th); + nms_config_proto->set_max_proposals_per_class(nms_config.max_proposals_per_class); + nms_config_proto->set_number_of_classes(nms_config.number_of_classes); + nms_config_proto->set_background_removal(nms_config.background_removal); + nms_config_proto->set_background_removal_index(nms_config.background_removal_index); + nms_config_proto->set_cross_classes(nms_config.cross_classes); + } + + switch (op_metadata.type()) { + case net_flow::OperationType::YOLOV5: { + serialize_yolov5_op_metadata(op_metadata, op_metadata_proto); + break; + } + case net_flow::OperationType::SSD: { + serialize_ssd_op_metadata(op_metadata, op_metadata_proto); + break; + } + case net_flow::OperationType::YOLOV8: { + serialize_yolov8_op_metadata(op_metadata, op_metadata_proto); + break; + } + case net_flow::OperationType::YOLOX: { + serialize_yolox_op_metadata(op_metadata, op_metadata_proto); + break; + } + 
+ case net_flow::OperationType::YOLOV5SEG: { + serialize_yolov5seg_op_metadata(op_metadata, op_metadata_proto); + break; + } + default: { + // IOU, SOFTMAX, ARGMAX - nothing to do, no additional members + } + } +} + +void serialize_ops_metadata(std::vector &ops_metadata, ProtoOpsMetadata *ops_metadata_proto) +{ + auto ops_metadata_list_proto = ops_metadata_proto->mutable_ops_metadata(); + for (auto& op_metadata : ops_metadata) { + ProtoOpMetadata op_metadata_proto; + serialize_op_matadata(*op_metadata, &op_metadata_proto); + ops_metadata_list_proto->Add(std::move(op_metadata_proto)); + } +} + + grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_get_input_vstream_infos(grpc::ServerContext*, const ConfiguredNetworkGroup_get_vstream_infos_Request *request, ConfiguredNetworkGroup_get_vstream_infos_Reply *reply) { auto lambda = [](std::shared_ptr cng, std::string network_name) { - return cng->get_input_vstream_infos(network_name); + return cng->get_input_vstream_infos(network_name); }; auto &net_group_manager = ServiceResourceManager::get_instance(); auto expected_vstream_infos = net_group_manager.execute>>( request->identifier().network_group_handle(), lambda, request->network_name()); CHECK_EXPECTED_AS_RPC_STATUS(expected_vstream_infos, reply); + serialize_vstream_infos(reply, expected_vstream_infos.value()); reply->set_status(static_cast(HAILO_SUCCESS)); return grpc::Status::OK; @@ -564,12 +1010,13 @@ grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_get_output_vstream_infos( ConfiguredNetworkGroup_get_vstream_infos_Reply *reply) { auto lambda = [](std::shared_ptr cng, std::string network_name) { - return cng->get_output_vstream_infos(network_name); + return cng->get_output_vstream_infos(network_name); }; auto &net_group_manager = ServiceResourceManager::get_instance(); auto expected_vstream_infos = net_group_manager.execute>>( request->identifier().network_group_handle(), lambda, request->network_name()); CHECK_EXPECTED_AS_RPC_STATUS(expected_vstream_infos, 
reply); + serialize_vstream_infos(reply, expected_vstream_infos.value()); reply->set_status(static_cast(HAILO_SUCCESS)); return grpc::Status::OK; @@ -580,12 +1027,13 @@ grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_get_all_vstream_infos(grp ConfiguredNetworkGroup_get_vstream_infos_Reply *reply) { auto lambda = [](std::shared_ptr cng, std::string network_name) { - return cng->get_all_vstream_infos(network_name); + return cng->get_all_vstream_infos(network_name); }; auto &net_group_manager = ServiceResourceManager::get_instance(); auto expected_vstream_infos = net_group_manager.execute>>( request->identifier().network_group_handle(), lambda, request->network_name()); CHECK_EXPECTED_AS_RPC_STATUS(expected_vstream_infos, reply); + serialize_vstream_infos(reply, expected_vstream_infos.value()); reply->set_status(static_cast(HAILO_SUCCESS)); return grpc::Status::OK; @@ -599,7 +1047,9 @@ grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_is_scheduled(grpc::Server return cng->is_scheduled(); }; auto &manager = ServiceResourceManager::get_instance(); - auto is_scheduled = manager.execute(request->identifier().network_group_handle(), lambda); + auto is_scheduled = manager.execute>(request->identifier().network_group_handle(), lambda); + CHECK_EXPECTED_AS_RPC_STATUS(is_scheduled, reply); + reply->set_is_scheduled(static_cast(is_scheduled)); reply->set_status(static_cast(HAILO_SUCCESS)); return grpc::Status::OK; @@ -610,12 +1060,13 @@ grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_set_scheduler_timeout(grp ConfiguredNetworkGroup_set_scheduler_timeout_Reply *reply) { auto lambda = [](std::shared_ptr cng, std::chrono::milliseconds timeout_ms, std::string network_name) { - return cng->set_scheduler_timeout(timeout_ms, network_name); + return cng->set_scheduler_timeout(timeout_ms, network_name); }; auto &net_group_manager = ServiceResourceManager::get_instance(); - auto status = net_group_manager.execute(request->identifier().network_group_handle(), lambda, - 
static_cast(request->timeout_ms()), - request->network_name()); + auto status = net_group_manager.execute(request->identifier().network_group_handle(), lambda, + static_cast(request->timeout_ms()), request->network_name()); + CHECK_SUCCESS_AS_RPC_STATUS(status, reply); + reply->set_status(status); return grpc::Status::OK; } @@ -625,11 +1076,13 @@ grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_set_scheduler_threshold(g ConfiguredNetworkGroup_set_scheduler_threshold_Reply *reply) { auto lambda = [](std::shared_ptr cng, uint32_t threshold, std::string network_name) { - return cng->set_scheduler_threshold(threshold, network_name); + return cng->set_scheduler_threshold(threshold, network_name); }; auto &net_group_manager = ServiceResourceManager::get_instance(); - auto status = net_group_manager.execute(request->identifier().network_group_handle(), lambda, request->threshold(), - request->network_name()); + auto status = net_group_manager.execute(request->identifier().network_group_handle(), lambda, + request->threshold(), request->network_name()); + CHECK_SUCCESS_AS_RPC_STATUS(status, reply); + reply->set_status(status); return grpc::Status::OK; } @@ -639,11 +1092,13 @@ grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_set_scheduler_priority(gr ConfiguredNetworkGroup_set_scheduler_priority_Reply *reply) { auto lambda = [](std::shared_ptr cng, uint8_t priority, std::string network_name) { - return cng->set_scheduler_priority(priority, network_name); + return cng->set_scheduler_priority(priority, network_name); }; auto &net_group_manager = ServiceResourceManager::get_instance(); - auto status = net_group_manager.execute(request->identifier().network_group_handle(), lambda, static_cast(request->priority()), - request->network_name()); + auto status = net_group_manager.execute(request->identifier().network_group_handle(), lambda, + static_cast(request->priority()), request->network_name()); + CHECK_SUCCESS_AS_RPC_STATUS(status, reply); + 
reply->set_status(status); return grpc::Status::OK; } @@ -653,12 +1108,13 @@ grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_get_config_params(grpc::S ConfiguredNetworkGroup_get_config_params_Reply *reply) { auto lambda = [](std::shared_ptr cng) { - return cng->get_config_params(); + return cng->get_config_params(); }; auto &net_group_manager = ServiceResourceManager::get_instance(); auto expected_params = net_group_manager.execute>(request->identifier().network_group_handle(), lambda); CHECK_EXPECTED_AS_RPC_STATUS(expected_params, reply); - auto net_configure_params = expected_params.value(); + + auto net_configure_params = expected_params.release(); auto proto_network_configure_params = reply->mutable_params(); proto_network_configure_params->set_batch_size(net_configure_params.batch_size); proto_network_configure_params->set_power_mode(net_configure_params.power_mode); @@ -716,7 +1172,7 @@ grpc::Status HailoRtRpcService::InputVStreams_create(grpc::ServerContext *, cons auto lambda = [](std::shared_ptr cng, const std::map &inputs_params) { - return cng->create_input_vstreams(inputs_params); + return cng->create_input_vstreams(inputs_params); }; auto vstreams_expected = net_group_manager.execute>>(network_group_handle, lambda, inputs_params); CHECK_EXPECTED_AS_RPC_STATUS(vstreams_expected, reply); @@ -737,19 +1193,9 @@ grpc::Status HailoRtRpcService::InputVStream_release(grpc::ServerContext *, cons Release_Reply *reply) { auto vstream_handle = request->vstream_identifier().vstream_handle(); - auto was_aborted = is_input_vstream_aborted(vstream_handle); - flush_input_vstream(vstream_handle); - abort_input_vstream(vstream_handle); auto &manager = ServiceResourceManager::get_instance(); - auto resource = manager.release_resource(vstream_handle, request->pid()); - auto status = HAILO_SUCCESS; - if (resource && (!was_aborted)) { - status = resource->resume(); - if (HAILO_SUCCESS != status) { - LOGGER__INFO("Failed to resume input vstream {} after destruction", 
resource->name()); - } - } - reply->set_status(static_cast(status)); + manager.release_resource(vstream_handle, request->pid()); + reply->set_status(static_cast(HAILO_SUCCESS)); return grpc::Status::OK; } @@ -785,7 +1231,7 @@ grpc::Status HailoRtRpcService::OutputVStreams_create(grpc::ServerContext *, con net_group_manager.dup_handle(network_group_handle, client_pid); auto lambda = [](std::shared_ptr cng, const std::map &output_params) { - return cng->create_output_vstreams(output_params); + return cng->create_output_vstreams(output_params); }; auto vstreams_expected = net_group_manager.execute>>(network_group_handle, lambda, output_params); CHECK_EXPECTED_AS_RPC_STATUS(vstreams_expected, reply); @@ -806,18 +1252,9 @@ grpc::Status HailoRtRpcService::OutputVStream_release(grpc::ServerContext *, con Release_Reply *reply) { auto vstream_handle = request->vstream_identifier().vstream_handle(); - auto was_aborted = is_output_vstream_aborted(vstream_handle); - abort_output_vstream(vstream_handle); auto &manager = ServiceResourceManager::get_instance(); - auto resource = manager.release_resource(vstream_handle, request->pid()); - auto status = HAILO_SUCCESS; - if (resource && (!was_aborted)) { - status = resource->resume(); - if (HAILO_SUCCESS != status) { - LOGGER__INFO("Failed to resume output vstream {} after destruction", resource->name()); - } - } - reply->set_status(static_cast(status)); + manager.release_resource(vstream_handle, request->pid()); + reply->set_status(static_cast(HAILO_SUCCESS)); return grpc::Status::OK; } @@ -826,11 +1263,13 @@ grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_name(grpc::ServerContext* ConfiguredNetworkGroup_name_Reply *reply) { auto lambda = [](std::shared_ptr cng) { - return cng->name(); + return cng->name(); }; auto &manager = ServiceResourceManager::get_instance(); - auto network_group_name = manager.execute(request->identifier().network_group_handle(), lambda); - reply->set_network_group_name(network_group_name); + auto 
network_group_name = manager.execute>(request->identifier().network_group_handle(), lambda); + CHECK_EXPECTED_AS_RPC_STATUS(network_group_name, reply); + + reply->set_network_group_name(network_group_name.release()); reply->set_status(static_cast(HAILO_SUCCESS)); return grpc::Status::OK; } @@ -839,13 +1278,14 @@ grpc::Status HailoRtRpcService::InputVStream_is_multi_planar(grpc::ServerContext InputVStream_is_multi_planar_Reply *reply) { auto lambda = [](std::shared_ptr input_vstream) { - return input_vstream->is_multi_planar(); + return input_vstream->is_multi_planar(); }; auto &manager = ServiceResourceManager::get_instance(); - auto multi_planar = manager.execute(request->identifier().vstream_handle(), lambda); + auto multi_planar = manager.execute>(request->identifier().vstream_handle(), lambda); + CHECK_EXPECTED_AS_RPC_STATUS(multi_planar, reply); reply->set_status(static_cast(HAILO_SUCCESS)); - reply->set_is_multi_planar(multi_planar); + reply->set_is_multi_planar(multi_planar.release()); return grpc::Status::OK; } @@ -854,10 +1294,10 @@ grpc::Status HailoRtRpcService::InputVStream_write(grpc::ServerContext*, const I { std::vector data(request->data().begin(), request->data().end()); auto lambda = [](std::shared_ptr input_vstream, const MemoryView &buffer) { - return input_vstream->write(std::move(buffer)); + return input_vstream->write(std::move(buffer)); }; auto &manager = ServiceResourceManager::get_instance(); - auto status = manager.execute(request->identifier().vstream_handle(), lambda, MemoryView::create_const(data.data(), data.size())); + auto status = manager.execute(request->identifier().vstream_handle(), lambda, MemoryView::create_const(data.data(), data.size())); if (HAILO_STREAM_ABORTED_BY_USER == status) { LOGGER__INFO("User aborted VStream write."); @@ -884,10 +1324,10 @@ grpc::Status HailoRtRpcService::InputVStream_write_pix(grpc::ServerContext*, con } auto lambda = [](std::shared_ptr input_vstream, const hailo_pix_buffer_t &buffer) { - return 
input_vstream->write(std::move(buffer)); + return input_vstream->write(std::move(buffer)); }; auto &manager = ServiceResourceManager::get_instance(); - auto status = manager.execute(request->identifier().vstream_handle(), lambda, pix_buffer); + auto status = manager.execute(request->identifier().vstream_handle(), lambda, pix_buffer); if (HAILO_STREAM_ABORTED_BY_USER == status) { LOGGER__INFO("User aborted VStream write."); @@ -904,11 +1344,13 @@ grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_get_network_infos(grpc::S ConfiguredNetworkGroup_get_network_infos_Reply *reply) { auto lambda = [](std::shared_ptr cng) { - return cng->get_network_infos(); + return cng->get_network_infos(); }; auto &manager = ServiceResourceManager::get_instance(); - auto expected_network_infos = manager.execute>>(request->identifier().network_group_handle(), lambda); + auto expected_network_infos = manager.execute>>(request->identifier().network_group_handle(), + lambda); CHECK_EXPECTED_AS_RPC_STATUS(expected_network_infos, reply); + auto infos_proto = reply->mutable_network_infos(); for (auto& info : expected_network_infos.value()) { infos_proto->Add(std::string(info.name)); @@ -922,10 +1364,11 @@ grpc::Status HailoRtRpcService::OutputVStream_read(grpc::ServerContext*, const O { std::vector data(request->size()); auto lambda = [](std::shared_ptr output_vstream, MemoryView &buffer) { - return output_vstream->read(std::move(buffer)); + return output_vstream->read(std::move(buffer)); }; auto &manager = ServiceResourceManager::get_instance(); - auto status = manager.execute(request->identifier().vstream_handle(), lambda, MemoryView(data.data(), data.size())); + auto status = manager.execute(request->identifier().vstream_handle(), lambda, MemoryView(data.data(), data.size())); + if (HAILO_STREAM_ABORTED_BY_USER == status) { LOGGER__INFO("User aborted VStream read."); reply->set_status(static_cast(HAILO_STREAM_ABORTED_BY_USER)); @@ -942,13 +1385,15 @@ grpc::Status 
HailoRtRpcService::ConfiguredNetworkGroup_get_all_stream_infos(grpc ConfiguredNetworkGroup_get_all_stream_infos_Reply *reply) { auto lambda = [](std::shared_ptr cng) { - return cng->get_all_stream_infos(); + return cng->get_all_stream_infos(); }; auto &manager = ServiceResourceManager::get_instance(); - auto expected_stream_infos = manager.execute>>(request->identifier().network_group_handle(), lambda); + auto expected_stream_infos = manager.execute>>(request->identifier().network_group_handle(), + lambda); CHECK_EXPECTED_AS_RPC_STATUS(expected_stream_infos, reply); + auto proto_stream_infos = reply->mutable_stream_infos(); - for (auto& stream_info : expected_stream_infos.value()) { + for (const auto &stream_info : expected_stream_infos.value()) { ProtoStreamInfo proto_stream_info; if (stream_info.format.order == HAILO_FORMAT_ORDER_HAILO_NMS) { auto proto_nms_info = proto_stream_info.mutable_nms_info(); @@ -999,7 +1444,7 @@ grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_get_latency_measurement(g ConfiguredNetworkGroup_get_latency_measurement_Reply *reply) { auto lambda = [](std::shared_ptr cng, const std::string &network_name) { - return cng->get_latency_measurement(network_name); + return cng->get_latency_measurement(network_name); }; auto &manager = ServiceResourceManager::get_instance(); auto expected_latency_result = manager.execute>( @@ -1019,11 +1464,13 @@ grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_is_multi_context(grpc::Se ConfiguredNetworkGroup_is_multi_context_Reply *reply) { auto lambda = [](std::shared_ptr cng) { - return cng->is_multi_context(); + return cng->is_multi_context(); }; auto &manager = ServiceResourceManager::get_instance(); - auto is_multi_context = manager.execute(request->identifier().network_group_handle(), lambda); - reply->set_is_multi_context(static_cast(is_multi_context)); + auto is_multi_context = manager.execute>(request->identifier().network_group_handle(), lambda); + 
CHECK_EXPECTED_AS_RPC_STATUS(is_multi_context, reply); + + reply->set_is_multi_context(static_cast(is_multi_context.release())); reply->set_status(static_cast(HAILO_SUCCESS)); return grpc::Status::OK; } @@ -1033,11 +1480,13 @@ grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_get_sorted_output_names(g ConfiguredNetworkGroup_get_sorted_output_names_Reply *reply) { auto lambda = [](std::shared_ptr cng) { - return cng->get_sorted_output_names(); + return cng->get_sorted_output_names(); }; auto &manager = ServiceResourceManager::get_instance(); - auto sorted_output_names_expected = manager.execute>>(request->identifier().network_group_handle(), lambda); + auto sorted_output_names_expected = manager.execute>>(request->identifier().network_group_handle(), + lambda); CHECK_EXPECTED_AS_RPC_STATUS(sorted_output_names_expected, reply); + auto sorted_output_names_proto = reply->mutable_sorted_output_names(); for (auto &name : sorted_output_names_expected.value()) { sorted_output_names_proto->Add(std::move(name)); @@ -1046,17 +1495,121 @@ grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_get_sorted_output_names(g return grpc::Status::OK; } +grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_get_min_buffer_pool_size(grpc::ServerContext*, + const ConfiguredNetworkGroup_get_min_buffer_pool_size_Request *request, + ConfiguredNetworkGroup_get_min_buffer_pool_size_Reply *reply) +{ + auto lambda = [](std::shared_ptr cng) { + return cng->get_min_buffer_pool_size(); + }; + auto &manager = ServiceResourceManager::get_instance(); + auto min_buffer_pool_size_expected = manager.execute>(request->identifier().network_group_handle(), + lambda); + CHECK_EXPECTED_AS_RPC_STATUS(min_buffer_pool_size_expected, reply); + + reply->set_min_buffer_pool_size(static_cast(min_buffer_pool_size_expected.release())); + reply->set_status(static_cast(HAILO_SUCCESS)); + return grpc::Status::OK; +} + +grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_get_layer_info(grpc::ServerContext*, + 
const ConfiguredNetworkGroup_get_layer_info_Request *request, + ConfiguredNetworkGroup_get_layer_info_Reply *reply) +{ + auto lambda = [](std::shared_ptr cng, const std::string &stream_name) { + return cng->get_layer_info(stream_name); + }; + auto &manager = ServiceResourceManager::get_instance(); + auto layer_info_expected = manager.execute>>( + request->identifier().network_group_handle(), lambda, request->stream_name()); + CHECK_EXPECTED_AS_RPC_STATUS(layer_info_expected, reply); + + auto layer_info = layer_info_expected.release(); + auto info_proto = reply->mutable_layer_info(); + serialize_layer_info(*layer_info, info_proto); + reply->set_status(static_cast(HAILO_SUCCESS)); + return grpc::Status::OK; +} + +grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_get_ops_metadata(grpc::ServerContext*, + const ConfiguredNetworkGroup_get_ops_metadata_Request *request, + ConfiguredNetworkGroup_get_ops_metadata_Reply *reply) +{ + auto lambda = [](std::shared_ptr cng) { + return cng->get_ops_metadata(); + }; + auto &manager = ServiceResourceManager::get_instance(); + auto ops_metadata_expected = manager.execute>>( + request->identifier().network_group_handle(), lambda); + CHECK_EXPECTED_AS_RPC_STATUS(ops_metadata_expected, reply); + + auto ops_metadata = ops_metadata_expected.release(); + auto ops_metadata_proto = reply->mutable_ops_metadata(); + serialize_ops_metadata(ops_metadata, ops_metadata_proto); + reply->set_status(static_cast(HAILO_SUCCESS)); + return grpc::Status::OK; +} + +grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_set_nms_score_threshold(grpc::ServerContext*, + const ConfiguredNetworkGroup_set_nms_score_threshold_Request *request, + ConfiguredNetworkGroup_set_nms_score_threshold_Reply *reply) +{ + auto lambda = [](std::shared_ptr cng, const std::string &edge_name, float32_t nms_score_threshold) { + return cng->set_nms_score_threshold(edge_name, nms_score_threshold); + }; + auto &manager = ServiceResourceManager::get_instance(); + auto status = 
manager.execute(request->identifier().network_group_handle(), lambda, + request->edge_name(), request->nms_score_th()); + CHECK_SUCCESS_AS_RPC_STATUS(status, reply); + + reply->set_status(static_cast(HAILO_SUCCESS)); + return grpc::Status::OK; +} + +grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_set_nms_iou_threshold(grpc::ServerContext*, + const ConfiguredNetworkGroup_set_nms_iou_threshold_Request *request, + ConfiguredNetworkGroup_set_nms_iou_threshold_Reply *reply) +{ + auto lambda = [](std::shared_ptr cng, const std::string &edge_name, float32_t iou_threshold) { + return cng->set_nms_iou_threshold(edge_name, iou_threshold); + }; + auto &manager = ServiceResourceManager::get_instance(); + auto status = manager.execute(request->identifier().network_group_handle(), lambda, + request->edge_name(), request->nms_iou_th()); + CHECK_SUCCESS_AS_RPC_STATUS(status, reply); + + reply->set_status(static_cast(HAILO_SUCCESS)); + return grpc::Status::OK; +} + +grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_set_nms_max_bboxes_per_class(grpc::ServerContext*, + const ConfiguredNetworkGroup_set_nms_max_bboxes_per_class_Request *request, + ConfiguredNetworkGroup_set_nms_max_bboxes_per_class_Reply *reply) +{ + auto lambda = [](std::shared_ptr cng, const std::string &edge_name, uint32_t max_bboxes) { + return cng->set_nms_max_bboxes_per_class(edge_name, max_bboxes); + }; + auto &manager = ServiceResourceManager::get_instance(); + auto status = manager.execute(request->identifier().network_group_handle(), lambda, + request->edge_name(), request->nms_max_bboxes_per_class()); + CHECK_SUCCESS_AS_RPC_STATUS(status, reply); + + reply->set_status(static_cast(HAILO_SUCCESS)); + return grpc::Status::OK; +} + grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_get_stream_names_from_vstream_name(grpc::ServerContext*, const ConfiguredNetworkGroup_get_stream_names_from_vstream_name_Request *request, ConfiguredNetworkGroup_get_stream_names_from_vstream_name_Reply *reply) { auto 
lambda = [](std::shared_ptr cng, const std::string &vstream_name) { - return cng->get_stream_names_from_vstream_name(vstream_name); + return cng->get_stream_names_from_vstream_name(vstream_name); }; auto &manager = ServiceResourceManager::get_instance(); auto streams_names_expected = manager.execute>>( request->identifier().network_group_handle(), lambda, request->vstream_name()); CHECK_EXPECTED_AS_RPC_STATUS(streams_names_expected, reply); + auto streams_names_proto = reply->mutable_streams_names(); for (auto &name : streams_names_expected.value()) { streams_names_proto->Add(std::move(name)); @@ -1070,12 +1623,13 @@ grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_get_vstream_names_from_st ConfiguredNetworkGroup_get_vstream_names_from_stream_name_Reply *reply) { auto lambda = [](std::shared_ptr cng, const std::string &stream_name) { - return cng->get_vstream_names_from_stream_name(stream_name); + return cng->get_vstream_names_from_stream_name(stream_name); }; auto &manager = ServiceResourceManager::get_instance(); auto vstreams_names_expected = manager.execute>>( request->identifier().network_group_handle(), lambda, request->stream_name()); CHECK_EXPECTED_AS_RPC_STATUS(vstreams_names_expected, reply); + auto vstreams_names_proto = reply->mutable_vstreams_names(); for (auto &name : vstreams_names_expected.value()) { vstreams_names_proto->Add(std::move(name)); @@ -1088,11 +1642,13 @@ grpc::Status HailoRtRpcService::InputVStream_get_frame_size(grpc::ServerContext* VStream_get_frame_size_Reply *reply) { auto lambda = [](std::shared_ptr input_vstream) { - return input_vstream->get_frame_size(); + return input_vstream->get_frame_size(); }; auto &manager = ServiceResourceManager::get_instance(); - auto frame_size = manager.execute(request->identifier().vstream_handle(), lambda); - reply->set_frame_size(static_cast(frame_size)); + auto frame_size = manager.execute>(request->identifier().vstream_handle(), lambda); + CHECK_EXPECTED_AS_RPC_STATUS(frame_size, reply); + 
+ reply->set_frame_size(static_cast(frame_size.release())); reply->set_status(static_cast(HAILO_SUCCESS)); return grpc::Status::OK; } @@ -1101,11 +1657,13 @@ grpc::Status HailoRtRpcService::OutputVStream_get_frame_size(grpc::ServerContext VStream_get_frame_size_Reply *reply) { auto lambda = [](std::shared_ptr output_vstream) { - return output_vstream->get_frame_size(); + return output_vstream->get_frame_size(); }; auto &manager = ServiceResourceManager::get_instance(); - auto frame_size = manager.execute(request->identifier().vstream_handle(), lambda); - reply->set_frame_size(static_cast(frame_size)); + auto frame_size = manager.execute>(request->identifier().vstream_handle(), lambda); + CHECK_EXPECTED_AS_RPC_STATUS(frame_size, reply); + + reply->set_frame_size(static_cast(frame_size.release())); reply->set_status(static_cast(HAILO_SUCCESS)); return grpc::Status::OK; } @@ -1122,11 +1680,14 @@ grpc::Status HailoRtRpcService::InputVStream_name(grpc::ServerContext*, const VS VStream_name_Reply *reply) { auto lambda = [](std::shared_ptr input_vstream) { - return input_vstream->name(); + return input_vstream->name(); }; auto &manager = ServiceResourceManager::get_instance(); - auto name = manager.execute(request->identifier().vstream_handle(), lambda); - reply->set_name(name); + auto name = manager.execute>(request->identifier().vstream_handle(), lambda); + CHECK_EXPECTED_AS_RPC_STATUS(name, reply); + + + reply->set_name(name.release()); reply->set_status(static_cast(HAILO_SUCCESS)); return grpc::Status::OK; } @@ -1135,11 +1696,13 @@ grpc::Status HailoRtRpcService::OutputVStream_name(grpc::ServerContext*, const V VStream_name_Reply *reply) { auto lambda = [](std::shared_ptr output_vstream) { - return output_vstream->name(); + return output_vstream->name(); }; auto &manager = ServiceResourceManager::get_instance(); - auto name = manager.execute(request->identifier().vstream_handle(), lambda); - reply->set_name(name); + auto name = 
manager.execute>(request->identifier().vstream_handle(), lambda); + CHECK_EXPECTED_AS_RPC_STATUS(name, reply); + + reply->set_name(name.release()); reply->set_status(static_cast(HAILO_SUCCESS)); return grpc::Status::OK; } @@ -1148,11 +1711,13 @@ grpc::Status HailoRtRpcService::InputVStream_network_name(grpc::ServerContext*, VStream_network_name_Reply *reply) { auto lambda = [](std::shared_ptr input_vstream) { - return input_vstream->network_name(); + return input_vstream->network_name(); }; auto &manager = ServiceResourceManager::get_instance(); - auto name = manager.execute(request->identifier().vstream_handle(), lambda); - reply->set_network_name(name); + auto name = manager.execute>(request->identifier().vstream_handle(), lambda); + CHECK_EXPECTED_AS_RPC_STATUS(name, reply); + + reply->set_network_name(name.release()); reply->set_status(static_cast(HAILO_SUCCESS)); return grpc::Status::OK; } @@ -1161,11 +1726,13 @@ grpc::Status HailoRtRpcService::OutputVStream_network_name(grpc::ServerContext*, VStream_network_name_Reply *reply) { auto lambda = [](std::shared_ptr output_vstream) { - return output_vstream->network_name(); + return output_vstream->network_name(); }; auto &manager = ServiceResourceManager::get_instance(); - auto name = manager.execute(request->identifier().vstream_handle(), lambda); - reply->set_network_name(name); + auto name = manager.execute>(request->identifier().vstream_handle(), lambda); + CHECK_EXPECTED_AS_RPC_STATUS(name, reply); + + reply->set_network_name(name.release()); reply->set_status(static_cast(HAILO_SUCCESS)); return grpc::Status::OK; } @@ -1190,10 +1757,12 @@ grpc::Status HailoRtRpcService::InputVStream_resume(grpc::ServerContext*, const VStream_resume_Reply *reply) { auto lambda = [](std::shared_ptr input_vstream) { - return input_vstream->resume(); + return input_vstream->resume(); }; auto &manager = ServiceResourceManager::get_instance(); - auto status = manager.execute(request->identifier().vstream_handle(), lambda); + auto 
status = manager.execute(request->identifier().vstream_handle(), lambda); + CHECK_SUCCESS_AS_RPC_STATUS(status, reply); + reply->set_status(status); return grpc::Status::OK; } @@ -1202,10 +1771,12 @@ grpc::Status HailoRtRpcService::OutputVStream_resume(grpc::ServerContext*, const VStream_resume_Reply *reply) { auto lambda = [](std::shared_ptr output_vstream) { - return output_vstream->resume(); + return output_vstream->resume(); }; auto &manager = ServiceResourceManager::get_instance(); - auto status = manager.execute(request->identifier().vstream_handle(), lambda); + auto status = manager.execute(request->identifier().vstream_handle(), lambda); + CHECK_SUCCESS_AS_RPC_STATUS(status, reply); + reply->set_status(status); return grpc::Status::OK; } @@ -1214,10 +1785,12 @@ grpc::Status HailoRtRpcService::InputVStream_stop_and_clear(grpc::ServerContext* VStream_stop_and_clear_Reply *reply) { auto lambda = [](std::shared_ptr input_vstream) { - return input_vstream->stop_and_clear(); + return input_vstream->stop_and_clear(); }; auto &manager = ServiceResourceManager::get_instance(); - auto status = manager.execute(request->identifier().vstream_handle(), lambda); + auto status = manager.execute(request->identifier().vstream_handle(), lambda); + CHECK_SUCCESS_AS_RPC_STATUS(status, reply); + reply->set_status(status); return grpc::Status::OK; } @@ -1226,10 +1799,12 @@ grpc::Status HailoRtRpcService::OutputVStream_stop_and_clear(grpc::ServerContext VStream_stop_and_clear_Reply *reply) { auto lambda = [](std::shared_ptr output_vstream) { - return output_vstream->stop_and_clear(); + return output_vstream->stop_and_clear(); }; auto &manager = ServiceResourceManager::get_instance(); - auto status = manager.execute(request->identifier().vstream_handle(), lambda); + auto status = manager.execute(request->identifier().vstream_handle(), lambda); + CHECK_SUCCESS_AS_RPC_STATUS(status, reply); + reply->set_status(status); return grpc::Status::OK; } @@ -1238,10 +1813,12 @@ grpc::Status 
HailoRtRpcService::InputVStream_start_vstream(grpc::ServerContext*, VStream_start_vstream_Reply *reply) { auto lambda = [](std::shared_ptr input_vstream) { - return input_vstream->start_vstream(); + return input_vstream->start_vstream(); }; auto &manager = ServiceResourceManager::get_instance(); - auto status = manager.execute(request->identifier().vstream_handle(), lambda); + auto status = manager.execute(request->identifier().vstream_handle(), lambda); + CHECK_SUCCESS_AS_RPC_STATUS(status, reply); + reply->set_status(status); return grpc::Status::OK; } @@ -1250,10 +1827,12 @@ grpc::Status HailoRtRpcService::OutputVStream_start_vstream(grpc::ServerContext* VStream_start_vstream_Reply *reply) { auto lambda = [](std::shared_ptr output_vstream) { - return output_vstream->start_vstream(); + return output_vstream->start_vstream(); }; auto &manager = ServiceResourceManager::get_instance(); - auto status = manager.execute(request->identifier().vstream_handle(), lambda); + auto status = manager.execute(request->identifier().vstream_handle(), lambda); + CHECK_SUCCESS_AS_RPC_STATUS(status, reply); + reply->set_status(status); return grpc::Status::OK; } @@ -1262,10 +1841,13 @@ grpc::Status HailoRtRpcService::InputVStream_get_user_buffer_format(grpc::Server VStream_get_user_buffer_format_Reply *reply) { auto lambda = [](std::shared_ptr input_vstream) { - return input_vstream->get_user_buffer_format(); + return input_vstream->get_user_buffer_format(); }; auto &manager = ServiceResourceManager::get_instance(); - auto format = manager.execute(request->identifier().vstream_handle(), lambda); + auto format_exp = manager.execute>(request->identifier().vstream_handle(), lambda); + CHECK_EXPECTED_AS_RPC_STATUS(format_exp, reply); + + auto format = format_exp.release(); reply->set_status(static_cast(HAILO_SUCCESS)); auto proto_user_buffer_format = reply->mutable_user_buffer_format(); @@ -1280,10 +1862,13 @@ grpc::Status 
HailoRtRpcService::OutputVStream_get_user_buffer_format(grpc::Serve VStream_get_user_buffer_format_Reply *reply) { auto lambda = [](std::shared_ptr output_vstream) { - return output_vstream->get_user_buffer_format(); + return output_vstream->get_user_buffer_format(); }; auto &manager = ServiceResourceManager::get_instance(); - auto format = manager.execute(request->identifier().vstream_handle(), lambda); + auto format_exp = manager.execute>(request->identifier().vstream_handle(), lambda); + CHECK_EXPECTED_AS_RPC_STATUS(format_exp, reply); + + auto format = format_exp.release(); reply->set_status(static_cast(HAILO_SUCCESS)); auto proto_user_buffer_format = reply->mutable_user_buffer_format(); @@ -1298,10 +1883,13 @@ grpc::Status HailoRtRpcService::InputVStream_get_info(grpc::ServerContext*, cons VStream_get_info_Reply *reply) { auto lambda = [](std::shared_ptr input_vstream) { - return input_vstream->get_info(); + return input_vstream->get_info(); }; auto &manager = ServiceResourceManager::get_instance(); - auto info = manager.execute(request->identifier().vstream_handle(), lambda); + auto info_exp = manager.execute>(request->identifier().vstream_handle(), lambda); + CHECK_EXPECTED_AS_RPC_STATUS(info_exp, reply); + + auto info = info_exp.release(); auto info_proto = reply->mutable_vstream_info(); serialize_vstream_info(info, info_proto); reply->set_status(static_cast(HAILO_SUCCESS)); @@ -1312,10 +1900,13 @@ grpc::Status HailoRtRpcService::OutputVStream_get_info(grpc::ServerContext*, con VStream_get_info_Reply *reply) { auto lambda = [](std::shared_ptr output_vstream) { - return output_vstream->get_info(); + return output_vstream->get_info(); }; auto &manager = ServiceResourceManager::get_instance(); - auto info = manager.execute(request->identifier().vstream_handle(), lambda); + auto info_exp = manager.execute>(request->identifier().vstream_handle(), lambda); + CHECK_EXPECTED_AS_RPC_STATUS(info_exp, reply); + + auto info = info_exp.release(); auto info_proto = 
reply->mutable_vstream_info(); serialize_vstream_info(info, info_proto); reply->set_status(static_cast(HAILO_SUCCESS)); @@ -1326,11 +1917,13 @@ grpc::Status HailoRtRpcService::OutputVStream_is_aborted(grpc::ServerContext*, c VStream_is_aborted_Reply *reply) { auto lambda = [](std::shared_ptr output_vstream) { - return output_vstream->is_aborted(); + return output_vstream->is_aborted(); }; auto &manager = ServiceResourceManager::get_instance(); - auto is_aborted = manager.execute(request->identifier().vstream_handle(), lambda); - reply->set_is_aborted(is_aborted); + auto is_aborted = manager.execute>(request->identifier().vstream_handle(), lambda); + CHECK_EXPECTED_AS_RPC_STATUS(is_aborted, reply); + + reply->set_is_aborted(is_aborted.release()); reply->set_status(static_cast(HAILO_SUCCESS)); return grpc::Status::OK; } @@ -1339,11 +1932,13 @@ grpc::Status HailoRtRpcService::InputVStream_is_aborted(grpc::ServerContext*, co VStream_is_aborted_Reply *reply) { auto lambda = [](std::shared_ptr input_vstream) { - return input_vstream->is_aborted(); + return input_vstream->is_aborted(); }; auto &manager = ServiceResourceManager::get_instance(); - auto is_aborted = manager.execute(request->identifier().vstream_handle(), lambda); - reply->set_is_aborted(is_aborted); + auto is_aborted = manager.execute>(request->identifier().vstream_handle(), lambda); + CHECK_EXPECTED_AS_RPC_STATUS(is_aborted, reply); + + reply->set_is_aborted(is_aborted.release()); reply->set_status(static_cast(HAILO_SUCCESS)); return grpc::Status::OK; } @@ -1352,11 +1947,12 @@ grpc::Status HailoRtRpcService::OutputVStream_set_nms_score_threshold(grpc::Serv VStream_set_nms_score_threshold_Reply *reply) { auto lambda = [](std::shared_ptr output_vstream, float32_t threshold) { - return output_vstream->set_nms_score_threshold(threshold); + return output_vstream->set_nms_score_threshold(threshold); }; auto &manager = ServiceResourceManager::get_instance(); - auto status = 
manager.execute(request->identifier().vstream_handle(), lambda, static_cast(request->threshold())); - CHECK_SUCCESS_AS_RPC_STATUS(status, reply, "set_nms_score_threshold failed"); + auto status = manager.execute(request->identifier().vstream_handle(), lambda, static_cast(request->threshold())); + CHECK_SUCCESS_AS_RPC_STATUS(status, reply, "set_nms_score_threshold failed"); + reply->set_status(static_cast(HAILO_SUCCESS)); return grpc::Status::OK; } @@ -1365,11 +1961,12 @@ grpc::Status HailoRtRpcService::OutputVStream_set_nms_iou_threshold(grpc::Server VStream_set_nms_iou_threshold_Reply *reply) { auto lambda = [](std::shared_ptr output_vstream, float32_t threshold) { - return output_vstream->set_nms_iou_threshold(threshold); + return output_vstream->set_nms_iou_threshold(threshold); }; auto &manager = ServiceResourceManager::get_instance(); - auto status = manager.execute(request->identifier().vstream_handle(), lambda, static_cast(request->threshold())); - CHECK_SUCCESS_AS_RPC_STATUS(status, reply, "set_nms_iou_threshold failed"); + auto status = manager.execute(request->identifier().vstream_handle(), lambda, static_cast(request->threshold())); + CHECK_SUCCESS_AS_RPC_STATUS(status, reply, "set_nms_iou_threshold failed"); + reply->set_status(static_cast(HAILO_SUCCESS)); return grpc::Status::OK; } @@ -1378,11 +1975,12 @@ grpc::Status HailoRtRpcService::OutputVStream_set_nms_max_proposals_per_class(gr VStream_set_nms_max_proposals_per_class_Reply *reply) { auto lambda = [](std::shared_ptr output_vstream, uint32_t max_proposals_per_class) { - return output_vstream->set_nms_max_proposals_per_class(max_proposals_per_class); + return output_vstream->set_nms_max_proposals_per_class(max_proposals_per_class); }; auto &manager = ServiceResourceManager::get_instance(); - auto status = manager.execute(request->identifier().vstream_handle(), lambda, static_cast(request->max_proposals_per_class())); + auto status = manager.execute(request->identifier().vstream_handle(), lambda, 
static_cast(request->max_proposals_per_class())); CHECK_SUCCESS_AS_RPC_STATUS(status, reply, "set_nms_max_proposals_per_class failed"); + reply->set_status(static_cast(HAILO_SUCCESS)); return grpc::Status::OK; } diff --git a/hailort/hailort_service/hailort_rpc_service.hpp b/hailort/hailort_service/hailort_rpc_service.hpp index 3dddc38..0531e53 100644 --- a/hailort/hailort_service/hailort_rpc_service.hpp +++ b/hailort/hailort_service/hailort_rpc_service.hpp @@ -16,6 +16,7 @@ #else #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wconversion" +#pragma GCC diagnostic ignored "-Wunused-parameter" #endif #include #include "hailort_rpc.grpc.pb.h" @@ -25,8 +26,11 @@ #pragma GCC diagnostic pop #endif -#include #include "hailo/hailort.h" +#include "hailo/network_group.hpp" +#include "vdevice_callbacks_queue.hpp" + +#include namespace hailort { @@ -50,6 +54,10 @@ public: VDevice_get_physical_devices_ids_Reply* reply) override; virtual grpc::Status VDevice_get_default_streams_interface(grpc::ServerContext*, const VDevice_get_default_streams_interface_Request* request, VDevice_get_default_streams_interface_Reply* reply) override; + virtual grpc::Status VDevice_get_callback_id(grpc::ServerContext*, const VDevice_get_callback_id_Request* request, + VDevice_get_callback_id_Reply* reply) override; + virtual grpc::Status VDevice_finish_callback_listener(grpc::ServerContext*, const VDevice_finish_callback_listener_Request* request, + VDevice_finish_callback_listener_Reply* reply) override; virtual grpc::Status InputVStreams_create(grpc::ServerContext *, const VStream_create_Request *request, VStreams_create_Reply *reply) override; @@ -141,6 +149,9 @@ public: virtual grpc::Status ConfiguredNetworkGroup_get_default_stream_interface(grpc::ServerContext*, const ConfiguredNetworkGroup_get_default_stream_interface_Request *request, ConfiguredNetworkGroup_get_default_stream_interface_Reply *reply) override; + virtual grpc::Status 
ConfiguredNetworkGroup_shutdown(grpc::ServerContext*, + const ConfiguredNetworkGroup_shutdown_Request *request, + ConfiguredNetworkGroup_shutdown_Reply *reply) override; virtual grpc::Status ConfiguredNetworkGroup_get_output_vstream_groups(grpc::ServerContext*, const ConfiguredNetworkGroup_get_output_vstream_groups_Request *request, ConfiguredNetworkGroup_get_output_vstream_groups_Reply *reply) override; @@ -177,22 +188,39 @@ public: virtual grpc::Status ConfiguredNetworkGroup_get_sorted_output_names(grpc::ServerContext*, const ConfiguredNetworkGroup_get_sorted_output_names_Request *request, ConfiguredNetworkGroup_get_sorted_output_names_Reply *reply) override; + virtual grpc::Status ConfiguredNetworkGroup_get_min_buffer_pool_size(grpc::ServerContext*, + const ConfiguredNetworkGroup_get_min_buffer_pool_size_Request *request, + ConfiguredNetworkGroup_get_min_buffer_pool_size_Reply *reply) override; + virtual grpc::Status ConfiguredNetworkGroup_get_layer_info(grpc::ServerContext*, + const ConfiguredNetworkGroup_get_layer_info_Request *request, + ConfiguredNetworkGroup_get_layer_info_Reply *reply) override; + virtual grpc::Status ConfiguredNetworkGroup_get_ops_metadata(grpc::ServerContext*, + const ConfiguredNetworkGroup_get_ops_metadata_Request *request, + ConfiguredNetworkGroup_get_ops_metadata_Reply *reply) override; + virtual grpc::Status ConfiguredNetworkGroup_set_nms_score_threshold(grpc::ServerContext*, + const ConfiguredNetworkGroup_set_nms_score_threshold_Request *request, + ConfiguredNetworkGroup_set_nms_score_threshold_Reply *reply) override; + virtual grpc::Status ConfiguredNetworkGroup_set_nms_iou_threshold(grpc::ServerContext*, + const ConfiguredNetworkGroup_set_nms_iou_threshold_Request *request, + ConfiguredNetworkGroup_set_nms_iou_threshold_Reply *reply) override; + virtual grpc::Status ConfiguredNetworkGroup_set_nms_max_bboxes_per_class(grpc::ServerContext*, + const ConfiguredNetworkGroup_set_nms_max_bboxes_per_class_Request *request, + 
ConfiguredNetworkGroup_set_nms_max_bboxes_per_class_Reply *reply) override; virtual grpc::Status ConfiguredNetworkGroup_get_stream_names_from_vstream_name(grpc::ServerContext*, const ConfiguredNetworkGroup_get_stream_names_from_vstream_name_Request *request, ConfiguredNetworkGroup_get_stream_names_from_vstream_name_Reply *reply) override; virtual grpc::Status ConfiguredNetworkGroup_get_vstream_names_from_stream_name(grpc::ServerContext*, const ConfiguredNetworkGroup_get_vstream_names_from_stream_name_Request *request, ConfiguredNetworkGroup_get_vstream_names_from_stream_name_Reply *reply) override; + virtual grpc::Status ConfiguredNetworkGroup_infer_async(grpc::ServerContext*, + const ConfiguredNetworkGroup_infer_async_Request *request, + ConfiguredNetworkGroup_infer_async_Reply *reply) override; private: void keep_alive(); hailo_status flush_input_vstream(uint32_t handle); hailo_status abort_input_vstream(uint32_t handle); hailo_status abort_output_vstream(uint32_t handle); - hailo_status resume_input_vstream(uint32_t handle); - hailo_status resume_output_vstream(uint32_t handle); - bool is_input_vstream_aborted(uint32_t handle); - bool is_output_vstream_aborted(uint32_t handle); void abort_vstreams_by_pids(std::set &pids); void remove_disconnected_clients(); void update_client_id_timestamp(uint32_t pid); @@ -200,6 +228,8 @@ private: std::mutex m_mutex; std::map> m_clients_pids; std::unique_ptr m_keep_alive; + + std::mutex m_vdevice_creation_mutex; }; } diff --git a/hailort/hailort_service/service_resource_manager.hpp b/hailort/hailort_service/service_resource_manager.hpp index bec2248..320b722 100644 --- a/hailort/hailort_service/service_resource_manager.hpp +++ b/hailort/hailort_service/service_resource_manager.hpp @@ -49,13 +49,29 @@ public: { std::unique_lock lock(m_mutex); auto resource_expected = resource_lookup(handle); - assert(resource_expected); + CHECK_EXPECTED(resource_expected); auto resource = resource_expected.release(); 
assert(contains(m_resources_mutexes, handle)); std::shared_lock resource_lock(m_resources_mutexes[handle]); lock.unlock(); - K ret = lambda(resource->resource, args...); + auto ret = lambda(resource->resource, args...); + + return ret; + } + + template + hailo_status execute(uint32_t handle, Func &lambda, Args... args) + { + std::unique_lock lock(m_mutex); + auto resource_expected = resource_lookup(handle); + CHECK_EXPECTED_AS_STATUS(resource_expected); + auto resource = resource_expected.release(); + + assert(contains(m_resources_mutexes, handle)); + std::shared_lock resource_lock(m_resources_mutexes[handle]); + lock.unlock(); + auto ret = lambda(resource->resource, args...); return ret; } @@ -71,18 +87,18 @@ public: return index; } - uint32_t dup_handle(uint32_t handle, uint32_t pid) + Expected dup_handle(uint32_t handle, uint32_t pid) { std::unique_lock lock(m_mutex); auto resource_expected = resource_lookup(handle); - assert(resource_expected); + CHECK_EXPECTED(resource_expected); auto resource = resource_expected.release(); assert(contains(m_resources_mutexes, handle)); std::unique_lock resource_lock(m_resources_mutexes[handle]); resource->pids.insert(pid); - return handle; + return Expected(handle); } std::shared_ptr release_resource(uint32_t handle, uint32_t pid) diff --git a/hailort/hailort_service/vdevice_callbacks_queue.hpp b/hailort/hailort_service/vdevice_callbacks_queue.hpp new file mode 100644 index 0000000..5eaab7f --- /dev/null +++ b/hailort/hailort_service/vdevice_callbacks_queue.hpp @@ -0,0 +1,88 @@ +/** + * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) +**/ +/** + * @file vdevice_callbacks_queue.hpp + * @brief Queue used for the callbacks in infer async over service. + * enqueue callback id means the transfer is done. + * dequeue a callback id means the client is signaled to call the callback on his side. 
+ **/ + +#ifndef _HAILO_VDEVICE_CALLBACKS_QUEUE_HPP_ +#define _HAILO_VDEVICE_CALLBACKS_QUEUE_HPP_ + +#include "hailort_rpc_service.hpp" + +#include "hailo/hailort.h" +#include "hailo/network_group.hpp" +#include "hailo/hailort_common.hpp" +#include "utils/thread_safe_queue.hpp" + +namespace hailort +{ + +#define MAX_QUEUE_SIZE (512) // Max inner reader-writer queue size + +class VDeviceCallbacksQueue final +{ +public: + static Expected> create(uint32_t max_queue_size) + { + auto shutdown_event_exp = Event::create_shared(Event::State::not_signalled); + CHECK_EXPECTED(shutdown_event_exp); + auto shutdown_event = shutdown_event_exp.release(); + + auto cb_ids_queue = SpscQueue::create(max_queue_size, shutdown_event, HAILO_INFINITE_TIMEOUT); + CHECK_EXPECTED(cb_ids_queue); + + auto queue_ptr = make_unique_nothrow(cb_ids_queue.release(), shutdown_event); + CHECK_AS_EXPECTED(nullptr != queue_ptr, HAILO_OUT_OF_HOST_MEMORY); + + return queue_ptr; + } + + VDeviceCallbacksQueue(SpscQueue &&cb_ids_queue, EventPtr shutdown_event) : + m_callbacks_ids_queue(std::move(cb_ids_queue)), m_shutdown_event(shutdown_event) + {} + + hailo_status enqueue(ProtoCallbackIdentifier &&callback_id) + { + std::unique_lock lock(m_mutex); + auto status = m_callbacks_ids_queue.enqueue(std::move(callback_id)); + CHECK_SUCCESS(status); + + return HAILO_SUCCESS; + } + + Expected dequeue() + { + auto callback_id = m_callbacks_ids_queue.dequeue(); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == callback_id.status()) { + return make_unexpected(callback_id.status()); + } + else if (HAILO_TIMEOUT == callback_id.status()) { + LOGGER__WARNING("Failed to dequeue callback_id because the queue is empty, status={}", HAILO_TIMEOUT); + return make_unexpected(callback_id.status()); + } + CHECK_EXPECTED(callback_id); + + return callback_id; + } + + hailo_status shutdown() + { + return m_shutdown_event->signal(); + } + +private: + std::mutex m_mutex; + uint32_t m_vdevice_handle; + // TODO: HRT-12346 - Use folly's MPMC? 
(for multiple devices) + SpscQueue m_callbacks_ids_queue; + EventPtr m_shutdown_event; +}; + +} /* namespace hailort */ + +#endif /* _HAILO_VDEVICE_CALLBACKS_QUEUE_HPP_ */ diff --git a/hailort/hailortcli/CMakeLists.txt b/hailort/hailortcli/CMakeLists.txt index 0121401..4f05b5d 100644 --- a/hailort/hailortcli/CMakeLists.txt +++ b/hailort/hailortcli/CMakeLists.txt @@ -1,8 +1,9 @@ cmake_minimum_required(VERSION 3.0.0) include(GNUInstallDirs) -include(${HAILO_EXTERNALS_CMAKE_SCRIPTS}/spdlog.cmake) +include(${HAILO_EXTERNALS_CMAKE_SCRIPTS}/cli11.cmake) include(${HAILO_EXTERNALS_CMAKE_SCRIPTS}/json.cmake) +include(${HAILO_EXTERNALS_CMAKE_SCRIPTS}/spdlog.cmake) include(${HAILO_EXTERNALS_CMAKE_SCRIPTS}/readerwriterqueue.cmake) include(${HAILO_EXTERNALS_CMAKE_SCRIPTS}/dotwriter.cmake) diff --git a/hailort/hailortcli/fw_control_command.cpp b/hailort/hailortcli/fw_control_command.cpp index f249238..61dcc32 100644 --- a/hailort/hailortcli/fw_control_command.cpp +++ b/hailort/hailortcli/fw_control_command.cpp @@ -142,6 +142,8 @@ static std::string identity_arch_string(const hailo_device_identity_t &identity) return "HAILO15H"; case HAILO_ARCH_PLUTO: return "PLUTO"; + case HAILO_ARCH_HAILO15M: + return "HAILO15M"; default: return "Unknown"; } @@ -222,7 +224,7 @@ hailo_status FwControlTestMemoriesCommand::execute_on_device(Device &device) auto status = device.test_chip_memories(); CHECK_SUCCESS(status, "Failed memory test"); - std::cout << "Memory test has completed succesfully" << std::endl; + std::cout << "Memory test has completed successfully" << std::endl; return HAILO_SUCCESS; } diff --git a/hailort/hailortcli/run2/network_live_track.cpp b/hailort/hailortcli/run2/network_live_track.cpp index bfbd4a2..0033816 100644 --- a/hailort/hailortcli/run2/network_live_track.cpp +++ b/hailort/hailortcli/run2/network_live_track.cpp @@ -17,11 +17,13 @@ size_t NetworkLiveTrack::max_ng_name = 0; std::mutex NetworkLiveTrack::mutex; NetworkLiveTrack::NetworkLiveTrack(const std::string &name, 
std::shared_ptr cng, - LatencyMeterPtr overall_latency_meter, bool measure_fps, const std::string &hef_path) : + std::shared_ptr configured_infer_model, LatencyMeterPtr overall_latency_meter, + bool measure_fps, const std::string &hef_path) : m_name(name), m_count(0), m_last_get_time(), m_cng(cng), + m_configured_infer_model(configured_infer_model), m_overall_latency_meter(overall_latency_meter), m_measure_fps(measure_fps), m_hef_path(hef_path), @@ -70,12 +72,22 @@ uint32_t NetworkLiveTrack::push_text_impl(std::stringstream &ss) ss << fmt::format("{}fps: {:.2f}", get_separator(), fps); } - auto hw_latency_measurement = m_cng->get_latency_measurement(); - if (hw_latency_measurement) { - ss << fmt::format("{}hw latency: {:.2f} ms", get_separator(), InferResultsFormatUtils::latency_result_to_ms(hw_latency_measurement->avg_hw_latency)); + if (m_cng) { + auto hw_latency_measurement = m_cng->get_latency_measurement(); + if (hw_latency_measurement) { + ss << fmt::format("{}hw latency: {:.2f} ms", get_separator(), InferResultsFormatUtils::latency_result_to_ms(hw_latency_measurement->avg_hw_latency)); + } else if (HAILO_NOT_AVAILABLE != hw_latency_measurement.status()) { // HAILO_NOT_AVAILABLE is a valid error, we ignore it + ss << fmt::format("{}hw latency: NaN (err)", get_separator()); + } } - else if (HAILO_NOT_AVAILABLE != hw_latency_measurement.status()) { // HAILO_NOT_AVAILABLE is a valid error, we ignore it - ss << fmt::format("{}hw latency: NaN (err)", get_separator()); + else { + auto hw_latency_measurement = m_configured_infer_model->get_hw_latency_measurement(); + if (hw_latency_measurement) { + ss << fmt::format("{}hw latency: {:.2f} ms", get_separator(), InferResultsFormatUtils::latency_result_to_ms(hw_latency_measurement->avg_hw_latency)); + } + else if (HAILO_NOT_AVAILABLE != hw_latency_measurement.status()) { // HAILO_NOT_AVAILABLE is a valid error, we ignore it + ss << fmt::format("{}hw latency: NaN (err)", get_separator()); + } } if 
(m_overall_latency_meter) { @@ -112,10 +124,19 @@ void NetworkLiveTrack::push_json_impl(nlohmann::ordered_json &json) network_group_json["FPS"] = std::to_string(fps); } - auto hw_latency_measurement = m_cng->get_latency_measurement(); - if (hw_latency_measurement){ - network_group_json["hw_latency"] = InferResultsFormatUtils::latency_result_to_ms(hw_latency_measurement->avg_hw_latency); + if (m_cng) { + auto hw_latency_measurement = m_cng->get_latency_measurement(); + if (hw_latency_measurement){ + network_group_json["hw_latency"] = InferResultsFormatUtils::latency_result_to_ms(hw_latency_measurement->avg_hw_latency); + } } + else { + auto hw_latency_measurement = m_configured_infer_model->get_hw_latency_measurement(); + if (hw_latency_measurement){ + network_group_json["hw_latency"] = InferResultsFormatUtils::latency_result_to_ms(hw_latency_measurement->avg_hw_latency); + } + } + if (m_overall_latency_meter){ auto overall_latency_measurement = m_overall_latency_meter->get_latency(false); diff --git a/hailort/hailortcli/run2/network_live_track.hpp b/hailort/hailortcli/run2/network_live_track.hpp index 6951690..8b3c9bf 100644 --- a/hailort/hailortcli/run2/network_live_track.hpp +++ b/hailort/hailortcli/run2/network_live_track.hpp @@ -11,6 +11,7 @@ #define _HAILO_HAILORTCLI_RUN2_NETWORK_LIVE_TRACK_HPP_ #include "hailo/hailort.h" +#include "hailo/infer_model.hpp" #include "hailo/network_group.hpp" #include "common/latency_meter.hpp" @@ -24,7 +25,8 @@ class NetworkLiveTrack : public LiveStats::Track { public: NetworkLiveTrack(const std::string &name, std::shared_ptr cng, - hailort::LatencyMeterPtr overall_latency_meter, bool measure_fps, const std::string &hef_path); + std::shared_ptr configured_infer_model, + hailort::LatencyMeterPtr overall_latency_meter, bool measure_fps, const std::string &hef_path); virtual ~NetworkLiveTrack() = default; virtual hailo_status start_impl() override; virtual uint32_t push_text_impl(std::stringstream &ss) override; @@ -44,6 +46,7 @@ 
private: std::atomic m_count; std::chrono::time_point m_last_get_time; std::shared_ptr m_cng; + std::shared_ptr m_configured_infer_model; hailort::LatencyMeterPtr m_overall_latency_meter; const bool m_measure_fps; const std::string &m_hef_path; diff --git a/hailort/hailortcli/run2/network_runner.cpp b/hailort/hailortcli/run2/network_runner.cpp index 96d2913..15a563a 100644 --- a/hailort/hailortcli/run2/network_runner.cpp +++ b/hailort/hailortcli/run2/network_runner.cpp @@ -87,9 +87,9 @@ StreamParams::StreamParams() : IoParams(), flags(HAILO_STREAM_FLAGS_NONE) } NetworkParams::NetworkParams() : hef_path(), net_group_name(), vstream_params(), stream_params(), - scheduling_algorithm(HAILO_SCHEDULING_ALGORITHM_ROUND_ROBIN), batch_size(HAILO_DEFAULT_BATCH_SIZE), - scheduler_threshold(0), scheduler_timeout_ms(0), framerate(UNLIMITED_FRAMERATE), measure_hw_latency(false), - measure_overall_latency(false) + scheduling_algorithm(HAILO_SCHEDULING_ALGORITHM_ROUND_ROBIN), multi_process_service(false), + batch_size(HAILO_DEFAULT_BATCH_SIZE), scheduler_threshold(0), scheduler_timeout_ms(0), + framerate(UNLIMITED_FRAMERATE), measure_hw_latency(false),measure_overall_latency(false) { } @@ -99,112 +99,216 @@ NetworkRunner::NetworkRunner(const NetworkParams ¶ms, const std::string &nam m_params(params), m_name(name), m_cng(cng), + m_infer_model(nullptr), + m_configured_infer_model(nullptr), m_overall_latency_meter(nullptr), m_latency_barrier(nullptr), m_last_measured_fps(0) { } -Expected> NetworkRunner::create_shared(VDevice &vdevice, const NetworkParams ¶ms) +NetworkRunner::NetworkRunner(const NetworkParams ¶ms, const std::string &name, VDevice &vdevice, + std::shared_ptr infer_model, std::shared_ptr configured_infer_model) : + m_vdevice(vdevice), + m_params(params), + m_name(name), + m_cng(nullptr), + m_infer_model(infer_model), + m_configured_infer_model(configured_infer_model), + m_overall_latency_meter(nullptr), + m_latency_barrier(nullptr) { - // The network params passed to 
the NetworkRunner may be changed by this function, hence we copy them. - auto final_net_params = params; - - auto hef = Hef::create(final_net_params.hef_path); - CHECK_EXPECTED(hef); +} +Expected NetworkRunner::get_network_group_name(const NetworkParams ¶ms, const Hef &hef) +{ // Get NG's name if single - auto net_group_name = final_net_params.net_group_name; + auto net_group_name = params.net_group_name; + + // if net_group_name is an empty string - take the name from hef if (net_group_name.empty()) { - auto net_groups_names = hef->get_network_groups_names(); - CHECK_AS_EXPECTED(net_groups_names.size() == 1, HAILO_INVALID_ARGUMENT, "HEF {} doesn't contain a single NetworkGroup. Pass --name", final_net_params.hef_path); + auto net_groups_names = hef.get_network_groups_names(); + CHECK_AS_EXPECTED(net_groups_names.size() == 1, HAILO_INVALID_ARGUMENT, "HEF {} doesn't contain a single NetworkGroup. Pass --name", params.hef_path); net_group_name = net_groups_names[0]; } - auto cfg_params = vdevice.create_configure_params(hef.value(), net_group_name); - CHECK_EXPECTED(cfg_params); - cfg_params->batch_size = final_net_params.batch_size; - if (final_net_params.batch_size == HAILO_DEFAULT_BATCH_SIZE) { - // Changing batch_size to 1. 
If HAILO_DEFAULT_BATCH_SIZE is configured, the sched will send one frame per batch - final_net_params.batch_size = 1; - } - if (final_net_params.measure_hw_latency) { - cfg_params->latency |= HAILO_LATENCY_MEASURE; - } - if (final_net_params.is_async()) { - for (auto &stream_name_params_pair : cfg_params->stream_params_by_name) { - stream_name_params_pair.second.flags = HAILO_STREAM_FLAGS_ASYNC; + return net_group_name; +} + +Expected> FullAsyncNetworkRunner::create_shared(VDevice &vdevice, + NetworkParams params) +{ + auto infer_model = vdevice.create_infer_model(params.hef_path); + CHECK_EXPECTED(infer_model); + auto infer_model_ptr = infer_model.release(); + + auto expected_net_group_name = get_network_group_name(params, infer_model_ptr->hef()); + CHECK_EXPECTED(expected_net_group_name); + + /* Configure Params */ + infer_model_ptr->set_batch_size(params.batch_size); + if (params.batch_size == HAILO_DEFAULT_BATCH_SIZE) { + // Changing batch_size to 1 (after configuring the vdevice) - as we iterate over 'params.batch_size' in latency measurements scenarios + params.batch_size = 1; + } + if (params.measure_hw_latency) { + infer_model_ptr->set_hw_latency_measurement_flags(HAILO_LATENCY_MEASURE); } - } - auto cfgr_net_groups = vdevice.configure(hef.value(), {{net_group_name, cfg_params.value()}}); - CHECK_EXPECTED(cfgr_net_groups); - assert(1 == cfgr_net_groups->size()); - auto cfgr_net_group = cfgr_net_groups.value()[0]; - if (HAILO_SCHEDULING_ALGORITHM_NONE!= final_net_params.scheduling_algorithm) { - CHECK_SUCCESS_AS_EXPECTED(cfgr_net_group->set_scheduler_threshold(final_net_params.scheduler_threshold)); - CHECK_SUCCESS_AS_EXPECTED(cfgr_net_group->set_scheduler_timeout(std::chrono::milliseconds(final_net_params.scheduler_timeout_ms))); - CHECK_SUCCESS_AS_EXPECTED(cfgr_net_group->set_scheduler_priority(final_net_params.scheduler_priority)); - } + /* Pipeline Params */ + for (const auto &input_name : infer_model_ptr->get_input_names()) { + auto input_params_it = 
std::find_if(params.vstream_params.begin(), params.vstream_params.end(), + [&input_name](const VStreamParams ¶ms) -> bool { + return params.name == input_name; + }); + auto input_params = (input_params_it == params.vstream_params.end()) ? VStreamParams() : *input_params_it; + + auto input_config = infer_model_ptr->input(input_name); + CHECK_EXPECTED(input_config); + input_config->set_format_order(input_params.params.user_buffer_format.order); + input_config->set_format_type(input_params.params.user_buffer_format.type); + } + for (const auto &output_name : infer_model_ptr->get_output_names()) { + auto output_params_it = std::find_if(params.vstream_params.begin(), params.vstream_params.end(), + [&output_name](const VStreamParams ¶ms) -> bool { + return params.name == output_name; + }); + auto output_params = (output_params_it == params.vstream_params.end()) ? VStreamParams() : *output_params_it; + + auto output_config = infer_model_ptr->output(output_name); + CHECK_EXPECTED(output_config); + output_config->set_format_order(output_params.params.user_buffer_format.order); + output_config->set_format_type(output_params.params.user_buffer_format.type); + } - std::shared_ptr net_runner_ptr = nullptr; - switch (final_net_params.mode) - { - case InferenceMode::FULL: - { - std::map vstreams_params; - for (auto &vstream_params : final_net_params.vstream_params) { - vstreams_params.emplace(vstream_params.name, vstream_params.params); + auto configured_model = infer_model_ptr->configure(); + CHECK_EXPECTED(configured_model); + auto configured_infer_model_ptr = make_shared_nothrow(configured_model.release()); + CHECK_NOT_NULL_AS_EXPECTED(configured_infer_model_ptr, HAILO_OUT_OF_HOST_MEMORY); + + auto res = make_shared_nothrow(params, expected_net_group_name.value(), vdevice, + infer_model_ptr, configured_infer_model_ptr); + CHECK_NOT_NULL_AS_EXPECTED(res, HAILO_OUT_OF_HOST_MEMORY); + + if (params.measure_overall_latency || params.measure_hw_latency) { + CHECK_AS_EXPECTED((1 == 
res->get_input_names().size()), HAILO_INVALID_OPERATION, + "Latency measurement over multiple inputs network is not supported"); + + if (params.measure_overall_latency) { + auto overall_latency_meter = make_shared_nothrow(std::set{ "INFERENCE" }, // Since we check 'infer()' with single callback, we only address 1 output + OVERALL_LATENCY_TIMESTAMPS_LIST_LENGTH); + CHECK_NOT_NULL_AS_EXPECTED(overall_latency_meter, HAILO_OUT_OF_HOST_MEMORY); + res->set_overall_latency_meter(overall_latency_meter); + } + + // We use a barrier for both hw and overall latency + auto latency_barrier = make_shared_nothrow(1); // Only 1 frame at a time + CHECK_NOT_NULL_AS_EXPECTED(latency_barrier, HAILO_OUT_OF_HOST_MEMORY); + res->set_latency_barrier(latency_barrier); } - auto vstreams = create_vstreams(*cfgr_net_group, vstreams_params); - CHECK_EXPECTED(vstreams); + return res; +} - auto net_runner = make_shared_nothrow(final_net_params, net_group_name, vdevice, - std::move(vstreams->first), std::move(vstreams->second), cfgr_net_group); - CHECK_NOT_NULL_AS_EXPECTED(net_runner, HAILO_OUT_OF_HOST_MEMORY); - net_runner_ptr = std::static_pointer_cast(net_runner); - break; - } +Expected> NetworkRunner::create_shared(VDevice &vdevice, const NetworkParams ¶ms) +{ + // The network params passed to the NetworkRunner may be changed by this function, hence we copy them. 
+ auto final_net_params = params; - case InferenceMode::RAW: // Fallthrough - case InferenceMode::RAW_ASYNC: // Fallthrough - case InferenceMode::RAW_ASYNC_SINGLE_THREAD: - { - auto input_streams = cfgr_net_group->get_input_streams(); - CHECK_AS_EXPECTED(input_streams.size() > 0, HAILO_INTERNAL_FAILURE); + std::shared_ptr net_runner_ptr = nullptr; + if (InferenceMode::FULL_ASYNC == final_net_params.mode) { + auto runner_exp = FullAsyncNetworkRunner::create_shared(vdevice, final_net_params); + CHECK_EXPECTED(runner_exp); + net_runner_ptr = runner_exp.release(); + } else { + auto hef = Hef::create(final_net_params.hef_path); + CHECK_EXPECTED(hef); + + auto expected_net_group_name = get_network_group_name(final_net_params, hef.value()); + CHECK_EXPECTED(expected_net_group_name); + + auto cfg_params = vdevice.create_configure_params(hef.value(), expected_net_group_name.value()); + CHECK_EXPECTED(cfg_params); + cfg_params->batch_size = final_net_params.batch_size; + if (final_net_params.batch_size == HAILO_DEFAULT_BATCH_SIZE) { + // Changing batch_size to 1 (after configuring the vdevice) - as we iterate over 'final_net_params.batch_size' in latency measurements scenarios + final_net_params.batch_size = 1; + } + if (final_net_params.measure_hw_latency) { + cfg_params->latency |= HAILO_LATENCY_MEASURE; + } + if (final_net_params.is_async()) { + for (auto &stream_name_params_pair : cfg_params->stream_params_by_name) { + stream_name_params_pair.second.flags = HAILO_STREAM_FLAGS_ASYNC; + } + } + auto cfgr_net_groups = vdevice.configure(hef.value(), {{expected_net_group_name.value(), cfg_params.value()}}); + CHECK_EXPECTED(cfgr_net_groups); + assert(1 == cfgr_net_groups->size()); + auto cfgr_net_group = cfgr_net_groups.value()[0]; + + if (HAILO_SCHEDULING_ALGORITHM_NONE != final_net_params.scheduling_algorithm) { + CHECK_SUCCESS_AS_EXPECTED(cfgr_net_group->set_scheduler_threshold(final_net_params.scheduler_threshold)); + 
CHECK_SUCCESS_AS_EXPECTED(cfgr_net_group->set_scheduler_timeout(std::chrono::milliseconds(final_net_params.scheduler_timeout_ms))); + CHECK_SUCCESS_AS_EXPECTED(cfgr_net_group->set_scheduler_priority(final_net_params.scheduler_priority)); + } - auto output_streams = cfgr_net_group->get_output_streams(); - CHECK_AS_EXPECTED(output_streams.size() > 0, HAILO_INTERNAL_FAILURE); + switch (final_net_params.mode) + { + case InferenceMode::FULL: + { + std::map vstreams_params; + for (auto &vstream_params : final_net_params.vstream_params) { + vstreams_params.emplace(vstream_params.name, vstream_params.params); + } + auto vstreams = create_vstreams(*cfgr_net_group, vstreams_params); + CHECK_EXPECTED(vstreams); + + auto net_runner = make_shared_nothrow(final_net_params, expected_net_group_name.value(), vdevice, + std::move(vstreams->first), std::move(vstreams->second), cfgr_net_group); + CHECK_NOT_NULL_AS_EXPECTED(net_runner, HAILO_OUT_OF_HOST_MEMORY); + net_runner_ptr = std::static_pointer_cast(net_runner); + break; + } + case InferenceMode::RAW: // Fallthrough + case InferenceMode::RAW_ASYNC: // Fallthrough + case InferenceMode::RAW_ASYNC_SINGLE_THREAD: + { + auto input_streams = cfgr_net_group->get_input_streams(); + CHECK_AS_EXPECTED(input_streams.size() > 0, HAILO_INTERNAL_FAILURE); + + auto output_streams = cfgr_net_group->get_output_streams(); + CHECK_AS_EXPECTED(output_streams.size() > 0, HAILO_INTERNAL_FAILURE); + + auto net_runner = make_shared_nothrow(final_net_params, expected_net_group_name.value(), vdevice, + std::move(input_streams), std::move(output_streams), cfgr_net_group); + CHECK_NOT_NULL_AS_EXPECTED(net_runner, HAILO_OUT_OF_HOST_MEMORY); + net_runner_ptr = std::static_pointer_cast(net_runner); + break; + } - auto net_runner = make_shared_nothrow(final_net_params, net_group_name, vdevice, - std::move(input_streams), std::move(output_streams), cfgr_net_group); - CHECK_NOT_NULL_AS_EXPECTED(net_runner, HAILO_OUT_OF_HOST_MEMORY); - net_runner_ptr = 
std::static_pointer_cast(net_runner); - break; - } + default: + // Shouldn't get here + return make_unexpected(HAILO_INTERNAL_FAILURE); + } - default: - // Shouldn't get here - return make_unexpected(HAILO_INTERNAL_FAILURE); - } + if (final_net_params.measure_overall_latency || final_net_params.measure_hw_latency) { + auto input_names = net_runner_ptr->get_input_names(); + auto output_names = net_runner_ptr->get_output_names(); - if (final_net_params.measure_overall_latency || final_net_params.measure_hw_latency) { - auto input_names = net_runner_ptr->get_input_names(); - auto output_names = net_runner_ptr->get_output_names(); + CHECK_AS_EXPECTED((1 == input_names.size()), HAILO_INVALID_OPERATION, + "Latency measurement over multiple inputs network is not supported"); - CHECK_AS_EXPECTED((1 == input_names.size()), HAILO_INVALID_OPERATION, - "Latency measurement over multiple inputs network is not supported"); + if (final_net_params.measure_overall_latency) { + auto overall_latency_meter = make_shared_nothrow(output_names, OVERALL_LATENCY_TIMESTAMPS_LIST_LENGTH); + CHECK_NOT_NULL_AS_EXPECTED(overall_latency_meter, HAILO_OUT_OF_HOST_MEMORY); + net_runner_ptr->set_overall_latency_meter(overall_latency_meter); + } - if (final_net_params.measure_overall_latency) { - auto overall_latency_meter = make_shared_nothrow(output_names, OVERALL_LATENCY_TIMESTAMPS_LIST_LENGTH); - CHECK_NOT_NULL_AS_EXPECTED(overall_latency_meter, HAILO_OUT_OF_HOST_MEMORY); - net_runner_ptr->set_overall_latency_meter(overall_latency_meter); + // We use a barrier for both hw and overall latency + auto latency_barrier = make_shared_nothrow(input_names.size() + output_names.size()); + CHECK_NOT_NULL_AS_EXPECTED(latency_barrier, HAILO_OUT_OF_HOST_MEMORY); + net_runner_ptr->set_latency_barrier(latency_barrier); } - - // We use a barrier for both hw and overall latency - auto latency_barrier = make_shared_nothrow(input_names.size() + output_names.size()); - CHECK_NOT_NULL_AS_EXPECTED(latency_barrier, 
HAILO_OUT_OF_HOST_MEMORY); - net_runner_ptr->set_latency_barrier(latency_barrier); } return net_runner_ptr; @@ -222,17 +326,19 @@ hailo_status NetworkRunner::run(EventPtr shutdown_event, LiveStats &live_stats, { auto ang = std::unique_ptr(nullptr); if (HAILO_SCHEDULING_ALGORITHM_NONE == m_params.scheduling_algorithm) { - auto ang_exp = m_cng->activate(); - if (!ang_exp) { - activation_barrier.terminate(); + if (m_cng) { + auto ang_exp = m_cng->activate(); + if (!ang_exp) { + activation_barrier.terminate(); + } + CHECK_EXPECTED_AS_STATUS(ang_exp); + ang = ang_exp.release(); } - CHECK_EXPECTED_AS_STATUS(ang_exp); - ang = ang_exp.release(); } // If we measure latency (hw or overall) we send frames one at a time. Hence we don't measure fps. const auto measure_fps = !m_params.measure_hw_latency && !m_params.measure_overall_latency; - auto net_live_track = std::make_shared(m_name, m_cng, m_overall_latency_meter, measure_fps, m_params.hef_path); + auto net_live_track = std::make_shared(m_name, m_cng, m_configured_infer_model, m_overall_latency_meter, measure_fps, m_params.hef_path); live_stats.add(net_live_track, 1); //support progress over multiple outputs #if defined(_MSC_VER) @@ -241,7 +347,7 @@ hailo_status NetworkRunner::run(EventPtr shutdown_event, LiveStats &live_stats, activation_barrier.arrive_and_wait(); - if (m_params.mode == InferenceMode::RAW_ASYNC_SINGLE_THREAD) { + if ((InferenceMode::RAW_ASYNC_SINGLE_THREAD == m_params.mode) || (InferenceMode::FULL_ASYNC == m_params.mode)) { return run_single_thread_async_infer(shutdown_event, net_live_track); } else { auto threads = start_inference_threads(shutdown_event, net_live_track); @@ -278,17 +384,6 @@ double NetworkRunner::get_last_measured_fps() return m_last_measured_fps; } -hailo_vstream_params_t update_quantize_flag_in_vstream_param(const hailo_vstream_info_t &vstream_info, const hailo_vstream_params_t &old_vstream_params) -{ - hailo_vstream_params_t res = old_vstream_params; - if ((HAILO_FORMAT_TYPE_FLOAT32 
== old_vstream_params.user_buffer_format.type) || (HailoRTCommon::is_nms(vstream_info))) { - res.user_buffer_format.flags &= (~HAILO_FORMAT_FLAGS_QUANTIZED); - } else { - res.user_buffer_format.flags |= (HAILO_FORMAT_FLAGS_QUANTIZED); - } - return res; -} - Expected, std::vector>> NetworkRunner::create_vstreams( ConfiguredNetworkGroup &net_group, const std::map ¶ms) {//TODO: support network name @@ -298,14 +393,11 @@ Expected, std::vector>> Netwo auto input_vstreams_info = net_group.get_input_vstream_infos(); CHECK_EXPECTED(input_vstreams_info); for (auto &input_vstream_info : input_vstreams_info.value()) { - auto elem_it = params.find(input_vstream_info.name); - if (elem_it != params.end()) { - auto vstream_param = update_quantize_flag_in_vstream_param(input_vstream_info, elem_it->second); - input_vstreams_params.emplace(input_vstream_info.name, vstream_param); + if (params.end() != params.find(input_vstream_info.name)) { match_count++; + input_vstreams_params.emplace(input_vstream_info.name, params.at(input_vstream_info.name)); } else { - auto vstream_param = update_quantize_flag_in_vstream_param(input_vstream_info, HailoRTDefaults::get_vstreams_params()); - input_vstreams_params.emplace(input_vstream_info.name, vstream_param); + input_vstreams_params.emplace(input_vstream_info.name, HailoRTDefaults::get_vstreams_params()); } } @@ -313,15 +405,11 @@ Expected, std::vector>> Netwo auto output_vstreams_info = net_group.get_output_vstream_infos(); CHECK_EXPECTED(output_vstreams_info); for (auto &output_vstream_info : output_vstreams_info.value()) { - auto elem_it = params.find(output_vstream_info.name); - if (elem_it != params.end()) { - auto vstream_param = update_quantize_flag_in_vstream_param(output_vstream_info, elem_it->second); - output_vstreams_params.emplace(output_vstream_info.name, vstream_param); + if (params.end() != params.find(output_vstream_info.name)) { match_count++; - } - else { - auto vstream_param = 
update_quantize_flag_in_vstream_param(output_vstream_info, HailoRTDefaults::get_vstreams_params()); - output_vstreams_params.emplace(output_vstream_info.name, vstream_param); + output_vstreams_params.emplace(output_vstream_info.name, params.at(output_vstream_info.name)); + } else { + output_vstreams_params.emplace(output_vstream_info.name, HailoRTDefaults::get_vstreams_params()); } } @@ -383,12 +471,7 @@ Expected>> FullNetworkRunner::start_inf void FullNetworkRunner::stop() { - for (auto &input_vstream : m_input_vstreams) { - (void) input_vstream.abort(); - } - for (auto &output_vstream : m_output_vstreams) { - (void) output_vstream.abort(); - } + (void) m_cng->shutdown(); } std::set FullNetworkRunner::get_input_names() @@ -423,6 +506,158 @@ VStreamParams FullNetworkRunner::get_params(const std::string &name) return VStreamParams(); } + +FullAsyncNetworkRunner::FullAsyncNetworkRunner(const NetworkParams ¶ms, const std::string &name, VDevice &vdevice, + std::shared_ptr infer_model, std::shared_ptr configured_infer_model) : + NetworkRunner(params, name, vdevice, infer_model, configured_infer_model) +{ +} + +void FullAsyncNetworkRunner::stop() +{} + +std::set FullAsyncNetworkRunner::get_input_names() +{ + std::set results; + for (const auto &name : m_infer_model->get_input_names()) { + results.insert(name); + } + return results; +} + +std::set FullAsyncNetworkRunner::get_output_names() +{ + std::set results; + for (const auto &name : m_infer_model->get_output_names()) { + results.insert(name); + } + return results; +} + +VStreamParams FullAsyncNetworkRunner::get_params(const std::string &name) +{ + for (const auto ¶ms : m_params.vstream_params) { + if (name == params.name) { + return params; + } + } + return VStreamParams(); +} + +Expected FullAsyncNetworkRunner::create_infer_job(const ConfiguredInferModel::Bindings &bindings, + std::weak_ptr net_live_track_weak, FramerateThrottle &frame_rate_throttle, hailo_status &inference_status) +{ + 
frame_rate_throttle.throttle(); + if (m_overall_latency_meter) { + m_overall_latency_meter->add_start_sample(std::chrono::steady_clock::now().time_since_epoch()); + } + auto job = m_configured_infer_model->run_async(bindings, [=, &inference_status] (const AsyncInferCompletionInfo &completion_info) { + if (HAILO_SUCCESS != completion_info.status) { + inference_status = completion_info.status; + LOGGER__ERROR("Failed in infer async request"); + return; + } + if (m_overall_latency_meter) { + m_overall_latency_meter->add_end_sample("INFERENCE", std::chrono::steady_clock::now().time_since_epoch()); + } + if (auto net_live_track = net_live_track_weak.lock()) { + /* Using weak_ptr as net_live_track holds a reference to m_configured_infer_model (for stuff like latency measurement), + so there's a circular dependency */ + net_live_track->progress(); + } + }); + CHECK_EXPECTED(job); + return job.release(); +} + +hailo_status FullAsyncNetworkRunner::run_single_thread_async_infer(EventPtr shutdown_event, + std::shared_ptr net_live_track) +{ + auto signal_event_scope_guard = SignalEventScopeGuard(*shutdown_event); + + std::map inputs_buffer_pool; + const uint8_t const_byte = 0xAB; + for (const auto &input_name : get_input_names()) { + inputs_buffer_pool[input_name] = {}; + auto input_config = m_infer_model->input(input_name); + CHECK_EXPECTED_AS_STATUS(input_config); + + auto params = get_params(input_name); + if (params.input_file_path.empty()) { + auto constant_buffer = Buffer::create(input_config->get_frame_size(), const_byte, BufferStorageParams::create_dma()); + CHECK_EXPECTED_AS_STATUS(constant_buffer); + inputs_buffer_pool[input_name] = constant_buffer.release(); + } else { + auto buffer = read_binary_file(params.input_file_path, BufferStorageParams::create_dma()); + CHECK_EXPECTED_AS_STATUS(buffer); + inputs_buffer_pool[input_name] = buffer.release(); + } + } + + std::map outputs_buffer_pool; + for (const auto &output_name : get_output_names()) { + 
outputs_buffer_pool[output_name] = {}; + auto output_config = m_infer_model->output(output_name); + CHECK_EXPECTED_AS_STATUS(output_config); + + auto constant_buffer = Buffer::create(output_config->get_frame_size(), 0, BufferStorageParams::create_dma()); + CHECK_EXPECTED_AS_STATUS(constant_buffer); + outputs_buffer_pool[output_name] = constant_buffer.release(); + } + + std::unique_ptr guard = nullptr; + if (HAILO_SCHEDULING_ALGORITHM_NONE != m_params.scheduling_algorithm) { + auto status = m_configured_infer_model->set_scheduler_threshold(m_params.scheduler_threshold); + CHECK_SUCCESS(status); + + status = m_configured_infer_model->set_scheduler_timeout(std::chrono::milliseconds(m_params.scheduler_timeout_ms)); + CHECK_SUCCESS(status); + + status = m_configured_infer_model->set_scheduler_priority(m_params.scheduler_priority); + CHECK_SUCCESS(status); + } else { + auto guard_exp = ConfiguredInferModelActivationGuard::create(m_configured_infer_model); + CHECK_EXPECTED_AS_STATUS(guard_exp); + guard = guard_exp.release(); + } + + auto bindings = m_configured_infer_model->create_bindings(); + CHECK_EXPECTED_AS_STATUS(bindings); + + for (auto &pair : inputs_buffer_pool) { + auto &name = pair.first; + auto &buffer = pair.second; + bindings->input(name)->set_buffer(hailort::MemoryView(buffer)); + } + for (auto &pair : outputs_buffer_pool) { + auto &name = pair.first; + auto &buffer = pair.second; + bindings->output(name)->set_buffer(hailort::MemoryView(buffer)); + } + + FramerateThrottle frame_rate_throttle(m_params.framerate); + + AsyncInferJob last_job; + auto inference_status = HAILO_SUCCESS; + while (HAILO_TIMEOUT == shutdown_event->wait(std::chrono::milliseconds(0)) && (HAILO_SUCCESS == inference_status)) { + for (uint32_t frames_in_cycle = 0; frames_in_cycle < m_params.batch_size; frames_in_cycle++) { + if (HAILO_SUCCESS == m_configured_infer_model->wait_for_async_ready(HAILO_INFINITE_TIMEOUT)) { + auto job_exp = create_infer_job(*bindings, net_live_track, 
frame_rate_throttle, inference_status); + CHECK_EXPECTED_AS_STATUS(job_exp); + last_job = job_exp.release(); + last_job.detach(); + } + } + if (m_latency_barrier) { + // When measuring latency we want to send 'batch' frames at a time + last_job.wait(HAILO_INFINITE_TIMEOUT); + } + } + last_job.wait(HAILO_INFINITE_TIMEOUT); + + return inference_status; +} + RawNetworkRunner::RawNetworkRunner(const NetworkParams ¶ms, const std::string &name, VDevice &vdevice, InputStreamRefVector &&input_streams, OutputStreamRefVector &&output_streams, std::shared_ptr cng) : @@ -570,12 +805,7 @@ hailo_status RawNetworkRunner::run_single_thread_async_infer(EventPtr shutdown_e void RawNetworkRunner::stop() { - for (auto &input_stream : m_input_streams) { - (void) input_stream.get().abort(); - } - for (auto &output_stream : m_output_streams) { - (void) output_stream.get().abort(); - } + m_cng->shutdown(); } std::set RawNetworkRunner::get_input_names() diff --git a/hailort/hailortcli/run2/network_runner.hpp b/hailort/hailortcli/run2/network_runner.hpp index ffe3385..d0d0376 100644 --- a/hailort/hailortcli/run2/network_runner.hpp +++ b/hailort/hailortcli/run2/network_runner.hpp @@ -24,6 +24,7 @@ #include "hailo/vstream.hpp" #include "hailo/event.hpp" #include "hailo/network_group.hpp" +#include "hailo/infer_model.hpp" #include "hailo/expected.hpp" #include "hailo/buffer.hpp" @@ -37,6 +38,7 @@ constexpr std::chrono::milliseconds SYNC_EVENT_TIMEOUT(1000); enum class InferenceMode { FULL, + FULL_ASYNC, RAW, RAW_ASYNC, @@ -74,6 +76,7 @@ struct NetworkParams std::vector vstream_params; std::vector stream_params; hailo_scheduling_algorithm_t scheduling_algorithm; + bool multi_process_service; // Network parameters uint16_t batch_size; @@ -90,7 +93,7 @@ struct NetworkParams bool is_async() const { - return (mode == InferenceMode::RAW_ASYNC) || (mode == InferenceMode::RAW_ASYNC_SINGLE_THREAD); + return (mode == InferenceMode::RAW_ASYNC) || (mode == InferenceMode::RAW_ASYNC_SINGLE_THREAD) || (mode 
== InferenceMode::FULL_ASYNC); } }; @@ -121,6 +124,8 @@ public: NetworkRunner(const NetworkParams ¶ms, const std::string &name, VDevice &vdevice, std::shared_ptr cng); + NetworkRunner(const NetworkParams ¶ms, const std::string &name, + VDevice &vdevice, std::shared_ptr infer_model, std::shared_ptr configured_infer_model); virtual ~NetworkRunner() = default; hailo_status run(EventPtr shutdown_event, LiveStats &live_stats, Barrier &activation_barrier); @@ -134,6 +139,7 @@ public: protected: static bool inference_succeeded(hailo_status status); + static Expected get_network_group_name(const NetworkParams ¶ms, const Hef &hef); // Use 'inference_succeeded(async_thread->get())' to check for a thread's success virtual Expected>> start_inference_threads(EventPtr shutdown_event, std::shared_ptr net_live_track) = 0; @@ -304,6 +310,8 @@ protected: const NetworkParams m_params; std::string m_name; std::shared_ptr m_cng; + std::shared_ptr m_infer_model; + std::shared_ptr m_configured_infer_model; LatencyMeterPtr m_overall_latency_meter; BarrierPtr m_latency_barrier; double m_last_measured_fps; @@ -339,6 +347,70 @@ private: std::vector m_output_vstreams; }; +class FullAsyncNetworkRunner : public NetworkRunner +{ +public: + class ConfiguredInferModelActivationGuard final { + public: + static Expected> create( + std::shared_ptr configured_infer_model) + { + auto status = HAILO_UNINITIALIZED; + auto ptr = std::make_unique(ConfiguredInferModelActivationGuard(configured_infer_model, status)); + CHECK_NOT_NULL_AS_EXPECTED(ptr, HAILO_OUT_OF_HOST_MEMORY); + CHECK_SUCCESS_AS_EXPECTED(status); + + return ptr; + } + + ~ConfiguredInferModelActivationGuard() + { + if (HAILO_SUCCESS == m_activation_status) { + (void)m_configured_infer_model->deactivate(); + } + } + + ConfiguredInferModelActivationGuard(const ConfiguredInferModelActivationGuard &) = delete; + ConfiguredInferModelActivationGuard &operator=(const ConfiguredInferModelActivationGuard &) = delete; + 
ConfiguredInferModelActivationGuard &operator=(ConfiguredInferModelActivationGuard &&other) = delete; + ConfiguredInferModelActivationGuard(ConfiguredInferModelActivationGuard &&other) : + m_configured_infer_model(other.m_configured_infer_model), m_activation_status(std::exchange(other.m_activation_status, HAILO_UNINITIALIZED)) + {}; + + private: + ConfiguredInferModelActivationGuard(std::shared_ptr configured_infer_model, hailo_status &status) : + m_configured_infer_model(configured_infer_model), m_activation_status(HAILO_UNINITIALIZED) + { + status = m_configured_infer_model->activate(); + m_activation_status = status; + } + + std::shared_ptr m_configured_infer_model; + hailo_status m_activation_status; + }; + + static Expected> create_shared(VDevice &vdevice, NetworkParams params); + + FullAsyncNetworkRunner(const NetworkParams ¶ms, const std::string &name, VDevice &vdevice, std::shared_ptr infer_model, + std::shared_ptr configured_infer_model); + + virtual Expected>> start_inference_threads(EventPtr /*shutdown_event*/, + std::shared_ptr /*net_live_track*/) override + { + return make_unexpected(HAILO_NOT_IMPLEMENTED); + }; + + virtual hailo_status run_single_thread_async_infer(EventPtr, std::shared_ptr) override; + + Expected create_infer_job(const ConfiguredInferModel::Bindings &bindings, + std::weak_ptr net_live_track, FramerateThrottle &frame_rate_throttle, hailo_status &inference_status); + + virtual void stop() override; + virtual std::set get_input_names() override; + virtual std::set get_output_names() override; + VStreamParams get_params(const std::string &name); +}; + class RawNetworkRunner : public NetworkRunner { public: diff --git a/hailort/hailortcli/run2/run2_command.cpp b/hailort/hailortcli/run2/run2_command.cpp index 53ec37f..6de243e 100644 --- a/hailort/hailortcli/run2/run2_command.cpp +++ b/hailort/hailortcli/run2/run2_command.cpp @@ -208,12 +208,6 @@ VStreamApp::VStreamApp(const std::string &description, const std::string &name, { "i420", 
HAILO_FORMAT_ORDER_I420 } })) ->default_val("auto"); - - auto quantized_option = format_opt_group->add_flag("-q,--quantized,!--no-quantized", - "Whether or not data is quantized. This flag is ignored - Determine if the data requires quantization is decided by the src-data and dst-data types.") - ->default_val(true); // default_val() must be after run_callback_for_default() - - hailo_deprecate_options(format_opt_group, { std::make_shared(quantized_option) }, false); } CLI::Option* VStreamApp::add_flag_callback(CLI::App *app, const std::string &name, const std::string &description, @@ -242,16 +236,6 @@ StreamApp::StreamApp(const std::string &description, const std::string &name, CL add_option("--input-file", m_stream_params.input_file_path, "Input file path. If not given, random data will be used. File format should be raw binary data with size that is a factor of the input shape size") ->default_val(""); - - // TODO: async option (HRT-9580) - // TODO: flag callback? - // add_flag_callback(format_opt_group, "-q,--quantized,!--no-quantized", "Whether or not data is quantized", - // [this](bool result){ - // m_params.params.user_buffer_format.flags = result ? 
- // static_cast(m_params.params.user_buffer_format.flags | HAILO_FORMAT_FLAGS_QUANTIZED) : - // static_cast(m_params.params.user_buffer_format.flags & (~HAILO_FORMAT_FLAGS_QUANTIZED));}) - // ->run_callback_for_default() - // ->default_val(true); // default_val() must be after run_callback_for_default() } /** NetworkGroupNameValidator */ @@ -294,9 +278,6 @@ NetworkApp::NetworkApp(const std::string &description, const std::string &name) auto run_params = add_option_group("Run Parameters"); run_params->add_option("--framerate", m_params.framerate, "Input vStreams framerate")->default_val(UNLIMITED_FRAMERATE); - // TODO: support multiple scheduling algorithms - m_params.scheduling_algorithm = HAILO_SCHEDULING_ALGORITHM_ROUND_ROBIN; - auto vstream_subcommand = add_io_app_subcom("Set vStream", "set-vstream", hef_path_option, net_group_name_option); auto stream_subcommand = add_io_app_subcom("Set Stream", "set-stream", hef_path_option, net_group_name_option); // TODO: doesn't seam to be working (HRT-9886) @@ -334,19 +315,22 @@ public: InferenceMode get_mode() const; const std::string &get_output_json_path(); - void set_scheduling_algorithm(hailo_scheduling_algorithm_t scheduling_algorithm); - void set_inference_mode(); - void set_measure_latency(); + void update_network_params(); void set_batch_size(uint16_t batch_size); private: void add_measure_fw_actions_subcom(); void add_net_app_subcom(); + + bool is_ethernet_device() const; + void validate_and_set_scheduling_algorithm(); + std::vector m_network_params; uint32_t m_time_to_run; InferenceMode m_mode; + hailo_scheduling_algorithm_t m_scheduling_algorithm = HAILO_SCHEDULING_ALGORITHM_MAX_ENUM; std::string m_stats_json_path; - std::vector m_device_id; + std::vector m_device_ids; uint32_t m_device_count; bool m_multi_process_service; std::string m_group_id; @@ -373,26 +357,35 @@ Run2::Run2() : CLI::App("Run networks", "run2") add_option("-m,--mode", m_mode, "Inference mode") ->transform(HailoCheckedTransformer({ { 
"full", InferenceMode::FULL }, + { "full_async", InferenceMode::FULL_ASYNC }, { "raw", InferenceMode::RAW }, { "raw_async", InferenceMode::RAW_ASYNC }, { "raw_async_single_thread", InferenceMode::RAW_ASYNC_SINGLE_THREAD, OptionVisibility::HIDDEN } }))->default_val("full"); add_option("-j,--json", m_stats_json_path, "If set save statistics as json to the specified path") - ->default_val("") - ->check(FileSuffixValidator(JSON_SUFFIX)); + ->default_val("") + ->check(FileSuffixValidator(JSON_SUFFIX)); + + add_option("--scheduling-algorithm", m_scheduling_algorithm, "Scheduling algorithm") + ->transform(HailoCheckedTransformer({ + { "round_robin", HAILO_SCHEDULING_ALGORITHM_ROUND_ROBIN }, + { "none", HAILO_SCHEDULING_ALGORITHM_NONE }, + })); auto vdevice_options_group = add_option_group("VDevice Options"); - auto dev_id_opt = vdevice_options_group->add_option("-s,--device-id", m_device_id, + auto dev_id_opt = vdevice_options_group->add_option("-s,--device-id", m_device_ids, "Device id, same as returned from `hailortcli scan` command. 
For multiple devices, use space as separator."); vdevice_options_group->add_option("--device-count", m_device_count, "VDevice device count") ->default_val(HAILO_DEFAULT_DEVICE_COUNT) ->check(CLI::PositiveNumber) ->excludes(dev_id_opt); - vdevice_options_group->add_option("--group-id", m_group_id, "VDevice group id") ->default_val(HAILO_DEFAULT_VDEVICE_GROUP_ID); + auto multi_process_flag = vdevice_options_group + ->add_flag("--multi-process-service", m_multi_process_service,"VDevice multi process service") + ->default_val(false); auto measurement_options_group = add_option_group("Measurement Options"); @@ -411,21 +404,17 @@ Run2::Run2() : CLI::App("Run networks", "run2") auto measure_temp_opt = measurement_options_group->add_flag("--measure-temp", m_measure_temp, "Measure chip temperature") ->default_val(false); - auto multi_process_flag = vdevice_options_group->add_flag("--multi-process-service", m_multi_process_service, "VDevice multi process service") - ->default_val(false); - if (VDevice::service_over_ip_mode()) { multi_process_flag ->excludes(measure_power_opt) ->excludes(measure_current_opt) ->excludes(measure_temp_opt); // When working with service over ip - client doesn't have access to physical devices - } else { - (void)measure_power_opt; - (void)measure_current_opt; - (void)measure_temp_opt; - (void)multi_process_flag; } + + parse_complete_callback([this]() { + validate_and_set_scheduling_algorithm(); + }); } void Run2::add_measure_fw_actions_subcom() @@ -510,8 +499,8 @@ bool Run2::get_measure_overall_latency() std::vector Run2::get_dev_ids() { std::vector res; - res.reserve(m_device_id.size()); - for (auto &id_str : m_device_id) { + res.reserve(m_device_ids.size()); + for (auto &id_str : m_device_ids) { hailo_device_id_t id = {}; std::memset(id.id, 0, sizeof(id.id)); std::strncpy(id.id, id_str.c_str(), sizeof(id.id) - 1); @@ -525,25 +514,14 @@ uint32_t Run2::get_device_count() return m_device_count; } -void Run2::set_inference_mode() +void 
Run2::update_network_params() { for (auto ¶ms : m_network_params) { params.mode = m_mode; - } -} - -void Run2::set_scheduling_algorithm(hailo_scheduling_algorithm_t scheduling_algorithm) -{ - for (auto ¶ms: m_network_params) { - params.scheduling_algorithm = scheduling_algorithm; - } -} - -void Run2::set_measure_latency() -{ - for (auto ¶ms : m_network_params) { + params.multi_process_service = m_multi_process_service; params.measure_hw_latency = m_measure_hw_latency; params.measure_overall_latency = m_measure_overall_latency; + params.scheduling_algorithm = m_scheduling_algorithm; } } @@ -584,6 +562,51 @@ const std::string &Run2::get_output_json_path() return m_stats_json_path; } +static bool is_valid_ip(const std::string &ip) +{ + int a,b,c,d; + return (4 == sscanf(ip.c_str(),"%d.%d.%d.%d", &a, &b, &c, &d)) && + IS_FIT_IN_UINT8(a) && IS_FIT_IN_UINT8(b) && IS_FIT_IN_UINT8(c) && IS_FIT_IN_UINT8(d); +} + +bool Run2::is_ethernet_device() const +{ + if (m_device_ids.empty()) { + // By default, if no device ids are given we don't scan for ethernet devices. 
+ return false; + } + return is_valid_ip(m_device_ids[0]); +} + +void Run2::validate_and_set_scheduling_algorithm() +{ + if (m_scheduling_algorithm == HAILO_SCHEDULING_ALGORITHM_NONE) { + PARSE_CHECK(1 == get_network_params().size(), "When setting --scheduling-algorithm=none only one model is allowed"); + } + + if (is_ethernet_device()) { + PARSE_CHECK((m_scheduling_algorithm == HAILO_SCHEDULING_ALGORITHM_MAX_ENUM) || + (m_scheduling_algorithm == HAILO_SCHEDULING_ALGORITHM_NONE), + "On ethernet devices, only --scheduling-algorithm=none is supported"); + PARSE_CHECK(1 == get_network_params().size(), "On Ethernet device only one model is allowed"); + m_scheduling_algorithm = HAILO_SCHEDULING_ALGORITHM_NONE; + } + + if (get_measure_fw_actions()) { + PARSE_CHECK((m_scheduling_algorithm == HAILO_SCHEDULING_ALGORITHM_MAX_ENUM) || + (m_scheduling_algorithm == HAILO_SCHEDULING_ALGORITHM_NONE), + "When measuring fw actions, only --scheduling-algorithm=none is allowed"); + PARSE_CHECK(1 == get_network_params().size(), + "Only one model is allowed when measuring fw actions"); + m_scheduling_algorithm = HAILO_SCHEDULING_ALGORITHM_NONE; + } + + if (HAILO_SCHEDULING_ALGORITHM_MAX_ENUM == m_scheduling_algorithm) { + // algorithm wasn't passed, using ROUND_ROBIN as default + m_scheduling_algorithm = HAILO_SCHEDULING_ALGORITHM_ROUND_ROBIN; + } +} + /** Run2Command */ Run2Command::Run2Command(CLI::App &parent_app) : Command(parent_app.add_subcommand(std::make_shared())) { @@ -602,18 +625,13 @@ static hailo_status wait_for_threads(std::vector> & return last_error_status; } -bool is_valid_ip(const std::string &ip) -{ - int a,b,c,d; - return (4 == sscanf(ip.c_str(),"%d.%d.%d.%d", &a, &b, &c, &d)) && - IS_FIT_IN_UINT8(a) && IS_FIT_IN_UINT8(b) && IS_FIT_IN_UINT8(c) && IS_FIT_IN_UINT8(d); -} - std::string get_str_infer_mode(const InferenceMode& infer_mode) { switch(infer_mode){ case InferenceMode::FULL: return "full"; + case InferenceMode::FULL_ASYNC: + return "full_async"; case 
InferenceMode::RAW: return "raw"; case InferenceMode::RAW_ASYNC: @@ -655,12 +673,6 @@ Expected> Run2::create_vdevice() if (!dev_ids.empty()) { vdevice_params.device_count = static_cast(dev_ids.size()); vdevice_params.device_ids = dev_ids.data(); - // Disable scheduler for eth VDevice - if ((1 == dev_ids.size()) && (is_valid_ip(dev_ids[0].id))) { - vdevice_params.scheduling_algorithm = HAILO_SCHEDULING_ALGORITHM_NONE; - CHECK_AS_EXPECTED(1 == get_network_params().size(), HAILO_INVALID_OPERATION, "On Ethernet inference only one model is allowed"); - set_scheduling_algorithm(HAILO_SCHEDULING_ALGORITHM_NONE); - } } else { vdevice_params.device_count = get_device_count(); } @@ -672,13 +684,12 @@ Expected> Run2::create_vdevice() CHECK_AS_EXPECTED(!(get_measure_hw_latency() || get_measure_overall_latency()), HAILO_INVALID_OPERATION, "Latency measurement is not allowed when collecting runtime data"); CHECK_AS_EXPECTED((get_mode() == InferenceMode::RAW) || (get_mode() == InferenceMode::RAW_ASYNC), HAILO_INVALID_OPERATION, "'measure-fw-actions' is only supported with '--mode=raw'. 
Received mode: '{}'", get_str_infer_mode(get_mode())); - - vdevice_params.scheduling_algorithm = HAILO_SCHEDULING_ALGORITHM_NONE; - set_scheduling_algorithm(HAILO_SCHEDULING_ALGORITHM_NONE); } vdevice_params.group_id = get_group_id().c_str(); vdevice_params.multi_process_service = get_multi_process_service(); + assert(HAILO_SCHEDULING_ALGORITHM_MAX_ENUM != m_scheduling_algorithm); + vdevice_params.scheduling_algorithm = m_scheduling_algorithm; return VDevice::create(vdevice_params); } @@ -757,8 +768,7 @@ hailo_status Run2Command::execute() { Run2 *app = reinterpret_cast(m_app); - app->set_inference_mode(); - app->set_measure_latency(); + app->update_network_params(); CHECK(0 < app->get_network_params().size(), HAILO_INVALID_OPERATION, "Nothing to run"); @@ -767,7 +777,7 @@ hailo_status Run2Command::execute() LOGGER__WARNING("Measuring latency; frames are sent one at a time and FPS will not be measured"); } - if (1 == app->get_network_params().size()) { + if (1 == app->get_network_params().size() && (HAILO_SCHEDULING_ALGORITHM_ROUND_ROBIN == app->get_network_params().begin()->scheduling_algorithm)) { LOGGER__WARNING("\"hailortcli run2\" is not optimized for single model usage. 
It is recommended to use \"hailortcli run\" command for a single model"); } diff --git a/hailort/hailortcli/run_command.cpp b/hailort/hailortcli/run_command.cpp index 56535f2..63edf7c 100644 --- a/hailort/hailortcli/run_command.cpp +++ b/hailort/hailortcli/run_command.cpp @@ -445,28 +445,6 @@ hailo_status recv_loop(const inference_runner_params ¶ms, RecvObject &recv_o return HAILO_SUCCESS; } -template -hailo_status abort_streams(std::vector> &send_objects, - std::vector> &recv_objects) -{ - auto status = HAILO_SUCCESS; // Best effort - for (auto &output_stream : recv_objects) { - auto abort_status = output_stream.get().abort(); - if (HAILO_SUCCESS != abort_status) { - LOGGER__ERROR("Failed to abort output stream {}", output_stream.get().name()); - status = abort_status; - } - } - for (auto &input_stream : send_objects) { - auto abort_status = input_stream.get().abort(); - if (HAILO_SUCCESS != abort_status) { - LOGGER__ERROR("Failed to abort input stream {}", input_stream.get().name()); - status = abort_status; - } - } - return status; -} - Expected>> create_input_vstreams(ConfiguredNetworkGroup &configured_net_group, const inference_runner_params ¶ms) { @@ -705,7 +683,7 @@ static hailo_status run_streaming_impl(std::shared_ptr c auto status = wait_for_exit_with_timeout(std::chrono::seconds(params.time_to_run)); CHECK_SUCCESS(status); - status = abort_streams(send_objects, recv_objects); + status = configured_net_group->shutdown(); barrier.terminate(); CHECK_SUCCESS(status); } diff --git a/hailort/libhailort/CMakeLists.txt b/hailort/libhailort/CMakeLists.txt index c333356..d40ec84 100644 --- a/hailort/libhailort/CMakeLists.txt +++ b/hailort/libhailort/CMakeLists.txt @@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.0.0) # set(CMAKE_C_CLANG_TIDY "clang-tidy;-checks=*") set(HAILORT_MAJOR_VERSION 4) -set(HAILORT_MINOR_VERSION 15) +set(HAILORT_MINOR_VERSION 16) set(HAILORT_REVISION_VERSION 0) # Add the cmake folder so the modules there are found diff --git 
a/hailort/libhailort/bindings/gstreamer/CMakeLists.txt b/hailort/libhailort/bindings/gstreamer/CMakeLists.txt index 64b4b37..6a278f2 100644 --- a/hailort/libhailort/bindings/gstreamer/CMakeLists.txt +++ b/hailort/libhailort/bindings/gstreamer/CMakeLists.txt @@ -8,7 +8,7 @@ if(NOT CMAKE_HOST_UNIX) message(FATAL_ERROR "Only unix hosts are supported, stopping build") endif() -find_package(HailoRT 4.15.0 EXACT REQUIRED) +find_package(HailoRT 4.16.0 EXACT REQUIRED) # GST_PLUGIN_DEFINE needs PACKAGE to be defined set(GST_HAILO_PACKAGE_NAME "hailo") @@ -25,6 +25,7 @@ add_library(gsthailo SHARED gst-hailo/gsthailonet.cpp gst-hailo/gsthailosend.cpp gst-hailo/gsthailorecv.cpp + gst-hailo/gsthailonet2.cpp gst-hailo/gsthailodevicestats.cpp gst-hailo/common.cpp gst-hailo/network_group_handle.cpp diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/common.cpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/common.cpp index 87ee585..f116564 100644 --- a/hailort/libhailort/bindings/gstreamer/gst-hailo/common.cpp +++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/common.cpp @@ -25,4 +25,66 @@ HailoElemProperty::~HailoElemProperty() if (nullptr != m_value) { g_free(m_value); } +} + +GType gst_scheduling_algorithm_get_type (void) +{ + static GType scheduling_algorithm_type = 0; + + /* Tightly coupled to hailo_scheduling_algorithm_e */ + + if (!scheduling_algorithm_type) { + static GEnumValue algorithm_types[] = { + { HAILO_SCHEDULING_ALGORITHM_NONE, "Scheduler is not active", "HAILO_SCHEDULING_ALGORITHM_NONE" }, + { HAILO_SCHEDULING_ALGORITHM_ROUND_ROBIN, "Round robin", "HAILO_SCHEDULING_ALGORITHM_ROUND_ROBIN" }, + { HAILO_SCHEDULING_ALGORITHM_MAX_ENUM, NULL, NULL }, + }; + + scheduling_algorithm_type = + g_enum_register_static ("GstHailoSchedulingAlgorithms", algorithm_types); + } + + return scheduling_algorithm_type; +} + +GType gst_hailo_format_type_get_type (void) +{ + static GType format_type_enum = 0; + + /* Tightly coupled to hailo_format_type_t */ + + if 
(!format_type_enum) { + static GEnumValue format_types[] = { + { HAILO_FORMAT_TYPE_AUTO, "auto", "HAILO_FORMAT_TYPE_AUTO"}, + { HAILO_FORMAT_TYPE_UINT8, "uint8", "HAILO_FORMAT_TYPE_UINT8"}, + { HAILO_FORMAT_TYPE_UINT16, "uint16", "HAILO_FORMAT_TYPE_UINT16"}, + { HAILO_FORMAT_TYPE_FLOAT32, "float32", "HAILO_FORMAT_TYPE_FLOAT32"}, + { HAILO_FORMAT_TYPE_MAX_ENUM, NULL, NULL }, + }; + + format_type_enum = g_enum_register_static ("GstHailoFormatTypes", format_types); + } + + return format_type_enum; +} + +bool do_versions_match(GstElement *self) +{ + hailo_version_t libhailort_version = {}; + auto status = hailo_get_library_version(&libhailort_version); + if (HAILO_SUCCESS != status) { + GST_ELEMENT_ERROR(self, RESOURCE, FAILED, ("Fetching libhailort version has failed! status = %d", status), (NULL)); + return false; + } + + bool versions_match = ((HAILORT_MAJOR_VERSION == libhailort_version.major) && + (HAILORT_MINOR_VERSION == libhailort_version.minor) && + (HAILORT_REVISION_VERSION == libhailort_version.revision)); + if (!versions_match) { + GST_ELEMENT_ERROR(self, RESOURCE, FAILED, ("libhailort version (%d.%d.%d) does not match gsthailonet version (%d.%d.%d)", + libhailort_version.major, libhailort_version.minor, libhailort_version.revision, + HAILORT_MAJOR_VERSION, HAILORT_MINOR_VERSION, HAILORT_REVISION_VERSION), (NULL)); + return false; + } + return true; } \ No newline at end of file diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/common.hpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/common.hpp index ee8d5a4..bbee72b 100644 --- a/hailort/libhailort/bindings/gstreamer/gst-hailo/common.hpp +++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/common.hpp @@ -96,6 +96,74 @@ using namespace hailort; } \ } while(0) +#define _CHECK(cond, ret_val, ...) \ + do { \ + if (!(cond)) { \ + g_print(__VA_ARGS__); \ + g_print("\n"); \ + return (ret_val); \ + } \ + } while(0) + +#define CHECK(cond, ret_val, ...) 
_CHECK((cond), (ret_val), ##__VA_ARGS__) + +#define CHECK_AS_EXPECTED(cond, ret_val, ...) \ + _CHECK((cond), (make_unexpected(ret_val)), ##__VA_ARGS__) + +#define CHECK_NOT_NULL(arg, status) _CHECK(nullptr != (arg), status, "CHECK_NOT_NULL for %s failed", #arg) + +#define _CHECK_SUCCESS(status, ...) \ + do { \ + const auto &__check_success_status = (status); \ + _CHECK( \ + HAILO_SUCCESS == __check_success_status, \ + __check_success_status, \ + "CHECK_SUCCESS failed with status=%d", status \ + ); \ + } while(0) +#define CHECK_SUCCESS(status, ...) _CHECK_SUCCESS(status, "" __VA_ARGS__) + +#define _CHECK_SUCCESS_AS_EXPECTED(status, ...) \ + do { \ + const auto &__check_success_status = (status); \ + _CHECK( \ + HAILO_SUCCESS == __check_success_status, \ + make_unexpected(__check_success_status), \ + "CHECK_SUCCESS_AS_EXPECTED failed with status=%d", status \ + ); \ + } while(0) +#define CHECK_SUCCESS_AS_EXPECTED(status, ...) _CHECK_SUCCESS_AS_EXPECTED(status, "" __VA_ARGS__) + +#define _CHECK_EXPECTED_AS_STATUS(obj, ...) \ + do { \ + const auto &__check_expected_obj = (obj); \ + _CHECK( \ + __check_expected_obj.has_value(), \ + __check_expected_obj.status(), \ + "CHECK_EXPECTED_AS_STATUS failed with status=%d", __check_expected_obj.status() \ + ); \ + } while(0) +#define CHECK_EXPECTED_AS_STATUS(obj, ...) _CHECK_EXPECTED_AS_STATUS(obj, "" __VA_ARGS__) + +#define _CHECK_EXPECTED(obj, ...) \ + do { \ + const auto &__check_expected_obj = (obj); \ + _CHECK( \ + __check_expected_obj.has_value(), \ + make_unexpected(__check_expected_obj.status()), \ + "CHECK_EXPECTED failed with status=%d", __check_expected_obj.status() \ + ); \ + } while(0) +#define CHECK_EXPECTED(obj, ...) 
_CHECK_EXPECTED(obj, "" __VA_ARGS__) + +#define RGB_FEATURES_SIZE (3) +#define RGBA_FEATURES_SIZE (4) +#define GRAY8_FEATURES_SIZE (1) +#define YUY2_FEATURES_SIZE (2) +#define NV12_FEATURES_SIZE (3) +#define NV21_FEATURES_SIZE (3) +#define I420_FEATURES_SIZE (3) + // From https://stackoverflow.com/questions/57092289/do-stdmake-shared-and-stdmake-unique-have-a-nothrow-version template static inline std::unique_ptr make_unique_nothrow(Args&&... args) @@ -144,4 +212,12 @@ private: template<> HailoElemProperty::~HailoElemProperty(); +#define GST_TYPE_SCHEDULING_ALGORITHM (gst_scheduling_algorithm_get_type ()) +GType gst_scheduling_algorithm_get_type (void); + +#define GST_TYPE_HAILO_FORMAT_TYPE (gst_hailo_format_type_get_type ()) +GType gst_hailo_format_type_get_type (void); + +bool do_versions_match(GstElement *self); + #endif /* _GST_HAILO_COMMON_HPP_ */ \ No newline at end of file diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailonet.cpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailonet.cpp index c5aed45..d4c6421 100644 --- a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailonet.cpp +++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailonet.cpp @@ -31,51 +31,6 @@ GST_DEBUG_CATEGORY_STATIC(gst_hailonet_debug_category); #define GST_CAT_DEFAULT gst_hailonet_debug_category -#define GST_TYPE_SCHEDULING_ALGORITHM (gst_scheduling_algorithm_get_type ()) -static GType -gst_scheduling_algorithm_get_type (void) -{ - static GType scheduling_algorithm_type = 0; - - /* Tightly coupled to hailo_scheduling_algorithm_e */ - - if (!scheduling_algorithm_type) { - static GEnumValue algorithm_types[] = { - { HAILO_SCHEDULING_ALGORITHM_NONE, "Scheduler is not active", "HAILO_SCHEDULING_ALGORITHM_NONE" }, - { HAILO_SCHEDULING_ALGORITHM_ROUND_ROBIN, "Round robin", "HAILO_SCHEDULING_ALGORITHM_ROUND_ROBIN" }, - { HAILO_SCHEDULING_ALGORITHM_MAX_ENUM, NULL, NULL }, - }; - - scheduling_algorithm_type = - g_enum_register_static 
("GstHailoSchedulingAlgorithms", algorithm_types); - } - - return scheduling_algorithm_type; -} - -#define GST_TYPE_HAILO_FORMAT_TYPE (gst_hailo_format_type_get_type ()) -static GType -gst_hailo_format_type_get_type (void) -{ - static GType format_type_enum = 0; - - /* Tightly coupled to hailo_format_type_t */ - - if (!format_type_enum) { - static GEnumValue format_types[] = { - { HAILO_FORMAT_TYPE_AUTO, "auto", "HAILO_FORMAT_TYPE_AUTO"}, - { HAILO_FORMAT_TYPE_UINT8, "uint8", "HAILO_FORMAT_TYPE_UINT8"}, - { HAILO_FORMAT_TYPE_UINT16, "uint16", "HAILO_FORMAT_TYPE_UINT16"}, - { HAILO_FORMAT_TYPE_FLOAT32, "float32", "HAILO_FORMAT_TYPE_FLOAT32"}, - { HAILO_FORMAT_TYPE_MAX_ENUM, NULL, NULL }, - }; - - format_type_enum = g_enum_register_static ("GstHailoFormatTypes", format_types); - } - - return format_type_enum; -} - constexpr std::chrono::milliseconds WAIT_FOR_FLUSH_TIMEOUT_MS(1000); static void gst_hailonet_set_property(GObject *object, guint property_id, const GValue *value, GParamSpec *pspec); @@ -105,8 +60,6 @@ enum PROP_SCHEDULER_THRESHOLD, PROP_SCHEDULER_PRIORITY, PROP_MULTI_PROCESS_SERVICE, - PROP_INPUT_QUANTIZED, - PROP_OUTPUT_QUANTIZED, PROP_INPUT_FORMAT_TYPE, PROP_OUTPUT_FORMAT_TYPE, PROP_NMS_SCORE_THRESHOLD, @@ -200,14 +153,6 @@ static void gst_hailonet_class_init(GstHailoNetClass *klass) g_param_spec_boolean("multi-process-service", "Should run over HailoRT service", "Controls wether to run HailoRT over its service. " "To use this property, the service should be active and scheduling-algorithm should be set. Defaults to false.", HAILO_DEFAULT_MULTI_PROCESS_SERVICE, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - g_object_class_install_property(gobject_class, PROP_INPUT_QUANTIZED, - g_param_spec_boolean("input-quantized", "Is the input quantized or not", "Deprecated parameter that will be ignored. 
" - "Determine whether to quantize (scale) the data will be decided by the src-data and dst-data types.", - true, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - g_object_class_install_property(gobject_class, PROP_OUTPUT_QUANTIZED, - g_param_spec_boolean("output-quantized", "Should the output be quantized or de-quantized","Deprecated parameter that will be ignored. " - "Determine whether to de-quantize (rescale) the data will be decided by the src-data and dst-data types.", - true, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); g_object_class_install_property(gobject_class, PROP_INPUT_FORMAT_TYPE, g_param_spec_enum("input-format-type", "Input format type", "Input format type(auto, float32, uint16, uint8). Default value is auto." "Gets values from the enum GstHailoFormatType. ", @@ -531,22 +476,6 @@ void HailoNetImpl::set_property(GObject *object, guint property_id, const GValue } m_props.m_multi_process_service = g_value_get_boolean(value); break; - case PROP_INPUT_QUANTIZED: - g_warning("'input-quantized' is a deprecated parameter that will be ignored."); - if (m_was_configured) { - g_warning("The network was already configured so changing the quantized flag will not take place!"); - break; - } - m_props.m_input_quantized = g_value_get_boolean(value); - break; - case PROP_OUTPUT_QUANTIZED: - g_warning("'output-quantized' is a deprecated parameter that will be ignored."); - if (m_was_configured) { - g_warning("The network was already configured so changing the quantized flag will not take place!"); - break; - } - m_props.m_output_quantized = g_value_get_boolean(value); - break; case PROP_INPUT_FORMAT_TYPE: if (m_was_configured) { g_warning("The network was already configured so changing the format type will not take place!"); @@ -655,12 +584,6 @@ void HailoNetImpl::get_property(GObject *object, guint property_id, GValue *valu case PROP_MULTI_PROCESS_SERVICE: g_value_set_boolean(value, m_props.m_multi_process_service.get()); break; - 
case PROP_INPUT_QUANTIZED: - g_value_set_boolean(value, m_props.m_input_quantized.get()); - break; - case PROP_OUTPUT_QUANTIZED: - g_value_set_boolean(value, m_props.m_output_quantized.get()); - break; case PROP_INPUT_FORMAT_TYPE: g_value_set_enum(value, m_props.m_input_format_type.get()); break; @@ -770,14 +693,8 @@ hailo_status HailoNetImpl::configure_network_group() GST_CHECK_SUCCESS(status, m_element, RESOURCE, "Setting scheduler priority failed, status = %d", status); } - auto input_quantized = (m_props.m_input_quantized.was_changed()) ? static_cast(m_props.m_input_quantized.get()) : - (m_props.m_input_format_type.get() != HAILO_FORMAT_TYPE_FLOAT32); - - auto output_quantized = (m_props.m_output_quantized.was_changed()) ? static_cast(m_props.m_output_quantized.get()) : - (m_props.m_output_format_type.get() != HAILO_FORMAT_TYPE_FLOAT32); - auto vstreams = m_net_group_handle->create_vstreams(m_props.m_network_name.get(), m_props.m_scheduling_algorithm.get(), m_output_formats, - input_quantized, output_quantized, m_props.m_input_format_type.get(), m_props.m_output_format_type.get()); + m_props.m_input_format_type.get(), m_props.m_output_format_type.get()); GST_CHECK_EXPECTED_AS_STATUS(vstreams, m_element, RESOURCE, "Creating vstreams failed, status = %d", status); GST_HAILOSEND(m_hailosend)->impl->set_input_vstreams(std::move(vstreams->first)); @@ -969,30 +886,9 @@ hailo_status HailoNetImpl::signal_was_flushed_event() return m_was_flushed_event->signal(); } -static bool do_versions_match(GstHailoNet *self) -{ - hailo_version_t libhailort_version = {}; - auto status = hailo_get_library_version(&libhailort_version); - if (HAILO_SUCCESS != status) { - GST_ELEMENT_ERROR(self, RESOURCE, FAILED, ("Fetching libhailort version has failed! 
status = %d", status), (NULL)); - return false; - } - - bool versions_match = ((HAILORT_MAJOR_VERSION == libhailort_version.major) && - (HAILORT_MINOR_VERSION == libhailort_version.minor) && - (HAILORT_REVISION_VERSION == libhailort_version.revision)); - if (!versions_match) { - GST_ELEMENT_ERROR(self, RESOURCE, FAILED, ("libhailort version (%d.%d.%d) does not match gsthailonet version (%d.%d.%d)", - libhailort_version.major, libhailort_version.minor, libhailort_version.revision, - HAILORT_MAJOR_VERSION, HAILORT_MINOR_VERSION, HAILORT_REVISION_VERSION), (NULL)); - return false; - } - return true; -} - static void gst_hailonet_init(GstHailoNet *self) { - if (!do_versions_match(self)) { + if (!do_versions_match(GST_ELEMENT(self))) { return; } diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailonet.hpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailonet.hpp index 142c30f..8f15195 100644 --- a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailonet.hpp +++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailonet.hpp @@ -54,7 +54,7 @@ public: HailoNetProperties() : m_device_id(nullptr), m_hef_path(nullptr), m_network_name(nullptr), m_batch_size(HAILO_DEFAULT_BATCH_SIZE), m_is_active(false), m_device_count(0), m_vdevice_key(DEFAULT_VDEVICE_KEY), m_scheduling_algorithm(HAILO_SCHEDULING_ALGORITHM_ROUND_ROBIN), m_scheduler_timeout_ms(HAILO_DEFAULT_SCHEDULER_TIMEOUT_MS), m_scheduler_threshold(HAILO_DEFAULT_SCHEDULER_THRESHOLD), m_scheduler_priority(HAILO_SCHEDULER_PRIORITY_NORMAL), - m_multi_process_service(HAILO_DEFAULT_MULTI_PROCESS_SERVICE), m_input_quantized(true), m_output_quantized(true), m_input_format_type(HAILO_FORMAT_TYPE_AUTO), + m_multi_process_service(HAILO_DEFAULT_MULTI_PROCESS_SERVICE), m_input_format_type(HAILO_FORMAT_TYPE_AUTO), m_output_format_type(HAILO_FORMAT_TYPE_AUTO), m_nms_score_threshold(0), m_nms_iou_threshold(0), m_nms_max_proposals_per_class(0) {} @@ -71,8 +71,6 @@ public: HailoElemProperty 
m_scheduler_threshold; HailoElemProperty m_scheduler_priority; HailoElemProperty m_multi_process_service; - HailoElemProperty m_input_quantized; - HailoElemProperty m_output_quantized; HailoElemProperty m_input_format_type; HailoElemProperty m_output_format_type; HailoElemProperty m_nms_score_threshold; diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailonet2.cpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailonet2.cpp new file mode 100644 index 0000000..4eae2b0 --- /dev/null +++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailonet2.cpp @@ -0,0 +1,1313 @@ +/* + * Copyright (c) 2021-2023 Hailo Technologies Ltd. All rights reserved. + * Distributed under the LGPL 2.1 license (https://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, + * Boston, MA 02110-1301, USA. + */ +#include "gsthailonet2.hpp" +#include "metadata/tensor_meta.hpp" +#include "hailo/buffer.hpp" +#include "hailo/hailort_common.hpp" +#include "hailo/hailort_defaults.hpp" + +#include +#include + +#define WAIT_FOR_ASYNC_READY_TIMEOUT (std::chrono::milliseconds(10000)) +#define ERROR(msg, ...) 
g_print(msg, ##__VA_ARGS__) + +enum +{ + PROP_0, + PROP_HEF_PATH, + PROP_BATCH_SIZE, + PROP_DEVICE_ID, + PROP_DEVICE_COUNT, + PROP_VDEVICE_GROUP_ID, + PROP_IS_ACTIVE, + PROP_OUTPUTS_MIN_POOL_SIZE, + PROP_OUTPUTS_MAX_POOL_SIZE, + PROP_SCHEDULING_ALGORITHM, + PROP_SCHEDULER_TIMEOUT_MS, + PROP_SCHEDULER_THRESHOLD, + PROP_SCHEDULER_PRIORITY, + PROP_INPUT_FORMAT_TYPE, + PROP_OUTPUT_FORMAT_TYPE, + PROP_NMS_SCORE_THRESHOLD, + PROP_NMS_IOU_THRESHOLD, + PROP_NMS_MAX_PROPOSALS_PER_CLASS, + PROP_INPUT_FROM_META, + PROP_MULTI_PROCESS_SERVICE, + + // Deprecated + PROP_VDEVICE_KEY, +}; + +static GstStaticPadTemplate sink_template = GST_STATIC_PAD_TEMPLATE("sink", GST_PAD_SINK, GST_PAD_ALWAYS, GST_STATIC_CAPS_ANY); +static GstStaticPadTemplate src_template = GST_STATIC_PAD_TEMPLATE("src", GST_PAD_SRC, GST_PAD_ALWAYS, GST_STATIC_CAPS_ANY); + +G_DEFINE_TYPE (GstHailoAllocator, gst_hailo_allocator, GST_TYPE_ALLOCATOR); +G_DEFINE_TYPE (GstHailoNet2, gst_hailonet2, GST_TYPE_ELEMENT); + +static std::atomic_uint32_t hailonet_count(0); + +static GstMemory *gst_hailo_allocator_alloc(GstAllocator* allocator, gsize size, GstAllocationParams* /*params*/) { + GstHailoAllocator *hailo_allocator = GST_HAILO_ALLOCATOR(allocator); + auto buffer = Buffer::create(size, BufferStorageParams::create_dma()); + if (!buffer) { + ERROR("Creating buffer for allocator has failed, status = %d\n", buffer.status()); + return nullptr; + } + + GstMemory *memory = gst_memory_new_wrapped(static_cast(0), buffer->data(), + buffer->size(), 0, buffer->size(), nullptr, nullptr); + if (nullptr == memory) { + ERROR("Creating new GstMemory for allocator has failed!\n"); + return nullptr; + } + + hailo_allocator->buffers[memory] = std::move(buffer.release()); + return memory; +} + +static void gst_hailo_allocator_free(GstAllocator* allocator, GstMemory *mem) { + GstHailoAllocator *hailo_allocator = GST_HAILO_ALLOCATOR(allocator); + hailo_allocator->buffers.erase(mem); +} + +static void 
gst_hailo_allocator_class_init(GstHailoAllocatorClass* klass) { + GstAllocatorClass* allocator_class = GST_ALLOCATOR_CLASS(klass); + + allocator_class->alloc = gst_hailo_allocator_alloc; + allocator_class->free = gst_hailo_allocator_free; +} + +static void gst_hailo_allocator_init(GstHailoAllocator* allocator) { + allocator->buffers = std::unordered_map(); +} + +static hailo_status gst_hailonet2_deconfigure(GstHailoNet2 *self) +{ + // This will wakeup any blocking calls to deuque + for (auto &name_pool_pair : self->output_buffer_pools) { + gst_buffer_pool_set_flushing(name_pool_pair.second, TRUE); + } + + std::unique_lock lock(self->infer_mutex); + self->configured_infer_model.reset(); + self->is_configured = false; + return HAILO_SUCCESS; +} + +static hailo_status gst_hailonet2_free(GstHailoNet2 *self) +{ + std::unique_lock lock(self->infer_mutex); + self->configured_infer_model.reset(); + self->infer_model.reset(); + self->vdevice.reset(); + + { + std::unique_lock lock(self->thread_queue_mutex); + self->is_thread_running = false; + } + self->thread_cv.notify_all(); + + if (self->thread.joinable()) { + self->thread.join(); + } + + if (nullptr != self->input_queue) { + gst_queue_array_free(self->input_queue); + } + + if (nullptr != self->thread_queue) { + gst_queue_array_free(self->thread_queue); + } + + if (nullptr != self->input_caps) { + gst_caps_unref(self->input_caps); + } + + for (auto &name_pool_pair : self->output_buffer_pools) { + gboolean result = gst_buffer_pool_set_active(name_pool_pair.second, FALSE); + CHECK(result, HAILO_INTERNAL_FAILURE, "Could not release buffer pool"); + gst_object_unref(name_pool_pair.second); + } + + gst_object_unref(self->allocator); + + self->props.free_strings(); + + return HAILO_SUCCESS; +} + +static hailo_status gst_hailonet2_set_format_types(GstHailoNet2 *self, std::shared_ptr infer_model) +{ + if (self->props.m_input_format_type.was_changed()) { + for (const auto &input_name : infer_model->get_input_names()) { + auto 
input = infer_model->input(input_name); + CHECK_EXPECTED_AS_STATUS(input); + + input->set_format_type(self->props.m_input_format_type.get()); + } + } + if (self->props.m_output_format_type.was_changed()) { + for (const auto &output_name : infer_model->get_output_names()) { + auto output = infer_model->output(output_name); + CHECK_EXPECTED_AS_STATUS(output); + + output->set_format_type(self->props.m_output_format_type.get()); + } + } + + return HAILO_SUCCESS; +} + +static hailo_status gst_hailonet2_set_nms_params(GstHailoNet2 *self, std::shared_ptr infer_model) +{ + // Check that if one of the NMS params are changed, we have NMS outputs in the model + auto has_nms_output = std::any_of(infer_model->outputs().begin(), infer_model->outputs().end(), [](const auto &output) + { + return output.is_nms(); + }); + + for (const auto &output_name : infer_model->get_output_names()) { + auto output = infer_model->output(output_name); + CHECK_EXPECTED_AS_STATUS(output); + + if (self->props.m_nms_score_threshold.was_changed()) { + CHECK(has_nms_output, HAILO_INVALID_OPERATION, "NMS score threshold is set, but there is no NMS output in this model."); + if (output->is_nms()) { + output->set_nms_score_threshold(self->props.m_nms_score_threshold.get()); + } + } + if (self->props.m_nms_iou_threshold.was_changed()) { + CHECK(has_nms_output, HAILO_INVALID_OPERATION, "NMS IoU threshold is set, but there is no NMS output in this model."); + if (output->is_nms()) { + output->set_nms_iou_threshold(self->props.m_nms_iou_threshold.get()); + } + } + if (self->props.m_nms_max_proposals_per_class.was_changed()) { + CHECK(has_nms_output, HAILO_INVALID_OPERATION, "NMS max proposals per class is set, but there is no NMS output in this model."); + if (output->is_nms()) { + output->set_nms_max_proposals_per_class(self->props.m_nms_max_proposals_per_class.get()); + } + } + } + + return HAILO_SUCCESS; +} + +static hailo_status gst_hailonet2_set_scheduler_params(GstHailoNet2 *self, std::shared_ptr 
configured_infer_model) +{ + if (self->props.m_scheduler_timeout_ms.was_changed()) { + auto millis = std::chrono::milliseconds(self->props.m_scheduler_timeout_ms.get()); + auto status = configured_infer_model->set_scheduler_timeout(millis); + CHECK_SUCCESS(status, "Setting scheduler timeout failed, status = %d", status); + } + if (self->props.m_scheduler_threshold.was_changed()) { + auto status = configured_infer_model->set_scheduler_threshold(self->props.m_scheduler_threshold.get()); + CHECK_SUCCESS(status, "Setting scheduler threshold failed, status = %d", status); + } + if (self->props.m_scheduler_priority.was_changed()) { + auto status = configured_infer_model->set_scheduler_priority(self->props.m_scheduler_priority.get()); + CHECK_SUCCESS(status, "Setting scheduler priority failed, status = %d", status); + } + + return HAILO_SUCCESS; +} + +static Expected gst_hailonet2_create_buffer_pool(GstHailoNet2 *self, size_t frame_size) +{ + GstBufferPool *pool = gst_buffer_pool_new(); + + GstStructure *config = gst_buffer_pool_get_config(pool); + gst_buffer_pool_config_set_params(config, nullptr, static_cast(frame_size), self->props.m_outputs_min_pool_size.get(), + self->props.m_outputs_max_pool_size.get()); + + gst_buffer_pool_config_set_allocator(config, GST_ALLOCATOR(self->allocator), nullptr); + + gboolean result = gst_buffer_pool_set_config(pool, config); + CHECK_AS_EXPECTED(result, HAILO_INTERNAL_FAILURE, "Could not set config buffer pool"); + + result = gst_buffer_pool_set_active(pool, TRUE); + CHECK_AS_EXPECTED(result, HAILO_INTERNAL_FAILURE, "Could not set buffer pool as active"); + + return pool; +} + +static hailo_status gst_hailonet2_configure(GstHailoNet2 *self) +{ + if (self->is_configured) { + return HAILO_SUCCESS; + } + + for (auto &name_pool_pair : self->output_buffer_pools) { + gst_buffer_pool_set_flushing(name_pool_pair.second, FALSE); + } + + self->infer_model->set_batch_size(self->props.m_batch_size.get()); + + auto status = 
gst_hailonet2_set_format_types(self, self->infer_model); + CHECK_SUCCESS(status); + + status = gst_hailonet2_set_nms_params(self, self->infer_model); + CHECK_SUCCESS(status); + + // In RGB formats, Gstreamer is padding each row to 4. + for (const auto &input_name : self->infer_model->get_input_names()) { + // TODO (HRT-12492): change transformations to be togglable + if (self->props.m_input_from_meta.get()) { + // do not apply transformations when taking input from meta + self->infer_model->input(input_name)->set_format_order(HAILO_FORMAT_ORDER_NHCW); + } else if (self->infer_model->input(input_name)->format().order == HAILO_FORMAT_ORDER_NHWC) { + self->infer_model->input(input_name)->set_format_order(HAILO_FORMAT_ORDER_RGB4); + } + } + + if (self->props.m_input_from_meta.get()) { + for (const auto &output_name : self->infer_model->get_output_names()) { + // TODO (HRT-12492): change transformations to be togglable + // do not apply transformations when taking output to meta + self->infer_model->output(output_name)->set_format_order(HAILO_FORMAT_ORDER_NHCW); + } + } + + auto configured_infer_model = self->infer_model->configure(); + CHECK_EXPECTED_AS_STATUS(configured_infer_model); + + auto ptr = make_shared_nothrow(configured_infer_model.release()); + CHECK_NOT_NULL(ptr, HAILO_OUT_OF_HOST_MEMORY); + self->configured_infer_model = ptr; + + status = gst_hailonet2_set_scheduler_params(self, self->configured_infer_model); + CHECK_SUCCESS(status); + + self->is_configured = true; + return HAILO_SUCCESS; +} + +static hailo_status gst_hailonet2_allocate_infer_resources(GstHailoNet2 *self) +{ + auto bindings = self->configured_infer_model->create_bindings(); + CHECK_EXPECTED_AS_STATUS(bindings); + self->infer_bindings = std::move(bindings.release()); + + self->output_buffer_pools = std::unordered_map(); + self->output_vstream_infos = std::unordered_map(); + + auto async_queue_size = self->configured_infer_model->get_async_queue_size(); + 
CHECK_EXPECTED_AS_STATUS(async_queue_size); + self->input_queue = gst_queue_array_new(static_cast(async_queue_size.value())); + self->thread_queue = gst_queue_array_new(static_cast(async_queue_size.value())); + self->is_thread_running = true; + self->thread = std::thread([self] () { + while (self->is_thread_running) { + GstBuffer *buffer = nullptr; + { + std::unique_lock lock(self->thread_queue_mutex); + self->thread_cv.wait(lock, [self] () { + return (self->buffers_in_thread_queue > 0) || !self->is_thread_running; + }); + if (!self->is_thread_running) { + break; + } + + buffer = static_cast(gst_queue_array_pop_head(self->thread_queue)); + self->buffers_in_thread_queue--; + } + if (GST_IS_PAD(self->srcpad)) { // Checking because we fail here when exiting the application + GstFlowReturn ret = gst_pad_push(self->srcpad, buffer); + if ((GST_FLOW_OK != ret) && (GST_FLOW_FLUSHING != ret) && (!self->has_got_eos)) { + ERROR("gst_pad_push failed with status = %d\n", ret); + break; + } + } + } + }); + + for (auto &output : self->infer_model->outputs()) { + auto buffer_pool = gst_hailonet2_create_buffer_pool(self, output.get_frame_size()); + CHECK_EXPECTED_AS_STATUS(buffer_pool); + + self->output_buffer_pools[output.name()] = buffer_pool.release(); + } + + auto vstream_infos = self->infer_model->hef().get_output_vstream_infos(); + CHECK_EXPECTED_AS_STATUS(vstream_infos); + + for (const auto &vstream_info : vstream_infos.value()) { + self->output_vstream_infos[vstream_info.name] = vstream_info; + } + + return HAILO_SUCCESS; +} + +static GstStateChangeReturn gst_hailonet2_change_state(GstElement *element, GstStateChange transition) +{ + GstStateChangeReturn ret = GST_ELEMENT_CLASS(gst_hailonet2_parent_class)->change_state(element, transition); + if (GST_STATE_CHANGE_FAILURE == ret) { + return ret; + } + + GstHailoNet2 *self = GST_HAILONET2(element); + switch (transition) { + case GST_STATE_CHANGE_PAUSED_TO_PLAYING: + { + auto status = gst_hailonet2_configure(self); + if 
(HAILO_SUCCESS != status) { + return GST_STATE_CHANGE_FAILURE; + } + break; + } + case GST_STATE_CHANGE_PLAYING_TO_PAUSED: + { + auto status = gst_hailonet2_deconfigure(self); + if (HAILO_SUCCESS != status) { + return GST_STATE_CHANGE_FAILURE; + } + break; + } + case GST_STATE_CHANGE_READY_TO_NULL: + { + auto status = gst_hailonet2_free(self); + if (HAILO_SUCCESS != status) { + return GST_STATE_CHANGE_FAILURE; + } + break; + } + default: + break; + } + + return ret; +} + +static hailo_status gst_hailonet2_toggle_activation(GstHailoNet2 *self, gboolean old_is_active, gboolean new_is_active) +{ + if (self->props.m_scheduling_algorithm.was_changed() && (HAILO_SCHEDULING_ALGORITHM_NONE != self->props.m_scheduling_algorithm.get())) { + g_error("scheduling-algorithm different than HAILO_SCHEDULING_ALGORITHM_NONE in combination with 'is-active' is not supported."); + return HAILO_INVALID_OPERATION; + } + + if (self->has_called_activate) { + if (old_is_active && !new_is_active) { + self->configured_infer_model->deactivate(); + } else if (!old_is_active && new_is_active) { + auto status = self->configured_infer_model->activate(); + CHECK_SUCCESS(status); + } else { + g_warning("Trying to change is-active property from %d to %d", old_is_active, new_is_active); + } + } + + self->props.m_is_active = new_is_active; + return HAILO_SUCCESS; +} + +static void gst_hailonet2_set_property(GObject *object, guint property_id, const GValue *value, GParamSpec *pspec) +{ + GstHailoNet2 *self = GST_HAILONET2(object); + switch (property_id) { + case PROP_HEF_PATH: + if (self->is_configured) { + g_warning("The network was already configured so changing the HEF path will not take place!"); + break; + } + if (nullptr != self->props.m_hef_path.get()) { + g_free(self->props.m_hef_path.get()); + } + self->props.m_hef_path = g_strdup(g_value_get_string(value)); + break; + case PROP_BATCH_SIZE: + if (self->is_configured) { + g_warning("The network was already configured so changing the batch size 
will not take place!"); + break; + } + self->props.m_batch_size = static_cast(g_value_get_uint(value)); + break; + case PROP_DEVICE_ID: + if (0 != self->props.m_device_count.get()) { + g_error("device-id and device-count excludes eachother. received device-id=%s, device-count=%d", + g_value_get_string(value), self->props.m_device_count.get()); + break; + } + if (self->is_configured) { + g_warning("The network was already configured so changing the device ID will not take place!"); + break; + } + if (nullptr != self->props.m_device_id.get()) { + g_free(self->props.m_device_id.get()); + } + self->props.m_device_id = g_strdup(g_value_get_string(value)); + break; + case PROP_DEVICE_COUNT: + if (nullptr != self->props.m_device_id.get()) { + g_error("device-id and device-count excludes eachother. received device-id=%s, device-count=%d", + self->props.m_device_id.get(), g_value_get_uint(value)); + break; + } + if (self->is_configured) { + g_warning("The network was already configured so changing the device count will not take place!"); + break; + } + self->props.m_device_count = static_cast(g_value_get_uint(value)); + break; + case PROP_VDEVICE_GROUP_ID: + if (self->is_configured) { + g_warning("The network was already configured so changing the vdevice group ID will not take place!"); + break; + } + if (nullptr != self->props.m_vdevice_group_id.get()) { + g_free(self->props.m_vdevice_group_id.get()); + } + self->props.m_vdevice_group_id = g_strdup(g_value_get_string(value)); + break; + case PROP_IS_ACTIVE: + (void)gst_hailonet2_toggle_activation(self, self->props.m_is_active.get(), g_value_get_boolean(value)); + break; + case PROP_OUTPUTS_MIN_POOL_SIZE: + if (self->is_configured) { + g_warning("The network was already configured so changing the outputs minimum pool size will not take place!"); + break; + } + self->props.m_outputs_min_pool_size = g_value_get_uint(value); + break; + case PROP_OUTPUTS_MAX_POOL_SIZE: + if (self->is_configured) { + g_warning("The network was 
already configured so changing the outputs maximum pool size will not take place!"); + break; + } + self->props.m_outputs_max_pool_size = g_value_get_uint(value); + break; + case PROP_SCHEDULING_ALGORITHM: + if (self->is_configured) { + g_warning("The network was already configured so changing the scheduling algorithm will not take place!"); + break; + } + if (self->props.m_is_active.was_changed() && (g_value_get_enum(value) != HAILO_SCHEDULING_ALGORITHM_NONE)) { + g_error("scheduling-algorithm different than HAILO_SCHEDULING_ALGORITHM_NONE in combination with 'is-active' is not supported."); + break; + } + self->props.m_scheduling_algorithm = static_cast(g_value_get_enum(value)); + break; + case PROP_SCHEDULER_TIMEOUT_MS: + if (self->is_configured) { + g_warning("The network was already configured so changing the scheduling timeout will not take place!"); + break; + } + self->props.m_scheduler_timeout_ms = g_value_get_uint(value); + break; + case PROP_SCHEDULER_THRESHOLD: + if (self->is_configured) { + g_warning("The network was already configured so changing the scheduling threshold will not take place!"); + break; + } + self->props.m_scheduler_threshold = g_value_get_uint(value); + break; + case PROP_SCHEDULER_PRIORITY: + if (self->is_configured) { + g_warning("The network was already configured so changing the scheduling priority will not take place!"); + break; + } + self->props.m_scheduler_priority = static_cast(g_value_get_uint(value)); + break; + case PROP_INPUT_FORMAT_TYPE: + if (self->is_configured) { + g_warning("The network was already configured so changing the format type will not take place!"); + break; + } + self->props.m_input_format_type = static_cast(g_value_get_enum(value)); + break; + case PROP_OUTPUT_FORMAT_TYPE: + if (self->is_configured) { + g_warning("The network was already configured so changing the format type will not take place!"); + break; + } + self->props.m_output_format_type = static_cast(g_value_get_enum(value)); + break; + case 
PROP_NMS_SCORE_THRESHOLD: + if (self->is_configured) { + g_warning("The network was already configured so changing the score threshold will not take place!"); + break; + } + self->props.m_nms_score_threshold = static_cast(g_value_get_float(value)); + break; + case PROP_NMS_IOU_THRESHOLD: + if (self->is_configured) { + g_warning("The network was already configured so changing the IoU threshold will not take place!"); + break; + } + self->props.m_nms_iou_threshold = static_cast(g_value_get_float(value)); + break; + case PROP_NMS_MAX_PROPOSALS_PER_CLASS: + if (self->is_configured) { + g_warning("The network was already configured so changing the max proposals per class will not take place!"); + break; + } + self->props.m_nms_max_proposals_per_class = static_cast(g_value_get_uint(value)); + break; + case PROP_INPUT_FROM_META: + if (self->is_configured) { + g_warning("The network was already configured so changing the input method will not take place!"); + break; + } + self->props.m_input_from_meta = g_value_get_boolean(value); + break; + case PROP_MULTI_PROCESS_SERVICE: + if (self->is_configured) { + g_warning("The network was already configured so changing the multi-process-service property will not take place!"); + break; + } + self->props.m_multi_process_service = g_value_get_boolean(value); // TODO: do something with this + break; + + // Deprecated + case PROP_VDEVICE_KEY: + if (self->is_configured) { + g_warning("The network was already configured so changing the vdevice key will not take place!"); + break; + } + self->props.m_vdevice_key = static_cast(g_value_get_uint(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, property_id, pspec); + break; + } +} + +static void gst_hailonet2_get_property(GObject *object, guint property_id, GValue *value, GParamSpec *pspec) +{ + GstHailoNet2 *self = GST_HAILONET2(object); + switch (property_id) { + case PROP_HEF_PATH: + g_value_set_string(value, self->props.m_hef_path.get()); + break; + case 
PROP_BATCH_SIZE: + g_value_set_uint(value, self->props.m_batch_size.get()); + break; + case PROP_DEVICE_ID: + g_value_set_string(value, self->props.m_device_id.get()); + break; + case PROP_DEVICE_COUNT: + g_value_set_uint(value, self->props.m_device_count.get()); + break; + case PROP_VDEVICE_GROUP_ID: + g_value_set_string(value, self->props.m_vdevice_group_id.get()); + break; + case PROP_IS_ACTIVE: + g_value_set_boolean(value, self->props.m_is_active.get()); + break; + case PROP_OUTPUTS_MIN_POOL_SIZE: + g_value_set_uint(value, self->props.m_outputs_min_pool_size.get()); + break; + case PROP_OUTPUTS_MAX_POOL_SIZE: + g_value_set_uint(value, self->props.m_outputs_max_pool_size.get()); + break; + case PROP_SCHEDULING_ALGORITHM: + g_value_set_enum(value, self->props.m_scheduling_algorithm.get()); + break; + case PROP_SCHEDULER_TIMEOUT_MS: + g_value_set_uint(value, self->props.m_scheduler_timeout_ms.get()); + break; + case PROP_SCHEDULER_THRESHOLD: + g_value_set_uint(value, self->props.m_scheduler_threshold.get()); + break; + case PROP_SCHEDULER_PRIORITY: + g_value_set_uint(value, self->props.m_scheduler_priority.get()); + break; + case PROP_INPUT_FORMAT_TYPE: + g_value_set_enum(value, self->props.m_input_format_type.get()); + break; + case PROP_OUTPUT_FORMAT_TYPE: + g_value_set_enum(value, self->props.m_output_format_type.get()); + break; + case PROP_NMS_SCORE_THRESHOLD: + g_value_set_float(value, self->props.m_nms_score_threshold.get()); + break; + case PROP_NMS_IOU_THRESHOLD: + g_value_set_float(value, self->props.m_nms_iou_threshold.get()); + break; + case PROP_NMS_MAX_PROPOSALS_PER_CLASS: + g_value_set_uint(value, self->props.m_nms_max_proposals_per_class.get()); + break; + case PROP_INPUT_FROM_META: + g_value_set_boolean(value, self->props.m_input_from_meta.get()); + break; + case PROP_MULTI_PROCESS_SERVICE: + g_value_set_boolean(value, self->props.m_multi_process_service.get()); + break; + + // Deprecated + case PROP_VDEVICE_KEY: + g_value_set_uint(value, 
self->props.m_vdevice_key.get()); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, property_id, pspec); + break; + } +} + +static void gst_hailonet2_class_init(GstHailoNet2Class *klass) +{ + GObjectClass *gobject_class = G_OBJECT_CLASS(klass); + GstElementClass *element_class = GST_ELEMENT_CLASS(klass); + + gst_element_class_add_pad_template(element_class, gst_static_pad_template_get(&sink_template)); + gst_element_class_add_pad_template(element_class, gst_static_pad_template_get(&src_template)); + element_class->change_state = gst_hailonet2_change_state; + + gst_element_class_set_static_metadata(element_class, + "hailonet element", "Hailo/Network", + "Configure and Activate Hailo Network. " + "Supports the \"flush\" signal which blocks until there are no buffers currently processesd in the element. " + "When deactivating a hailonet during runtime (via set_property of \"is-active\" to False), make sure that no frames are being pushed into the " + "hailonet, since this operation waits until there are no frames coming in.", + PLUGIN_AUTHOR); + + gobject_class->set_property = gst_hailonet2_set_property; + gobject_class->get_property = gst_hailonet2_get_property; + g_object_class_install_property(gobject_class, PROP_HEF_PATH, + g_param_spec_string("hef-path", "HEF Path Location", "Location of the HEF file to read", nullptr, + (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + g_object_class_install_property(gobject_class, PROP_BATCH_SIZE, + g_param_spec_uint("batch-size", "Inference Batch", "How many frame to send in one batch", + MIN_GSTREAMER_BATCH_SIZE, MAX_GSTREAMER_BATCH_SIZE, HAILO_DEFAULT_BATCH_SIZE, + (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + g_object_class_install_property(gobject_class, PROP_OUTPUTS_MIN_POOL_SIZE, + g_param_spec_uint("outputs-min-pool-size", "Outputs Minimun Pool Size", "The minimum amount of buffers to allocate for each output layer", + 0, std::numeric_limits::max(), MIN_OUTPUTS_POOL_SIZE, 
(GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + g_object_class_install_property(gobject_class, PROP_OUTPUTS_MAX_POOL_SIZE, + g_param_spec_uint("outputs-max-pool-size", "Outputs Maximum Pool Size", + "The maximum amount of buffers to allocate for each output layer or 0 for unlimited", 0, std::numeric_limits::max(), + MAX_OUTPUTS_POOL_SIZE, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + + g_object_class_install_property(gobject_class, PROP_DEVICE_ID, + g_param_spec_string("device-id", "Device ID", "Device ID ([]::., same as in lspci command). Excludes device-count.", NULL, + (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + g_object_class_install_property(gobject_class, PROP_DEVICE_COUNT, + g_param_spec_uint("device-count", "Number of devices to use", "Number of physical devices to use. Excludes device-id.", HAILO_DEFAULT_DEVICE_COUNT, + std::numeric_limits::max(), HAILO_DEFAULT_DEVICE_COUNT, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + g_object_class_install_property(gobject_class, PROP_VDEVICE_GROUP_ID, + g_param_spec_string("vdevice-group-id", + "VDevice Group ID to share vdevices across hailonets", + "Used to share VDevices across different hailonet instances", HAILO_DEFAULT_VDEVICE_GROUP_ID, + (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + + // TODO (HRT-12306): Change is-active behavior + g_object_class_install_property(gobject_class, PROP_IS_ACTIVE, + g_param_spec_boolean("is-active", "Is Network Activated", "Controls whether this element should be active. " + "By default, the hailonet element will not be active unless it is the only one. 
" + "Setting this property in combination with 'scheduling-algorithm' different than HAILO_SCHEDULING_ALGORITHM_NONE is not supported.", false, + (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + + g_object_class_install_property(gobject_class, PROP_SCHEDULING_ALGORITHM, + g_param_spec_enum("scheduling-algorithm", "Scheduling policy for automatic network group switching", "Controls the Model Scheduler algorithm of HailoRT. " + "Gets values from the enum GstHailoSchedulingAlgorithms. " + "Using Model Scheduler algorithm different than HAILO_SCHEDULING_ALGORITHM_NONE, excludes the property 'is-active'. " + "When using the same VDevice across multiple hailonets, all should have the same 'scheduling-algorithm'. ", + GST_TYPE_SCHEDULING_ALGORITHM, HAILO_SCHEDULING_ALGORITHM_ROUND_ROBIN, + (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + g_object_class_install_property(gobject_class, PROP_SCHEDULER_TIMEOUT_MS, + g_param_spec_uint("scheduler-timeout-ms", "Timeout for for scheduler in ms", "The maximum time period that may pass before getting run time from the scheduler," + " as long as at least one send request has been sent.", + HAILO_DEFAULT_SCHEDULER_TIMEOUT_MS, std::numeric_limits::max(), HAILO_DEFAULT_SCHEDULER_TIMEOUT_MS, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + g_object_class_install_property(gobject_class, PROP_SCHEDULER_THRESHOLD, + g_param_spec_uint("scheduler-threshold", "Frames threshold for scheduler", "The minimum number of send requests required before the hailonet is considered ready to get run time from the scheduler.", + HAILO_DEFAULT_SCHEDULER_THRESHOLD, std::numeric_limits::max(), HAILO_DEFAULT_SCHEDULER_THRESHOLD, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + g_object_class_install_property(gobject_class, PROP_SCHEDULER_PRIORITY, + g_param_spec_uint("scheduler-priority", "Priority index for scheduler", "When the scheduler will choose the next hailonet to run, higher priority will be 
prioritized in the selection. " + "Bigger number represent higher priority", + HAILO_SCHEDULER_PRIORITY_MIN, HAILO_SCHEDULER_PRIORITY_MAX, HAILO_SCHEDULER_PRIORITY_NORMAL, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + + g_object_class_install_property(gobject_class, PROP_INPUT_FORMAT_TYPE, + g_param_spec_enum("input-format-type", "Input format type", "Input format type(auto, float32, uint16, uint8). Default value is auto." + "Gets values from the enum GstHailoFormatType. ", + GST_TYPE_HAILO_FORMAT_TYPE, HAILO_FORMAT_TYPE_AUTO, + (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + g_object_class_install_property(gobject_class, PROP_OUTPUT_FORMAT_TYPE, + g_param_spec_enum("output-format-type", "Output format type", "Output format type(auto, float32, uint16, uint8). Default value is auto." + "Gets values from the enum GstHailoFormatType. ", + GST_TYPE_HAILO_FORMAT_TYPE, HAILO_FORMAT_TYPE_AUTO, + (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + g_object_class_install_property(gobject_class, PROP_INPUT_FROM_META, + g_param_spec_boolean("input-from-meta", "Enable input from meta", "Take network input from metadata instead of video frame.", false, + (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + + g_object_class_install_property(gobject_class, PROP_NMS_SCORE_THRESHOLD, + g_param_spec_float("nms-score-threshold", "NMS score threshold", "Threshold used for filtering out candidates. Any box with score::max(), 0, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + + g_object_class_install_property(gobject_class, PROP_MULTI_PROCESS_SERVICE, + g_param_spec_boolean("multi-process-service", "Should run over HailoRT service", "Controls wether to run HailoRT over its service. " + "To use this property, the service should be active and scheduling-algorithm should be set. 
Defaults to false.", + HAILO_DEFAULT_MULTI_PROCESS_SERVICE, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + + // Deprecated + g_object_class_install_property(gobject_class, PROP_VDEVICE_KEY, + g_param_spec_uint("vdevice-key", + "Deprecated: Indicate whether to re-use or re-create vdevice", + "Deprecated: Use vdevice-group-id instead. Relevant only when 'device-count' is passed. If not passed, the created vdevice will be unique to this hailonet." \ + "if multiple hailonets share 'vdevice-key' and 'device-count', the created vdevice will be shared between those hailonets", + MIN_VALID_VDEVICE_KEY, std::numeric_limits::max(), MIN_VALID_VDEVICE_KEY, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + + // See information about the "flush" signal in the element description + g_signal_new( + "flush", + GST_TYPE_HAILONET2, + G_SIGNAL_ACTION, + 0, nullptr, nullptr, nullptr, G_TYPE_NONE, 0 + ); +} + +static void gst_hailonet2_push_buffer_to_thread(GstHailoNet2 *self, GstBuffer *buffer) +{ + { + std::unique_lock lock(self->thread_queue_mutex); + gst_queue_array_push_tail(self->thread_queue, buffer); + self->buffers_in_thread_queue++; + } + self->thread_cv.notify_all(); +} + +// TODO (HRT-12490): reduce code duplication with gst_hailonet2_async_infer +static hailo_status gst_hailonet2_async_infer_multi_input(GstHailoNet2 *self, GstBuffer *buffer, const std::unordered_map &input_buffers) +{ + { + std::unique_lock lock(self->input_queue_mutex); + for (auto name : self->infer_model->get_input_names()) + { + auto status = self->infer_bindings.input(name)->set_buffer(MemoryView(input_buffers.at(name), + self->infer_model->input(name)->get_frame_size())); + CHECK_SUCCESS(status); + } + + gst_queue_array_push_tail(self->input_queue, buffer); + } + + struct TensorInfo { + GstBuffer *buffer; + GstMapInfo buffer_info; + }; + std::unordered_map tensors; + for (auto &output : self->infer_model->outputs()) { + GstBuffer *output_buffer = nullptr; + GstFlowReturn 
flow_result = gst_buffer_pool_acquire_buffer(self->output_buffer_pools[output.name()], &output_buffer, nullptr); + if (GST_FLOW_FLUSHING == flow_result) { + return HAILO_STREAM_ABORTED_BY_USER; + } + CHECK(GST_FLOW_OK == flow_result, HAILO_INTERNAL_FAILURE, "Acquire buffer failed!"); + + GstMapInfo buffer_info; + gboolean result = gst_buffer_map(output_buffer, &buffer_info, GST_MAP_WRITE); + CHECK(result, HAILO_INTERNAL_FAILURE, "Failed mapping buffer!"); + + auto status = self->infer_bindings.output(output.name())->set_buffer(MemoryView(buffer_info.data, buffer_info.size)); + CHECK_SUCCESS(status); + + tensors[output.name()] = {output_buffer, buffer_info}; + } + + auto status = self->configured_infer_model->wait_for_async_ready(WAIT_FOR_ASYNC_READY_TIMEOUT); + CHECK_SUCCESS(status); + + auto job = self->configured_infer_model->run_async(self->infer_bindings, [self, tensors] (const AsyncInferCompletionInfo &/*completion_info*/) { + GstBuffer *buffer = nullptr; + { + std::unique_lock lock(self->input_queue_mutex); + buffer = static_cast(gst_queue_array_pop_head(self->input_queue)); + } + + for (auto &output : self->infer_model->outputs()) { + auto info = tensors.at(output.name()); + gst_buffer_unmap(info.buffer, &info.buffer_info); + + GstHailoTensorMeta *buffer_meta = GST_TENSOR_META_ADD(info.buffer); + buffer_meta->info = self->output_vstream_infos[output.name()]; + + (void)gst_buffer_add_parent_buffer_meta(buffer, info.buffer); + gst_buffer_unref(info.buffer); + } + + { + std::unique_lock lock(self->flush_mutex); + self->ongoing_frames--; + } + self->flush_cv.notify_all(); + + { + std::unique_lock lock(self->thread_queue_mutex); + gst_queue_array_push_tail(self->thread_queue, buffer); + self->buffers_in_thread_queue++; + } + self->thread_cv.notify_all(); + }); + CHECK_EXPECTED_AS_STATUS(job); + job->detach(); + + return HAILO_SUCCESS; +} + +static hailo_status gst_hailonet2_async_infer(GstHailoNet2 *self, GstBuffer * buffer, hailo_pix_buffer_t pix_buffer) +{ + { 
+ std::unique_lock lock(self->input_queue_mutex); + auto status = self->infer_bindings.input()->set_pix_buffer(pix_buffer); + CHECK_SUCCESS(status); + + gst_queue_array_push_tail(self->input_queue, buffer); + } + + struct TensorInfo { + GstBuffer *buffer; + GstMapInfo buffer_info; + }; + std::unordered_map tensors; + for (auto &output : self->infer_model->outputs()) { + GstBuffer *output_buffer = nullptr; + GstFlowReturn flow_result = gst_buffer_pool_acquire_buffer(self->output_buffer_pools[output.name()], &output_buffer, nullptr); + if (GST_FLOW_FLUSHING == flow_result) { + return HAILO_STREAM_ABORTED_BY_USER; + } + CHECK(GST_FLOW_OK == flow_result, HAILO_INTERNAL_FAILURE, "Acquire buffer failed!"); + + GstMapInfo buffer_info; + gboolean result = gst_buffer_map(output_buffer, &buffer_info, GST_MAP_WRITE); + CHECK(result, HAILO_INTERNAL_FAILURE, "Failed mapping buffer!"); + + auto status = self->infer_bindings.output(output.name())->set_buffer(MemoryView(buffer_info.data, buffer_info.size)); + CHECK_SUCCESS(status); + + tensors[output.name()] = {output_buffer, buffer_info}; + } + + auto status = self->configured_infer_model->wait_for_async_ready(WAIT_FOR_ASYNC_READY_TIMEOUT); + CHECK_SUCCESS(status); + + auto job = self->configured_infer_model->run_async(self->infer_bindings, [self, tensors] (const AsyncInferCompletionInfo &/*completion_info*/) { + GstBuffer *buffer = nullptr; + { + std::unique_lock lock(self->input_queue_mutex); + buffer = static_cast(gst_queue_array_pop_head(self->input_queue)); + } + + for (auto &output : self->infer_model->outputs()) { + auto info = tensors.at(output.name()); + gst_buffer_unmap(info.buffer, &info.buffer_info); + + GstHailoTensorMeta *buffer_meta = GST_TENSOR_META_ADD(info.buffer); + buffer_meta->info = self->output_vstream_infos[output.name()]; + + (void)gst_buffer_add_parent_buffer_meta(buffer, info.buffer); + gst_buffer_unref(info.buffer); + } + + { + std::unique_lock lock(self->flush_mutex); + self->ongoing_frames--; + } + 
self->flush_cv.notify_all(); + + gst_hailonet2_push_buffer_to_thread(self, buffer); + }); + CHECK_EXPECTED_AS_STATUS(job); + job->detach(); + + return HAILO_SUCCESS; +} + +static Expected gst_hailonet2_construct_pix_buffer(GstHailoNet2 *self, GstBuffer *buffer) +{ + GstVideoFrame frame; + auto result = gst_video_frame_map(&frame, &self->input_frame_info, buffer, + static_cast(GST_MAP_READ | GST_VIDEO_FRAME_MAP_FLAG_NO_REF)); + CHECK_AS_EXPECTED(result,HAILO_INTERNAL_FAILURE, "gst_video_frame_map failed!"); + + hailo_pix_buffer_t pix_buffer = {}; + pix_buffer.index = 0; + pix_buffer.number_of_planes = GST_VIDEO_INFO_N_PLANES(&frame.info); + + for (uint32_t plane_index = 0; plane_index < pix_buffer.number_of_planes; plane_index++) { + pix_buffer.planes[plane_index].bytes_used = GST_VIDEO_INFO_PLANE_STRIDE(&frame.info, plane_index) * GST_VIDEO_INFO_COMP_HEIGHT(&frame.info, plane_index); + pix_buffer.planes[plane_index].plane_size = GST_VIDEO_INFO_PLANE_STRIDE(&frame.info, plane_index) * GST_VIDEO_INFO_COMP_HEIGHT(&frame.info, plane_index); + pix_buffer.planes[plane_index].user_ptr = GST_VIDEO_FRAME_PLANE_DATA(&frame, plane_index); + } + + gst_video_frame_unmap(&frame); + return pix_buffer; +} + +static Expected> gst_hailonet2_read_input_buffers_from_meta(GstHailoNet2 *self, GstBuffer *buffer) +{ + std::unordered_map input_buffer_metas; + gpointer state = NULL; + GstMeta *meta; + + while ((meta = gst_buffer_iterate_meta_filtered(buffer, &state, GST_PARENT_BUFFER_META_API_TYPE))) { + GstParentBufferMeta *parent_buffer_meta = reinterpret_cast(meta); + GstMapInfo info; + gboolean map_succeeded = gst_buffer_map(parent_buffer_meta->buffer, &info, GST_MAP_READ); + if (!map_succeeded) { + // Failed to map, this buffer might not have a GstHailoTensorMeta, continue + continue; + } + GstHailoTensorMeta *tensor_meta = GST_TENSOR_META_GET(parent_buffer_meta->buffer); + if (!tensor_meta) { + // Not a tensor meta (this buffer is not a tensor), unmap and continue + 
gst_buffer_unmap(parent_buffer_meta->buffer, &info); + continue; + } + const hailo_vstream_info_t vstream_info = tensor_meta->info; + input_buffer_metas[vstream_info.name] = static_cast(info.data); + gst_buffer_unmap(parent_buffer_meta->buffer, &info); + } + CHECK_AS_EXPECTED(!input_buffer_metas.empty(),HAILO_INTERNAL_FAILURE, "No GstHailoTensorMeta was found in buffer!"); + + for (auto &input : self->infer_model->inputs()) { + CHECK_AS_EXPECTED(input_buffer_metas.find(input.name()) != input_buffer_metas.end(), + HAILO_INTERNAL_FAILURE, "No GstHailoTensorMeta was found in buffer for output: %s", input.name().c_str()); + } + + return input_buffer_metas; +} + +static GstFlowReturn gst_hailonet2_chain(GstPad * /*pad*/, GstObject * parent, GstBuffer * buffer) +{ + GstHailoNet2 *self = GST_HAILONET2(parent); + std::unique_lock lock(self->infer_mutex); + + if (!self->props.m_is_active.get() || (nullptr == self->configured_infer_model)) { + gst_hailonet2_push_buffer_to_thread(self, buffer); + return GST_FLOW_OK; + } + + { + std::unique_lock lock(self->flush_mutex); + self->ongoing_frames++; + } + + if (self->props.m_input_from_meta.get()) { + auto input_buffer_metas = gst_hailonet2_read_input_buffers_from_meta(self, buffer); + if (!input_buffer_metas) { + return GST_FLOW_ERROR; + } + auto status = gst_hailonet2_async_infer_multi_input(self, buffer, input_buffer_metas.value()); + if ((HAILO_SUCCESS != status) && (HAILO_STREAM_ABORTED_BY_USER != status)) { + return GST_FLOW_ERROR; + } + } else { + auto pix_buffer = gst_hailonet2_construct_pix_buffer(self, buffer); + if (!pix_buffer) { + return GST_FLOW_ERROR; + } + auto status = gst_hailonet2_async_infer(self, buffer, pix_buffer.value()); + if ((HAILO_SUCCESS != status) && (HAILO_STREAM_ABORTED_BY_USER != status)) { + return GST_FLOW_ERROR; + } + } + + return GST_FLOW_OK; +} + +static hailo_status gst_hailonet2_init_infer_model(GstHailoNet2 * self) +{ + auto vdevice_params = HailoRTDefaults::get_vdevice_params(); + + 
hailo_device_id_t device_id = {0}; + if (self->props.m_device_id.was_changed()) { + auto expected_device_id = HailoRTCommon::to_device_id(self->props.m_device_id.get()); + CHECK_EXPECTED_AS_STATUS(expected_device_id); + device_id = std::move(expected_device_id.release()); + + vdevice_params.device_ids = &device_id; + } + if (self->props.m_device_count.was_changed()) { + vdevice_params.device_count = self->props.m_device_count.get(); + } + if (self->props.m_vdevice_group_id.was_changed()) { + vdevice_params.group_id = self->props.m_vdevice_group_id.get(); + } else if (self->props.m_vdevice_key.was_changed()) { + auto key_str = std::to_string(self->props.m_vdevice_key.get()); + vdevice_params.group_id = key_str.c_str(); + } + if (self->props.m_scheduling_algorithm.was_changed()) { + vdevice_params.scheduling_algorithm = self->props.m_scheduling_algorithm.get(); + } + + auto vdevice = VDevice::create(vdevice_params); + CHECK_EXPECTED_AS_STATUS(vdevice); + self->vdevice = std::move(vdevice.release()); + + auto infer_model = self->vdevice->create_infer_model(self->props.m_hef_path.get()); + CHECK_EXPECTED_AS_STATUS(infer_model); + self->infer_model = infer_model.release(); + + return HAILO_SUCCESS; +} + +static const gchar *gst_hailonet2_get_format_string(const InferModel::InferStream &input) +{ + switch (input.format().order) { + case HAILO_FORMAT_ORDER_RGB4: + case HAILO_FORMAT_ORDER_NHWC: + if (input.shape().features == RGBA_FEATURES_SIZE) { + return "RGBA"; + } + if (input.shape().features == GRAY8_FEATURES_SIZE) { + return "GRAY8"; + } + /* Fallthrough */ + case HAILO_FORMAT_ORDER_NHCW: + case HAILO_FORMAT_ORDER_FCR: + case HAILO_FORMAT_ORDER_F8CR: + if (input.shape().features == GRAY8_FEATURES_SIZE) { + return "GRAY8"; + } + CHECK(RGB_FEATURES_SIZE == input.shape().features, nullptr, + "Features of input %s is not %d for RGB format! 
(features=%d)", input.name().c_str(), RGB_FEATURES_SIZE, + input.shape().features); + return "RGB"; + case HAILO_FORMAT_ORDER_YUY2: + CHECK(YUY2_FEATURES_SIZE == input.shape().features, nullptr, + "Features of input %s is not %d for YUY2 format! (features=%d)", input.name().c_str(), YUY2_FEATURES_SIZE, + input.shape().features); + return "YUY2"; + case HAILO_FORMAT_ORDER_NV12: + CHECK(NV12_FEATURES_SIZE == input.shape().features, nullptr, + "Features of input %s is not %d for NV12 format! (features=%d)", input.name().c_str(), NV12_FEATURES_SIZE, + input.shape().features); + return "NV12"; + case HAILO_FORMAT_ORDER_NV21: + CHECK(NV21_FEATURES_SIZE == input.shape().features, nullptr, + "Features of input %s is not %d for NV21 format! (features=%d)", input.name().c_str(), NV21_FEATURES_SIZE, + input.shape().features); + return "NV21"; + case HAILO_FORMAT_ORDER_I420: + CHECK(I420_FEATURES_SIZE == input.shape().features, nullptr, + "Features of input %s is not %d for I420 format! (features=%d)", input.name().c_str(), I420_FEATURES_SIZE, + input.shape().features); + return "I420"; + default: + ERROR("Input %s has an unsupported format order! 
order = %d\n", input.name().c_str(), input.format().order); + return nullptr; + } +} + +static uint32_t get_height_by_order(uint32_t original_height, hailo_format_order_t order) +{ + switch (order) { + case HAILO_FORMAT_ORDER_NV12: + case HAILO_FORMAT_ORDER_NV21: + return original_height * 2; + default: + break; + } + return original_height; +} + +static GstCaps *gst_hailonet2_get_caps(GstHailoNet2 *self) +{ + if (nullptr == self->vdevice) { + auto status = gst_hailonet2_init_infer_model(self); + if (HAILO_SUCCESS != status) { + return nullptr; + } + } + + // TODO (HRT-12491): check caps based on incoming metadata + if (self->props.m_input_from_meta.get()) { + GstCaps *new_caps = gst_caps_new_any(); + self->input_caps = new_caps; + return gst_caps_copy(new_caps); + } + + auto input = self->infer_model->input(); + if (!input) { + ERROR("Getting input has failed\n"); + return nullptr; + } + + const gchar *format = gst_hailonet2_get_format_string(input.value()); + if (nullptr == format) { + return nullptr; + } + + GstCaps *new_caps = gst_caps_new_simple("video/x-raw", + "format", G_TYPE_STRING, format, + "width", G_TYPE_INT, input->shape().width, + "height", G_TYPE_INT, get_height_by_order(input->shape().height, input->format().order), + nullptr); + + if (!gst_video_info_from_caps(&self->input_frame_info, new_caps)) { + ERROR("gst_video_info_from_caps failed\n"); + return nullptr; + } + + self->input_caps = new_caps; + return gst_caps_copy(new_caps); +} + +static gboolean gst_hailonet2_handle_sink_query(GstPad * pad, GstObject * parent, GstQuery * query) +{ + GstHailoNet2 *self = GST_HAILONET2(parent); + switch (GST_QUERY_TYPE (query)) { + case GST_QUERY_CAPS: + { + GstCaps *caps = gst_hailonet2_get_caps(self); + gst_query_set_caps_result(query, caps); + gst_caps_unref(caps); + return TRUE; + } + case GST_QUERY_ALLOCATION: + { + // We implement this to make sure buffers are contiguous in memory + gst_query_add_allocation_meta(query, GST_VIDEO_META_API_TYPE, NULL); + 
return gst_pad_query_default(pad, parent, query); + } + default: + return gst_pad_query_default(pad, parent, query); + } +} + +static gboolean gst_hailonet2_handle_caps_event(GstHailoNet2 *self, GstCaps */*caps*/) +{ + if (nullptr == self->input_caps) { + return FALSE; + } + + GstCaps *caps_result = gst_pad_peer_query_caps(self->srcpad, self->input_caps); + if (gst_caps_is_empty(caps_result)) { + return FALSE; + } + + if (gst_caps_is_any(caps_result)) { + gst_caps_unref(caps_result); + return TRUE; + } + + GstCaps *outcaps = gst_caps_fixate(caps_result); + gboolean res = gst_pad_set_caps(self->srcpad, outcaps); + gst_caps_unref(outcaps); + return res; +} + +static gboolean gst_hailonet2_sink_event(GstPad *pad, GstObject *parent, GstEvent *event) +{ + GstHailoNet2 *self = GST_HAILONET2(parent); + switch (GST_EVENT_TYPE(event)) { + case GST_EVENT_CAPS: + { + GstCaps *caps; + gst_event_parse_caps(event, &caps); + auto result = gst_hailonet2_handle_caps_event(self, caps); + gst_event_unref(event); + return result; + } + case GST_EVENT_EOS: + self->has_got_eos = true; + return gst_pad_push_event(self->srcpad, event); + default: + return gst_pad_event_default(pad, parent, event); + } +} + +static GstPadProbeReturn gst_hailonet2_sink_probe(GstPad */*pad*/, GstPadProbeInfo */*info*/, gpointer user_data) +{ + GstHailoNet2 *self = static_cast(user_data); + auto status = gst_hailonet2_configure(self); + if (HAILO_SUCCESS != status) { + return GST_PAD_PROBE_DROP; + } + + status = gst_hailonet2_allocate_infer_resources(self); + if (HAILO_SUCCESS != status) { + return GST_PAD_PROBE_DROP; + } + + if (HAILO_SCHEDULING_ALGORITHM_NONE != self->props.m_scheduling_algorithm.get()) { + self->props.m_is_active = true; + return GST_PAD_PROBE_REMOVE; + } + + if ((1 == hailonet_count) && (!self->props.m_is_active.was_changed())) { + self->props.m_is_active = true; + } + + if (self->props.m_is_active.get()) { + status = self->configured_infer_model->activate(); + if (HAILO_SUCCESS != 
status) { + return GST_PAD_PROBE_DROP; + } + } + + self->has_called_activate = true; + return GST_PAD_PROBE_REMOVE; +} + +static void gst_hailonet2_flush_callback(GstHailoNet2 *self, gpointer /*data*/) +{ + std::unique_lock lock(self->flush_mutex); + self->flush_cv.wait(lock, [self] () { + return 0 == self->ongoing_frames; + }); +} + +static void gst_hailonet2_init(GstHailoNet2 *self) +{ + if (!do_versions_match(GST_ELEMENT(self))) { + return; + } + + self->sinkpad = gst_pad_new_from_static_template(&sink_template, "sink"); + gst_pad_set_chain_function(self->sinkpad, gst_hailonet2_chain); + gst_pad_set_query_function(self->sinkpad, gst_hailonet2_handle_sink_query); + gst_pad_set_event_function(self->sinkpad, GST_DEBUG_FUNCPTR(gst_hailonet2_sink_event)); + gst_element_add_pad(GST_ELEMENT (self), self->sinkpad); + gst_pad_add_probe(self->sinkpad, GST_PAD_PROBE_TYPE_BUFFER, static_cast(gst_hailonet2_sink_probe), self, nullptr); + + self->srcpad = gst_pad_new_from_static_template(&src_template, "src"); + gst_element_add_pad(GST_ELEMENT (self), self->srcpad); + + self->input_caps = nullptr; + self->input_queue = nullptr; + self->thread_queue = nullptr; + self->is_thread_running = false; + self->has_got_eos = false; + self->buffers_in_thread_queue = 0; + self->props = HailoNet2Properties(); + self->vdevice = nullptr; + self->is_configured = false; + self->has_called_activate = false; + self->ongoing_frames = 0; + + gchar *parent_name = gst_object_get_name(GST_OBJECT(self)); + gchar *name = g_strconcat(parent_name, ":hailo_allocator", NULL); + g_free(parent_name); + + self->allocator = GST_HAILO_ALLOCATOR(g_object_new(GST_TYPE_HAILO_ALLOCATOR, "name", name, NULL)); + gst_object_ref_sink(self->allocator); + g_free(name); + + g_signal_connect(self, "flush", G_CALLBACK(gst_hailonet2_flush_callback), nullptr); + + hailonet_count++; +} diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailonet2.hpp 
b/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailonet2.hpp new file mode 100644 index 0000000..6c0df95 --- /dev/null +++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailonet2.hpp @@ -0,0 +1,170 @@ +/* + * Copyright (c) 2021-2023 Hailo Technologies Ltd. All rights reserved. + * Distributed under the LGPL 2.1 license (https://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ */ +#ifndef _GST_HAILONET2_HPP_ +#define _GST_HAILONET2_HPP_ + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wconversion" +#include +#pragma GCC diagnostic pop + +#include +#include + +#include "hailo/infer_model.hpp" +#include "common.hpp" + +#include +#include +#include +#include + +using namespace hailort; + +G_BEGIN_DECLS + +#define GST_TYPE_HAILO_ALLOCATOR (gst_hailo_allocator_get_type()) +#define GST_HAILO_ALLOCATOR(obj) (G_TYPE_CHECK_INSTANCE_CAST ((obj), GST_TYPE_HAILO_ALLOCATOR, GstHailoAllocator)) +#define GST_HAILO_ALLOCATOR_CLASS(klass) (G_TYPE_CHECK_CLASS_CAST ((klass), GST_TYPE_HAILO_ALLOCATOR, GstHailoAllocatorClass)) +#define GST_IS_HAILO_ALLOCATOR(obj) (G_TYPE_CHECK_INSTANCE_TYPE ((obj), GST_TYPE_HAILO_ALLOCATOR)) +#define GST_IS_HAILO_ALLOCATOR_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((klass), GST_TYPE_HAILO_ALLOCATOR)) + +#define MIN_OUTPUTS_POOL_SIZE (MAX_GSTREAMER_BATCH_SIZE) +#define MAX_OUTPUTS_POOL_SIZE (MAX_GSTREAMER_BATCH_SIZE * 4) + +struct GstHailoAllocator +{ + GstAllocator parent; + std::unordered_map buffers; +}; + +struct GstHailoAllocatorClass +{ + GstAllocatorClass parent; +}; + +GType gst_hailo_allocator_get_type(void); + +struct HailoNet2Properties final +{ +public: + HailoNet2Properties() : m_hef_path(nullptr), m_batch_size(HAILO_DEFAULT_BATCH_SIZE), + m_device_id(nullptr), m_device_count(0), m_vdevice_group_id(nullptr), m_is_active(false), + m_outputs_min_pool_size(MIN_OUTPUTS_POOL_SIZE), m_outputs_max_pool_size(MAX_OUTPUTS_POOL_SIZE), + m_scheduling_algorithm(HAILO_SCHEDULING_ALGORITHM_ROUND_ROBIN), m_scheduler_timeout_ms(HAILO_DEFAULT_SCHEDULER_TIMEOUT_MS), + m_scheduler_threshold(HAILO_DEFAULT_SCHEDULER_THRESHOLD), m_scheduler_priority(HAILO_SCHEDULER_PRIORITY_NORMAL), + m_input_format_type(HAILO_FORMAT_TYPE_AUTO), m_output_format_type(HAILO_FORMAT_TYPE_AUTO), + m_nms_score_threshold(0), m_nms_iou_threshold(0), m_nms_max_proposals_per_class(0), m_input_from_meta(false), + 
m_multi_process_service(HAILO_DEFAULT_MULTI_PROCESS_SERVICE), + m_vdevice_key(DEFAULT_VDEVICE_KEY) + {} + + void free_strings() + { + if (m_hef_path.was_changed()) { + g_free(m_hef_path.get()); + } + if (m_device_id.was_changed()) { + g_free(m_device_id.get()); + } + if (m_vdevice_group_id.was_changed()) { + g_free(m_vdevice_group_id.get()); + } + } + + HailoElemProperty m_hef_path; + HailoElemProperty m_batch_size; + HailoElemProperty m_device_id; + HailoElemProperty m_device_count; + HailoElemProperty m_vdevice_group_id; + HailoElemProperty m_is_active; + HailoElemProperty m_outputs_min_pool_size; + HailoElemProperty m_outputs_max_pool_size; + HailoElemProperty m_scheduling_algorithm; + HailoElemProperty m_scheduler_timeout_ms; + HailoElemProperty m_scheduler_threshold; + HailoElemProperty m_scheduler_priority; + HailoElemProperty m_input_format_type; + HailoElemProperty m_output_format_type; + HailoElemProperty m_nms_score_threshold; + HailoElemProperty m_nms_iou_threshold; + HailoElemProperty m_nms_max_proposals_per_class; + HailoElemProperty m_input_from_meta; + HailoElemProperty m_multi_process_service; + + // Deprecated + HailoElemProperty m_vdevice_key; +}; + +typedef struct _GstHailoNet2 { + GstElement element; + GstPad *sinkpad; + GstPad *srcpad; + GstQueueArray *input_queue; + GstQueueArray *thread_queue; + std::atomic_uint32_t buffers_in_thread_queue; + std::thread thread; + HailoNet2Properties props; + GstCaps *input_caps; + std::atomic_bool is_thread_running; + std::atomic_bool has_got_eos; + + std::unique_ptr vdevice; + std::shared_ptr infer_model; + std::shared_ptr configured_infer_model; + ConfiguredInferModel::Bindings infer_bindings; + bool is_configured; + std::mutex infer_mutex; + + bool has_called_activate; + std::atomic_uint32_t ongoing_frames; + std::condition_variable flush_cv; + std::mutex flush_mutex; + + GstVideoInfo input_frame_info; + + GstHailoAllocator *allocator; + std::unordered_map output_buffer_pools; + std::unordered_map 
output_vstream_infos; + + std::mutex input_queue_mutex; + std::mutex thread_queue_mutex; + std::condition_variable thread_cv; +} GstHailoNet2; + +typedef struct _GstHailoNet2Class { + GstElementClass parent_class; +} GstHailoNet2Class; + +#define GST_TYPE_HAILONET2 (gst_hailonet2_get_type()) +#define GST_HAILONET2(obj) \ + (G_TYPE_CHECK_INSTANCE_CAST((obj),GST_TYPE_HAILONET2,GstHailoNet2)) +#define GST_HAILONET2_CLASS(klass) \ + (G_TYPE_CHECK_CLASS_CAST((klass),GST_TYPE_HAILONET2,GstHailoNet2Class)) +#define GST_IS_HAILONET2(obj) \ + (G_TYPE_CHECK_INSTANCE_TYPE((obj),GST_TYPE_HAILONET2)) +#define GST_IS_HAILONET2_CLASS(klass) \ + (G_TYPE_CHECK_CLASS_TYPE((klass),GST_TYPE_HAILONET2)) + +GType gst_hailonet2_get_type (void); + +G_END_DECLS + +#endif /* _GST_HAILONET2_HPP_ */ \ No newline at end of file diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailoplugin.cpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailoplugin.cpp index dea6cdc..4ae413e 100644 --- a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailoplugin.cpp +++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailoplugin.cpp @@ -20,6 +20,7 @@ #include "gsthailonet.hpp" #include "gsthailosend.hpp" #include "gsthailorecv.hpp" +#include "gsthailonet2.hpp" #include "gsthailodevicestats.hpp" #include "metadata/tensor_meta.hpp" @@ -31,7 +32,8 @@ static gboolean plugin_init(GstPlugin *plugin) return gst_element_register(plugin, "hailonet", GST_RANK_PRIMARY, GST_TYPE_HAILONET) && gst_element_register(plugin, "hailodevicestats", GST_RANK_PRIMARY, GST_TYPE_HAILODEVICESTATS) && gst_element_register(nullptr, "hailosend", GST_RANK_PRIMARY, GST_TYPE_HAILOSEND) && - gst_element_register(nullptr, "hailorecv", GST_RANK_PRIMARY, GST_TYPE_HAILORECV); + gst_element_register(nullptr, "hailorecv", GST_RANK_PRIMARY, GST_TYPE_HAILORECV) && + gst_element_register(plugin, "hailonet2", GST_RANK_PRIMARY, GST_TYPE_HAILONET2); } GST_PLUGIN_DEFINE(GST_VERSION_MAJOR, GST_VERSION_MINOR, hailo, "hailo 
gstreamer plugin", plugin_init, VERSION, diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailosend.cpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailosend.cpp index 25ca742..1c4f536 100644 --- a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailosend.cpp +++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailosend.cpp @@ -28,13 +28,6 @@ GST_DEBUG_CATEGORY_STATIC(gst_hailosend_debug_category); #define GST_CAT_DEFAULT gst_hailosend_debug_category -#define RGB_FEATURES_SIZE (3) -#define RGBA_FEATURES_SIZE (4) -#define GRAY8_FEATURES_SIZE (1) -#define YUY2_FEATURES_SIZE (2) -#define NV12_FEATURES_SIZE (3) -#define NV21_FEATURES_SIZE (3) -#define I420_FEATURES_SIZE (3) static void gst_hailosend_set_property(GObject *object, guint property_id, const GValue *value, GParamSpec *pspec); static void gst_hailosend_get_property(GObject *object, guint property_id, GValue *value, GParamSpec *pspec); diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/metadata/tensor_meta.hpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/metadata/tensor_meta.hpp index 82bb7d5..f60f7fa 100644 --- a/hailort/libhailort/bindings/gstreamer/gst-hailo/metadata/tensor_meta.hpp +++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/metadata/tensor_meta.hpp @@ -53,7 +53,7 @@ inline const void *get_tensor_data(GstStructure *s) { * @brief This struct represents raw tensor metadata and contains instance of parent GstMeta and fields describing * inference result tensor. This metadata instances is attached to buffer by gvainference elements */ -struct GstHailoTensorMeta { +struct HAILORTAPI GstHailoTensorMeta { GstMeta meta; /**< parent meta object */ hailo_vstream_info_t info; /**< struct that holds vstream info, e.g. shape, quant_info, layer_name etc... 
*/ }; @@ -62,14 +62,14 @@ struct GstHailoTensorMeta { * @brief This function registers, if needed, and returns GstMetaInfo for _GstHailoTensorMeta * @return GstMetaInfo* for registered type */ -const GstMetaInfo *gst_tensor_meta_get_info(void); +HAILORTAPI const GstMetaInfo *gst_tensor_meta_get_info(void); /** * @brief This function registers, if needed, and returns a GType for api "GstHailoTensorMetaAPI" and associate it with * TENSOR_META_TAG tag * @return GType type */ -GType gst_tensor_meta_api_get_type(void); +HAILORTAPI GType gst_tensor_meta_api_get_type(void); #define GST_TENSOR_META_API_TYPE (gst_tensor_meta_api_get_type()) /** diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/network_group_handle.cpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/network_group_handle.cpp index 709c7d3..e2b0d08 100644 --- a/hailort/libhailort/bindings/gstreamer/gst-hailo/network_group_handle.cpp +++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/network_group_handle.cpp @@ -185,8 +185,8 @@ hailo_status NetworkGroupHandle::set_scheduler_priority(const char *network_name } Expected, std::vector>> NetworkGroupHandle::create_vstreams(const char *network_name, - hailo_scheduling_algorithm_t scheduling_algorithm, const std::vector &output_formats, bool input_quantized, - bool output_quantized, hailo_format_type_t input_format_type, hailo_format_type_t output_format_type) + hailo_scheduling_algorithm_t scheduling_algorithm, const std::vector &output_formats, + hailo_format_type_t input_format_type, hailo_format_type_t output_format_type) { GST_CHECK(nullptr != network_name, make_unexpected(HAILO_INVALID_ARGUMENT), m_element, RESOURCE, "Got nullptr in network name!"); @@ -200,7 +200,7 @@ Expected, std::vector>> Netwo auto expected_input_vstream_infos = hef()->get_input_vstream_infos(network_name); GST_CHECK_EXPECTED(expected_input_vstream_infos, m_element, RESOURCE, "Failed getting input vstream infos, status = %d", expected_input_vstream_infos.status()); - auto 
expected_input_params_map = m_cng->make_input_vstream_params(input_quantized, input_format_type, HAILO_DEFAULT_VSTREAM_TIMEOUT_MS, + auto expected_input_params_map = m_cng->make_input_vstream_params({}, input_format_type, HAILO_DEFAULT_VSTREAM_TIMEOUT_MS, HAILO_DEFAULT_VSTREAM_QUEUE_SIZE, m_network_name); GST_CHECK_EXPECTED(expected_input_params_map, m_element, RESOURCE, "Failed making input vstream params, status = %d", expected_input_params_map.status()); @@ -223,7 +223,7 @@ Expected, std::vector>> Netwo GST_CHECK(1 == input_vstreams->size(), make_unexpected(HAILO_INVALID_OPERATION), m_element, RESOURCE, "hailosend element supports only HEFs with one input for now!"); - auto output_params_map = m_cng->make_output_vstream_params(output_quantized, output_format_type, HAILO_DEFAULT_VSTREAM_TIMEOUT_MS, + auto output_params_map = m_cng->make_output_vstream_params({}, output_format_type, HAILO_DEFAULT_VSTREAM_TIMEOUT_MS, HAILO_DEFAULT_VSTREAM_QUEUE_SIZE, m_network_name); GST_CHECK_EXPECTED(output_params_map, m_element, RESOURCE, "Failed making output vstream params, status = %d", output_params_map.status()); diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/network_group_handle.hpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/network_group_handle.hpp index 69381dd..c989714 100644 --- a/hailort/libhailort/bindings/gstreamer/gst-hailo/network_group_handle.hpp +++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/network_group_handle.hpp @@ -83,8 +83,8 @@ public: bool multi_process_service, const char *hef_path); hailo_status configure_network_group(const char *net_group_name, hailo_scheduling_algorithm_t scheduling_algorithm, uint16_t batch_size); Expected, std::vector>> create_vstreams(const char *network_name, - hailo_scheduling_algorithm_t scheduling_algorithm, const std::vector &output_formats, bool input_quantized, - bool output_quantized, hailo_format_type_t input_format_type, hailo_format_type_t output_format_type); + hailo_scheduling_algorithm_t 
scheduling_algorithm, const std::vector &output_formats, + hailo_format_type_t input_format_type, hailo_format_type_t output_format_type); hailo_status activate_network_group(); Expected remove_network_group(); diff --git a/hailort/libhailort/bindings/python/platform/hailo_platform/pyhailort/pyhailort.py b/hailort/libhailort/bindings/python/platform/hailo_platform/pyhailort/pyhailort.py index 5ef89c8..bbd7682 100644 --- a/hailort/libhailort/bindings/python/platform/hailo_platform/pyhailort/pyhailort.py +++ b/hailort/libhailort/bindings/python/platform/hailo_platform/pyhailort/pyhailort.py @@ -889,6 +889,12 @@ class InferVStreams(object): self._net_group_name) output_tensor_info = output_buffers_info[output_name].output_tensor_info shape, dtype = output_tensor_info + if (output_buffers_info[output_name].output_order == FormatOrder.HAILO_NMS_WITH_BYTE_MASK): + # Note: In python bindings the output data gets converted to py::array with dtype=dtype. + # In `HAILO_NMS_WITH_BYTE_MASK` we would like to get the data as uint8 and convert it by it's format. + # Therefore we need to get it as uint8 instead of float32 and adjust the shape size. 
+ dtype = numpy.uint8 + shape[0] = shape[0] * 4 output_buffers[output_name] = numpy.empty([batch_size] + list(shape), dtype=dtype) return output_buffers, output_buffers_info @@ -1061,49 +1067,60 @@ class InferVStreams(object): return False -class HailoDetectionBox(object): -# TODO: HRT-11492 - Add documentation to class and functions +class HailoDetection(object): + """Represents Hailo detection information""" - def __init__(self, bbox, class_id, mask_size, mask): - self._bbox = bbox - self._mask_size = mask_size - self._mask = mask - self._class_id = class_id - - @property - def bbox(self): - return self._bbox + def __init__(self, detection): + self._y_min = detection.box.y_min + self._x_min = detection.box.x_min + self._y_max = detection.box.y_max + self._x_max = detection.box.x_max + self._score = detection.score + self._class_id = detection.class_id + self._mask = detection.mask() @property def y_min(self): - return self._bbox[0] + """Get detection's box y_min coordinate""" + return self._y_min @property def x_min(self): - return self._bbox[1] + """Get detection's box x_min coordinate""" + return self._x_min @property def y_max(self): - return self._bbox[2] + """Get detection's box y_max coordinate""" + return self._y_max @property def x_max(self): - return self._bbox[3] + """Get detection's box x_max coordinate""" + return self._x_max @property def score(self): - return self._bbox[4] + """Get detection's score""" + return self._score @property def class_id(self): + """Get detection's class_id""" return self._class_id - @property - def mask_size(self): - return self._mask_size - @property def mask(self): + """Byte Mask: + The mask is a binary mask that defines a region of interest (ROI) of the image. + Mask pixel values of 1 indicate image pixels that belong to the ROI. + Mask pixel values of 0 indicate image pixels that are part of the background. + + The size of the mask is the size of the box, in the original input image's dimensions. 
+ Mask width = ceil((x_max - x_min) * image_width) + Mask height = ceil((y_max - y_min) * image_height) + First pixel represents the pixel (x_min * image_width, y_min * image_height) in the original input image. + """ return self._mask class HailoRTTransformUtils(object): @@ -1156,15 +1173,6 @@ class HailoRTTransformUtils(object): "Please compile again or provide a list of quant_infos.") _pyhailort.dequantize_output_buffer_in_place(raw_buffer, src_format_type, dst_format_type, elements_count, quant_info) - @staticmethod - def is_qp_valid(quant_info): - """Returns if quant_info is valid. - - Args: - quant_info (:class:`~hailo_platform.pyhailort.pyhailort.QuantInfo`): The quantization info. - """ - return _pyhailort.is_qp_valid(quant_info) - @staticmethod def quantize_input_buffer(src_buffer, dst_buffer, elements_count, quant_info): """Quantize the data in input buffer `src_buffer` and output it to the buffer `dst_buffer` @@ -1233,116 +1241,80 @@ class HailoRTTransformUtils(object): def _output_raw_buffer_to_nms_with_byte_mask_format(raw_output_buffer, number_of_classes, batch_size, image_height, image_width, max_bboxes_per_class, output_dtype, is_tf_format=False): if is_tf_format: - if os.environ.get('HAILO_TF_FORMAT_INTERNAL'): - return HailoRTTransformUtils._output_raw_buffer_to_nms_with_byte_mask_tf_format(raw_output_buffer, number_of_classes, - batch_size, image_height, image_width, max_bboxes_per_class, output_dtype) - else: - raise HailoRTException("TF format is not supported with HAILO_NMS_WITH_BYTE_MASK format order") + return HailoRTTransformUtils._output_raw_buffer_to_nms_with_byte_mask_tf_format(raw_output_buffer, number_of_classes, + batch_size, image_height, image_width, max_bboxes_per_class, output_dtype) else: - return HailoRTTransformUtils._output_raw_buffer_to_nms_with_byte_mask_hailo_format(raw_output_buffer, number_of_classes) + return HailoRTTransformUtils._output_raw_buffer_to_nms_with_byte_mask_hailo_format(raw_output_buffer) @staticmethod - 
def _output_raw_buffer_to_nms_with_byte_mask_hailo_format(raw_output_buffer, number_of_classes): + def _output_raw_buffer_to_nms_with_byte_mask_hailo_format(raw_output_buffer): converted_output_buffer = [] for frame in raw_output_buffer: converted_output_buffer.append( - HailoRTTransformUtils._output_raw_buffer_to_nms_with_byte_mask_hailo_format_single_frame(frame, number_of_classes)) + HailoRTTransformUtils._output_raw_buffer_to_nms_with_byte_mask_hailo_format_single_frame(frame)) return converted_output_buffer @staticmethod - def _output_raw_buffer_to_nms_with_byte_mask_hailo_format_single_frame(raw_output_buffer, number_of_classes): - offset = 0 + def _output_raw_buffer_to_nms_with_byte_mask_hailo_format_single_frame(raw_output_buffer): + detections = _pyhailort.convert_nms_with_byte_mask_buffer_to_detections(raw_output_buffer) converted_output_frame = [] - for class_i in range(number_of_classes): - class_bboxes_amount = int(raw_output_buffer[offset]) - offset += 1 - classes_boxes = [] - - if class_bboxes_amount != 0: - for bbox_i in range(class_bboxes_amount): - bbox = raw_output_buffer[offset : offset + BBOX_PARAMS] - offset += BBOX_PARAMS + for detection in detections: + converted_output_frame.append(HailoDetection(detection)) - bbox_mask_size_in_bytes = raw_output_buffer[offset] - offset += 1 - bbox_mask_size = int(bbox_mask_size_in_bytes / 4) - - bbox_mask = raw_output_buffer[offset : (offset + bbox_mask_size)] - offset += bbox_mask_size - - hailo_bbox = HailoDetectionBox(bbox, class_i, bbox_mask_size_in_bytes, bbox_mask) - classes_boxes.append(hailo_bbox) - - converted_output_frame.append(classes_boxes) return converted_output_frame @staticmethod def _output_raw_buffer_to_nms_with_byte_mask_tf_format(raw_output_buffer, number_of_classes, batch_size, image_height, image_width, max_bboxes_per_class, output_dtype): - offset = 0 - # The + 1 is for the extra row containing the bbox coordinates, score and class_id - output_height = image_height + 1 + + 
BBOX_WITH_MASK_PARAMS = 6 # 4 coordinates + score + class_idx + BBOX_WITH_MASK_AXIS = 2 + CLASSES_AXIS = 1 # We create the tf_format buffer with reversed max_bboxes_per_class/features for performance optimization - converted_output_buffer = numpy.empty([batch_size, max_bboxes_per_class, output_height, image_width], dtype=output_dtype) + converted_output_buffer = numpy.empty([batch_size, max_bboxes_per_class, (image_height * image_width + BBOX_WITH_MASK_PARAMS)], dtype=output_dtype) for frame_idx in range(len(raw_output_buffer)): - offset = HailoRTTransformUtils._output_raw_buffer_to_nms_with_byte_mask_tf_format_single_frame( + HailoRTTransformUtils._output_raw_buffer_to_nms_with_byte_mask_tf_format_single_frame( raw_output_buffer[frame_idx], converted_output_buffer[frame_idx], number_of_classes, max_bboxes_per_class, - image_height, image_width, offset) - converted_output_buffer = numpy.moveaxis(converted_output_buffer, 1, 3) + image_height, image_width) + converted_output_buffer = numpy.moveaxis(converted_output_buffer, CLASSES_AXIS, BBOX_WITH_MASK_AXIS) + converted_output_buffer = numpy.expand_dims(converted_output_buffer, 1) return converted_output_buffer @staticmethod def _output_raw_buffer_to_nms_with_byte_mask_tf_format_single_frame(raw_output_buffer, converted_output_frame, number_of_classes, - max_boxes, image_height, image_width, offset): - - detections = [] - for class_i in range(number_of_classes): - class_bboxes_amount = int(raw_output_buffer[offset]) - offset += 1 - - if class_bboxes_amount != 0: - for bbox_i in range(class_bboxes_amount): - bbox = raw_output_buffer[offset : offset + BBOX_PARAMS] - offset += BBOX_PARAMS - - bbox_mask_size_in_bytes = raw_output_buffer[offset] - offset += 1 - bbox_mask_size = int(bbox_mask_size_in_bytes // 4) - - bbox_mask = raw_output_buffer[offset : (offset + bbox_mask_size)] - offset += bbox_mask_size - - y_min = bbox[0] * image_height - x_min = bbox[1] * image_width - bbox_width = round((bbox[3] - bbox[1]) * 
image_width) - resized_mask = numpy.empty([image_height, image_width]) - - for i in range(bbox_mask_size): - if (bbox_mask[i] == 1): - x = int(x_min + (i % bbox_width)) - y = int(y_min + (i // bbox_width)) - if (x >= image_width): - x = image_width - 1 - if ( y >= image_height): - y = image_height - 1 - resized_mask[y][x] = 1 - - padding = image_width - len(bbox) - bbox_padded = numpy.pad(bbox, pad_width=(0, padding), mode='constant') - bbox_padded[len(bbox)] = class_i - - converted_detection = numpy.append(resized_mask ,[bbox_padded], axis=0) - detections.append((bbox[4], converted_detection)) - - detections.sort(key=lambda tup: tup[0], reverse=True) - for detection_idx in range(len(detections)): - if (detection_idx >= max_boxes): - return offset - converted_output_frame[detection_idx] = detections[detection_idx][1] + max_boxes, image_height, image_width): + + detections = _pyhailort.convert_nms_with_byte_mask_buffer_to_detections(raw_output_buffer) + bbox_idx = 0 + for detection in detections: + if (bbox_idx >= max_boxes): + return + bbox = numpy.array([detection.box.y_min, detection.box.x_min, detection.box.y_max, detection.box.x_max, + detection.score, detection.class_id]) + bbox_mask = detection.mask() + + y_min = numpy.ceil(bbox[0] * image_height) + x_min = numpy.ceil(bbox[1] * image_width) + bbox_width = numpy.ceil((bbox[3] - bbox[1]) * image_width) + resized_mask = numpy.zeros(image_height*image_width, dtype="uint8") + + for i in range(bbox_mask.size): + if (bbox_mask[i] == 1): + x = int(x_min + (i % bbox_width)) + y = int(y_min + (i // bbox_width)) + if (x >= image_width): + x = image_width - 1 + if ( y >= image_height): + y = image_height - 1 + idx = (image_width * y) + x + resized_mask[idx] = 1 + + bbox_with_mask = numpy.append(bbox, resized_mask) + converted_output_frame[bbox_idx] = bbox_with_mask + bbox_idx += 1 - return offset @staticmethod def _get_format_type(dtype): @@ -1515,7 +1487,7 @@ class HailoFormatFlags(_pyhailort.FormatFlags): 
SUPPORTED_PROTOCOL_VERSION = 2 SUPPORTED_FW_MAJOR = 4 -SUPPORTED_FW_MINOR = 15 +SUPPORTED_FW_MINOR = 16 SUPPORTED_FW_REVISION = 0 MEGA_MULTIPLIER = 1000.0 * 1000.0 @@ -2706,8 +2678,7 @@ class InputVStreamParams(object): Args: configured_network (:class:`ConfiguredNetwork`): The configured network group for which the params are created. - quantized (bool): Deprecated parameter that will be ignored. Determine whether to quantize (scale) - the data will be decided by the src-data and dst-data types. + quantized: Unused. format_type (:class:`~hailo_platform.pyhailort.pyhailort.FormatType`): The default format type of the data for all input virtual streams. The default is :attr:`~hailo_platform.pyhailort.pyhailort.FormatType.AUTO`, @@ -2725,16 +2696,13 @@ class InputVStreamParams(object): """ if format_type is None: format_type = FormatType.AUTO - if quantized is None: - quantized = format_type != FormatType.FLOAT32 if timeout_ms is None: timeout_ms = DEFAULT_VSTREAM_TIMEOUT_MS if queue_size is None: queue_size = DEFAULT_VSTREAM_QUEUE_SIZE name = network_name if network_name is not None else "" with ExceptionWrapper(): - return configured_network._configured_network.make_input_vstream_params(name, quantized, - format_type, timeout_ms, queue_size) + return configured_network._configured_network.make_input_vstream_params(name, format_type, timeout_ms, queue_size) @staticmethod def make_from_network_group(configured_network, quantized=None, format_type=None, timeout_ms=None, queue_size=None, network_name=None): @@ -2744,8 +2712,7 @@ class InputVStreamParams(object): Args: configured_network (:class:`ConfiguredNetwork`): The configured network group for which the params are created. - quantized (bool): Deprecated parameter that will be ignored. Determine whether to quantize (scale) - the data will be decided by the src-data and dst-data types. + quantized: Unused. 
format_type (:class:`~hailo_platform.pyhailort.pyhailort.FormatType`): The default format type of the data for all input virtual streams. The default is :attr:`~hailo_platform.pyhailort.pyhailort.FormatType.AUTO`, @@ -2761,7 +2728,8 @@ class InputVStreamParams(object): dict: The created virtual streams params. The keys are the vstreams names. The values are the params. """ - return InputVStreamParams.make(configured_network, quantized, format_type, timeout_ms, queue_size, network_name) + return InputVStreamParams.make(configured_network=configured_network, format_type=format_type, timeout_ms=timeout_ms, + queue_size=queue_size, network_name=network_name) class OutputVStreamParams(object): @@ -2775,8 +2743,7 @@ class OutputVStreamParams(object): Args: configured_network (:class:`ConfiguredNetwork`): The configured network group for which the params are created. - quantized (bool): Deprecated parameter that will be ignored. Determine whether to de-quantize (rescale) - the data will be decided by the src-data and dst-data types. + quantized: Unused. format_type (:class:`~hailo_platform.pyhailort.pyhailort.FormatType`): The default format type of the data for all output virtual streams. 
The default is :attr:`~hailo_platform.pyhailort.pyhailort.FormatType.AUTO`, @@ -2794,16 +2761,13 @@ class OutputVStreamParams(object): """ if format_type is None: format_type = FormatType.AUTO - if quantized is None: - quantized = format_type != FormatType.FLOAT32 if timeout_ms is None: timeout_ms = DEFAULT_VSTREAM_TIMEOUT_MS if queue_size is None: queue_size = DEFAULT_VSTREAM_QUEUE_SIZE name = network_name if network_name is not None else "" with ExceptionWrapper(): - return configured_network._configured_network.make_output_vstream_params(name, quantized, - format_type, timeout_ms, queue_size) + return configured_network._configured_network.make_output_vstream_params(name, format_type, timeout_ms, queue_size) @staticmethod def make_from_network_group(configured_network, quantized=None, format_type=None, timeout_ms=None, queue_size=None, network_name=None): @@ -2813,8 +2777,7 @@ class OutputVStreamParams(object): Args: configured_network (:class:`ConfiguredNetwork`): The configured network group for which the params are created. - quantized (bool): Deprecated parameter that will be ignored. Determine whether to de-quantize (rescale) - the data will be decided by the src-data and dst-data types. + quantized: Unused. format_type (:class:`~hailo_platform.pyhailort.pyhailort.FormatType`): The default format type of the data for all output virtual streams. The default is :attr:`~hailo_platform.pyhailort.pyhailort.FormatType.AUTO`, @@ -2830,7 +2793,8 @@ class OutputVStreamParams(object): dict: The created virtual streams params. The keys are the vstreams names. The values are the params. 
""" - return OutputVStreamParams.make(configured_network, quantized, format_type, timeout_ms, queue_size, network_name) + return OutputVStreamParams.make(configured_network=configured_network, format_type=format_type, timeout_ms=timeout_ms, + queue_size=queue_size, network_name=network_name) @staticmethod def make_groups(configured_network, quantized=None, format_type=None, timeout_ms=None, queue_size=None): @@ -2840,8 +2804,7 @@ class OutputVStreamParams(object): Args: configured_network (:class:`ConfiguredNetwork`): The configured network group for which the params are created. - quantized (bool): Deprecated parameter that will be ignored. Determine whether to de-quantize (rescale) - the data will be decided by the src-data and dst-data types. + quantized: Unused. format_type (:class:`~hailo_platform.pyhailort.pyhailort.FormatType`): The default format type of the data for all output virtual streams. The default is :attr:`~hailo_platform.pyhailort.pyhailort.FormatType.AUTO`, @@ -2855,7 +2818,7 @@ class OutputVStreamParams(object): list of dicts: Each element in the list represent a group of params, where the keys are the vstreams names, and the values are the params. The params groups are splitted with respect to their underlying streams for multi process usges. 
""" - all_params = OutputVStreamParams.make(configured_network, quantized=quantized, format_type=format_type, timeout_ms=timeout_ms, queue_size=queue_size) + all_params = OutputVStreamParams.make(configured_network=configured_network, format_type=format_type, timeout_ms=timeout_ms, queue_size=queue_size) low_level_streams_names = [stream_info.name for stream_info in configured_network.get_output_stream_infos()] stream_name_to_vstream_names = {stream_name: configured_network.get_vstream_names_from_stream_name(stream_name) for stream_name in low_level_streams_names} results = [] @@ -2994,7 +2957,7 @@ class OutputLayerUtils(object): if self._is_nms: self._quantized_empty_bbox = numpy.asarray([0] * BBOX_PARAMS, dtype=self.output_dtype) - if not (self._user_buffer_format.flags & _pyhailort.FormatFlags.QUANTIZED): + if self.output_dtype == numpy.float32: HailoRTTransformUtils.dequantize_output_buffer_in_place(self._quantized_empty_bbox, self.output_dtype, BBOX_PARAMS, self._vstream_info.quant_info) diff --git a/hailort/libhailort/bindings/python/platform/hailo_tutorials/notebooks/HRT_0_Inference_Tutorial.ipynb b/hailort/libhailort/bindings/python/platform/hailo_tutorials/notebooks/HRT_0_Inference_Tutorial.ipynb index b6a7427..2ecfab9 100644 --- a/hailort/libhailort/bindings/python/platform/hailo_tutorials/notebooks/HRT_0_Inference_Tutorial.ipynb +++ b/hailort/libhailort/bindings/python/platform/hailo_tutorials/notebooks/HRT_0_Inference_Tutorial.ipynb @@ -62,10 +62,8 @@ "network_group_params = network_group.create_params()\n", "\n", "# Create input and output virtual streams params\n", - "# Quantized argument signifies whether or not the incoming data is already quantized.\n", - "# Data is quantized by HailoRT if and only if quantized == False .\n", - "input_vstreams_params = InputVStreamParams.make(network_group, quantized=False, format_type=FormatType.FLOAT32)\n", - "output_vstreams_params = OutputVStreamParams.make(network_group, quantized=True, 
format_type=FormatType.UINT8)\n", + "input_vstreams_params = InputVStreamParams.make(network_group, format_type=FormatType.FLOAT32)\n", + "output_vstreams_params = OutputVStreamParams.make(network_group, format_type=FormatType.UINT8)\n", "\n", "# Define dataset params\n", "input_vstream_info = hef.get_input_vstream_infos()[0]\n", @@ -108,6 +106,8 @@ "\n", "This section shows how to run streaming inference using multiple processes in Python.\n", "\n", + "Note: This flow is not supported on Windows.\n", + "\n", "We will not use infer. Instead we will use a send and receive model.\n", "The send function and the receive function will run in different processes." ] diff --git a/hailort/libhailort/bindings/python/platform/hailo_tutorials/notebooks/HRT_2_Inference_Tutorial_Multi_Process_Service.ipynb b/hailort/libhailort/bindings/python/platform/hailo_tutorials/notebooks/HRT_2_Inference_Tutorial_Multi_Process_Service.ipynb index ffd700c..3dcd500 100644 --- a/hailort/libhailort/bindings/python/platform/hailo_tutorials/notebooks/HRT_2_Inference_Tutorial_Multi_Process_Service.ipynb +++ b/hailort/libhailort/bindings/python/platform/hailo_tutorials/notebooks/HRT_2_Inference_Tutorial_Multi_Process_Service.ipynb @@ -5,7 +5,7 @@ "metadata": {}, "source": [ "\n", - "# Python inference tutorial - Multi Process Service and Model Scheduler\n", + "# Python Inference Tutorial - Multi Process Service and Model Scheduler\n", "\n", "This tutorial will walk you through the inference process using The Model Scheduler.\n", "\n", @@ -77,10 +77,8 @@ " network_group = network_groups[0]\n", "\n", " # Create input and output virtual streams params\n", - " # Quantized argument signifies whether or not the incoming data is already quantized.\n", - " # Data is quantized by HailoRT if and only if quantized == False.\n", - " input_vstreams_params = InputVStreamParams.make(network_group, quantized=False, format_type=FormatType.FLOAT32)\n", - " output_vstreams_params = 
OutputVStreamParams.make(network_group, quantized=True, format_type=FormatType.UINT8)\n", + " input_vstreams_params = InputVStreamParams.make(network_group, format_type=FormatType.FLOAT32)\n", + " output_vstreams_params = OutputVStreamParams.make(network_group, format_type=FormatType.UINT8)\n", "\n", " # Define dataset params\n", " input_vstream_info = hef.get_input_vstream_infos()[0]\n", diff --git a/hailort/libhailort/bindings/python/platform/setup.py b/hailort/libhailort/bindings/python/platform/setup.py index 4d37b82..52fa311 100644 --- a/hailort/libhailort/bindings/python/platform/setup.py +++ b/hailort/libhailort/bindings/python/platform/setup.py @@ -69,6 +69,6 @@ if __name__ == "__main__": "linux_aarch64", ], url="https://hailo.ai/", - version="4.15.0", + version="4.16.0", zip_safe=False, ) diff --git a/hailort/libhailort/bindings/python/src/CMakeLists.txt b/hailort/libhailort/bindings/python/src/CMakeLists.txt index c10045a..bef9d2a 100644 --- a/hailort/libhailort/bindings/python/src/CMakeLists.txt +++ b/hailort/libhailort/bindings/python/src/CMakeLists.txt @@ -49,7 +49,7 @@ set_target_properties(_pyhailort PROPERTIES # VISIBILITY_INLINES_HIDDEN YES ) -find_package(HailoRT 4.15.0 EXACT REQUIRED) +find_package(HailoRT 4.16.0 EXACT REQUIRED) target_link_libraries(_pyhailort PRIVATE HailoRT::libhailort) if(WIN32) diff --git a/hailort/libhailort/bindings/python/src/hef_api.cpp b/hailort/libhailort/bindings/python/src/hef_api.cpp index 7cb3641..9d5fa2d 100644 --- a/hailort/libhailort/bindings/python/src/hef_api.cpp +++ b/hailort/libhailort/bindings/python/src/hef_api.cpp @@ -111,22 +111,6 @@ py::list HefWrapper::get_vstream_names_from_stream_name(const std::string &strea return py::cast(results.release()); } -py::dict HefWrapper::get_input_vstreams_params(const std::string &name, bool quantized, hailo_format_type_t format_type, - uint32_t timeout_ms, uint32_t queue_size) -{ - auto result = hef->make_input_vstream_params(name, quantized, format_type, timeout_ms, 
queue_size); - VALIDATE_EXPECTED(result); - return py::cast(result.value()); -} - -py::dict HefWrapper::get_output_vstreams_params(const std::string &name, bool quantized, hailo_format_type_t format_type, - uint32_t timeout_ms, uint32_t queue_size) -{ - auto result = hef->make_output_vstream_params(name, quantized, format_type, timeout_ms, queue_size); - VALIDATE_EXPECTED(result); - return py::cast(result.value()); -} - py::list HefWrapper::get_input_vstream_infos(const std::string &name) { auto result = hef->get_input_vstream_infos(name); @@ -215,8 +199,6 @@ void HefWrapper::initialize_python_module(py::module &m) .def("get_udp_rates_dict", &HefWrapper::get_udp_rates_dict) .def("create_configure_params", &HefWrapper::create_configure_params) .def("create_configure_params_mipi_input", &HefWrapper::create_configure_params_mipi_input) - .def("get_input_vstreams_params", &HefWrapper::get_input_vstreams_params) - .def("get_output_vstreams_params", &HefWrapper::get_output_vstreams_params) .def("get_input_vstream_infos", &HefWrapper::get_input_vstream_infos) .def("get_output_vstream_infos", &HefWrapper::get_output_vstream_infos) .def("get_all_vstream_infos", &HefWrapper::get_all_vstream_infos) diff --git a/hailort/libhailort/bindings/python/src/hef_api.hpp b/hailort/libhailort/bindings/python/src/hef_api.hpp index b0905ce..47f49e9 100644 --- a/hailort/libhailort/bindings/python/src/hef_api.hpp +++ b/hailort/libhailort/bindings/python/src/hef_api.hpp @@ -44,10 +44,6 @@ public: std::string get_vstream_name_from_original_name(const std::string &original_name, const std::string &net_group_name); py::list get_stream_names_from_vstream_name(const std::string &vstream_name, const std::string &net_group_name); py::list get_vstream_names_from_stream_name(const std::string &stream_name, const std::string &net_group_name); - py::dict get_input_vstreams_params(const std::string &name, bool quantized, hailo_format_type_t format_type, - uint32_t timeout_ms, uint32_t queue_size); - 
py::dict get_output_vstreams_params(const std::string &name, bool quantized, hailo_format_type_t format_type, - uint32_t timeout_ms, uint32_t queue_size); py::list get_input_vstream_infos(const std::string &name); py::list get_output_vstream_infos(const std::string &name); py::list get_all_vstream_infos(const std::string &name); diff --git a/hailort/libhailort/bindings/python/src/network_group_api.hpp b/hailort/libhailort/bindings/python/src/network_group_api.hpp index 51bb407..4f5f209 100644 --- a/hailort/libhailort/bindings/python/src/network_group_api.hpp +++ b/hailort/libhailort/bindings/python/src/network_group_api.hpp @@ -223,18 +223,18 @@ public: return py::cast(result.release()); } - auto make_input_vstream_params(const std::string &name, bool quantized, hailo_format_type_t format_type, + auto make_input_vstream_params(const std::string &name, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size) { - auto result = get().make_input_vstream_params(quantized, format_type, timeout_ms, queue_size, name); + auto result = get().make_input_vstream_params({}, format_type, timeout_ms, queue_size, name); VALIDATE_EXPECTED(result); return py::cast(result.release()); } - auto make_output_vstream_params(const std::string &name, bool quantized, hailo_format_type_t format_type, + auto make_output_vstream_params(const std::string &name, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size) { - auto result = get().make_output_vstream_params(quantized, format_type, timeout_ms, queue_size, name); + auto result = get().make_output_vstream_params({}, format_type, timeout_ms, queue_size, name); VALIDATE_EXPECTED(result); return py::cast(result.release()); } diff --git a/hailort/libhailort/bindings/python/src/pyhailort.cpp b/hailort/libhailort/bindings/python/src/pyhailort.cpp index d9db006..5d9d467 100644 --- a/hailort/libhailort/bindings/python/src/pyhailort.cpp +++ b/hailort/libhailort/bindings/python/src/pyhailort.cpp @@ -137,6 +137,22 
@@ public: } }; +std::vector convert_nms_with_byte_mask_buffer_to_detections(py::array src_buffer) +{ + std::vector detections; + uint8_t *src_ptr = static_cast(src_buffer.mutable_data()); + uint16_t detections_count = *(uint16_t*)src_ptr; + detections.reserve(detections_count); + + size_t buffer_offset = sizeof(uint16_t); + for (size_t i = 0; i < detections_count; i++) { + hailo_detection_with_byte_mask_t detection = *(hailo_detection_with_byte_mask_t*)(src_ptr + buffer_offset); + buffer_offset += sizeof(hailo_detection_with_byte_mask_t) + detection.mask_size; + detections.emplace_back(std::move(detection)); + } + return detections; +} + static void validate_versions_match() { hailo_version_t libhailort_version = {}; @@ -162,6 +178,7 @@ PYBIND11_MODULE(_pyhailort, m) { validate_versions_match(); m.def("get_status_message", &get_status_message); + m.def("convert_nms_with_byte_mask_buffer_to_detections", &convert_nms_with_byte_mask_buffer_to_detections); m.def("dequantize_output_buffer_in_place", &QuantizationBindings::dequantize_output_buffer_in_place); m.def("dequantize_output_buffer", &QuantizationBindings::dequantize_output_buffer); m.def("quantize_input_buffer", &QuantizationBindings::quantize_input_buffer); @@ -207,12 +224,31 @@ PYBIND11_MODULE(_pyhailort, m) { .def(py::pickle(&PowerMeasurementData::get_state, &PowerMeasurementData::set_state)) ; + py::class_(m, "HailoRectangle") + .def_readonly("y_min", &hailo_rectangle_t::y_min) + .def_readonly("x_min", &hailo_rectangle_t::x_min) + .def_readonly("y_max", &hailo_rectangle_t::y_max) + .def_readonly("x_max", &hailo_rectangle_t::x_max) + ; + + py::class_(m, "HailoDetectionWithByteMask") + .def_readonly("box", &hailo_detection_with_byte_mask_t::box) + .def_readonly("mask_size", &hailo_detection_with_byte_mask_t::mask_size) + .def_readonly("score", &hailo_detection_with_byte_mask_t::score) + .def_readonly("class_id", &hailo_detection_with_byte_mask_t::class_id) + .def("mask", [](const 
hailo_detection_with_byte_mask_t &detection) -> py::array { + auto shape = *py::array::ShapeContainer({detection.mask_size}); + return py::array(py::dtype("uint8"), shape, detection.mask); + }) + ; + py::enum_(m, "DeviceArchitecture") .value("HAILO8_A0", HAILO_ARCH_HAILO8_A0) .value("HAILO8", HAILO_ARCH_HAILO8) .value("HAILO8L", HAILO_ARCH_HAILO8L) .value("HAILO15H", HAILO_ARCH_HAILO15H) .value("PLUTO", HAILO_ARCH_PLUTO) + .value("HAILO15M", HAILO_ARCH_HAILO15M) ; /* TODO: SDK-15648 */ @@ -524,7 +560,6 @@ PYBIND11_MODULE(_pyhailort, m) { py::enum_(m, "FormatFlags", py::arithmetic()) .value("NONE", HAILO_FORMAT_FLAGS_NONE) - .value("QUANTIZED", HAILO_FORMAT_FLAGS_QUANTIZED) .value("TRANSPOSED", HAILO_FORMAT_FLAGS_TRANSPOSED) .value("HOST_ARGMAX", HAILO_FORMAT_FLAGS_HOST_ARGMAX) ; diff --git a/hailort/libhailort/cmake/toolchains/toolchains.yaml b/hailort/libhailort/cmake/toolchains/toolchains.yaml deleted file mode 100644 index f1adf4c..0000000 --- a/hailort/libhailort/cmake/toolchains/toolchains.yaml +++ /dev/null @@ -1,44 +0,0 @@ -- name: linux.x86_64 - required_packages: - - gcc - - g++ - python_versions: - - version: '3.8' - installation: deb - package_name: python3.8-dev - - version: '3.9' - installation: deb - package_name: python3.9-dev - - version: '3.10' - installation: deb - package_name: python3.10-dev -- name: linux.aarch64 - required_packages: - - gcc-aarch64-linux-gnu - - g++-aarch64-linux-gnu - python_versions: - - version: '3.8' - installation: manual - package_name: https://launchpad.net/ubuntu/+source/python3.8/3.8.2-1ubuntu1/+build/18834117/+files/libpython3.8-dev_3.8.2-1ubuntu1_arm64.deb - package_dest: /usr/include/aarch64-linux-gnu - - version: '3.9' - installation: manual - package_name: https://launchpad.net/~deadsnakes/+archive/ubuntu/ppa/+files/libpython3.9-dev_3.9.18-1+focal1_arm64.deb - package_dest: /usr/include/aarch64-linux-gnu - - version: '3.10' - installation: manual - package_name: 
https://launchpadlibrarian.net/569418529/libpython3.10-dev_3.10.0-5_arm64.deb - package_dest: /usr/include/aarch64-linux-gnu -- name: linux.armv7l - required_packages: - - gcc-arm-linux-gnueabi - - g++-arm-linux-gnueabi -- name: linux.armv7lhf - required_packages: - - gcc-arm-linux-gnueabihf - - g++-arm-linux-gnueabihf -- name: linux.android28-arm64-v8a - android_ndk: - version_name: "android-ndk-r21d" - file: "https://dl.google.com/android/repository/android-ndk-r21d-linux-x86_64.zip" -- name: windows.x86_64 diff --git a/hailort/libhailort/examples/README.md b/hailort/libhailort/examples/README.md index d2444e1..47cc9a4 100644 --- a/hailort/libhailort/examples/README.md +++ b/hailort/libhailort/examples/README.md @@ -54,8 +54,11 @@ The following examples are provided, demonstrating the HailoRT API: - The main thread will stop the async operations and the threads by deactivating the network group. - `multi_process_example` - Demonstrates how to work with HailoRT multi-process service and using the HailoRT Model Scheduler for network groups switching. Using the script `multi_process_example.sh` / `multi_process_example.ps1` one can specify the number of processes to run each hef, see `multi_process_example.sh -h` / `multi_process_example.ps1 -h` for more information. + - For Windows, in case of restricted execution policy, either change the policy, or run the script with "PowerShell -NoProfile -ExecutionPolicy Bypass -File " - `notification_callback_example` - Demonstrates how to work with notification callbacks, same as `notification_callback_example` C example. You can find more details about each example in the HailoRT user guide. + - `async_infer_example` - Basic asynchronous inference of a shortcut network, uses HailoRT C++ api. + - `async_infer_functionality_example` - More advanced asynchronous inference of a multiple input and output model, uses HailoRT C++ api. 
## Compiling with CMake Examples are configured and compiled using the following commands: ```sh diff --git a/hailort/libhailort/examples/c/data_quantization_example/CMakeLists.txt b/hailort/libhailort/examples/c/data_quantization_example/CMakeLists.txt index afd0808..74737e1 100644 --- a/hailort/libhailort/examples/c/data_quantization_example/CMakeLists.txt +++ b/hailort/libhailort/examples/c/data_quantization_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) -find_package(HailoRT 4.15.0 EXACT REQUIRED) +find_package(HailoRT 4.16.0 EXACT REQUIRED) SET_SOURCE_FILES_PROPERTIES(data_quantization_example.c PROPERTIES LANGUAGE C) diff --git a/hailort/libhailort/examples/c/infer_pipeline_example/CMakeLists.txt b/hailort/libhailort/examples/c/infer_pipeline_example/CMakeLists.txt index b7c49c5..5fdc57a 100644 --- a/hailort/libhailort/examples/c/infer_pipeline_example/CMakeLists.txt +++ b/hailort/libhailort/examples/c/infer_pipeline_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) -find_package(HailoRT 4.15.0 EXACT REQUIRED) +find_package(HailoRT 4.16.0 EXACT REQUIRED) SET_SOURCE_FILES_PROPERTIES(infer_pipeline_example.c PROPERTIES LANGUAGE C) diff --git a/hailort/libhailort/examples/c/infer_pipeline_example/infer_pipeline_example.c b/hailort/libhailort/examples/c/infer_pipeline_example/infer_pipeline_example.c index 0a682d2..4f17deb 100644 --- a/hailort/libhailort/examples/c/infer_pipeline_example/infer_pipeline_example.c +++ b/hailort/libhailort/examples/c/infer_pipeline_example/infer_pipeline_example.c @@ -101,6 +101,7 @@ int main(int argc, char **argv) hailo_activated_network_group activated_network_group = NULL; size_t vstreams_infos_size = MAX_EDGE_LAYERS; hailo_vstream_info_t vstreams_infos[MAX_EDGE_LAYERS] = {0}; + bool unused = {0}; parse_arguments(argc, argv, &interface_name); 
@@ -123,11 +124,11 @@ int main(int argc, char **argv) REQUIRE_ACTION(network_group_size == 1, status = HAILO_INVALID_ARGUMENT, l_release_hef, "Invalid network group size"); - status = hailo_make_input_vstream_params(network_group, true, HAILO_FORMAT_TYPE_AUTO, + status = hailo_make_input_vstream_params(network_group, unused, HAILO_FORMAT_TYPE_AUTO, input_vstream_params, &input_vstreams_size); REQUIRE_SUCCESS(status, l_release_hef, "Failed making input virtual stream params"); - status = hailo_make_output_vstream_params(network_group, true, HAILO_FORMAT_TYPE_AUTO, + status = hailo_make_output_vstream_params(network_group, unused, HAILO_FORMAT_TYPE_AUTO, output_vstream_params, &output_vstreams_size); REQUIRE_SUCCESS(status, l_release_hef, "Failed making output virtual stream params"); diff --git a/hailort/libhailort/examples/c/multi_device_example/CMakeLists.txt b/hailort/libhailort/examples/c/multi_device_example/CMakeLists.txt index 7537adb..cf51c4c 100644 --- a/hailort/libhailort/examples/c/multi_device_example/CMakeLists.txt +++ b/hailort/libhailort/examples/c/multi_device_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) -find_package(HailoRT 4.15.0 EXACT REQUIRED) +find_package(HailoRT 4.16.0 EXACT REQUIRED) SET_SOURCE_FILES_PROPERTIES(multi_device_example.c PROPERTIES LANGUAGE C) diff --git a/hailort/libhailort/examples/c/multi_device_example/multi_device_example.c b/hailort/libhailort/examples/c/multi_device_example/multi_device_example.c index a676779..194cf98 100644 --- a/hailort/libhailort/examples/c/multi_device_example/multi_device_example.c +++ b/hailort/libhailort/examples/c/multi_device_example/multi_device_example.c @@ -143,6 +143,7 @@ int main() size_t output_vstreams_size = MAX_EDGE_LAYERS; hailo_input_vstream input_vstreams[MAX_EDGE_LAYERS] = {NULL}; hailo_output_vstream output_vstreams[MAX_EDGE_LAYERS] = {NULL}; + bool unused = {0}; status = 
hailo_scan_devices(NULL, device_ids, &actual_count); REQUIRE_SUCCESS(status, l_exit, "Failed to scan devices"); @@ -172,11 +173,11 @@ int main() REQUIRE_ACTION(network_group_size == 1, status = HAILO_INVALID_ARGUMENT, l_release_hef, "Invalid network group size"); - status = hailo_make_input_vstream_params(network_group, true, HAILO_FORMAT_TYPE_AUTO, + status = hailo_make_input_vstream_params(network_group, unused, HAILO_FORMAT_TYPE_AUTO, input_vstream_params, &input_vstreams_size); REQUIRE_SUCCESS(status, l_release_hef, "Failed making input virtual stream params"); - status = hailo_make_output_vstream_params(network_group, true, HAILO_FORMAT_TYPE_AUTO, + status = hailo_make_output_vstream_params(network_group, unused, HAILO_FORMAT_TYPE_AUTO, output_vstream_params, &output_vstreams_size); REQUIRE_SUCCESS(status, l_release_hef, "Failed making output virtual stream params"); diff --git a/hailort/libhailort/examples/c/multi_network_vstream_example/CMakeLists.txt b/hailort/libhailort/examples/c/multi_network_vstream_example/CMakeLists.txt index 63cb793..09d444b 100644 --- a/hailort/libhailort/examples/c/multi_network_vstream_example/CMakeLists.txt +++ b/hailort/libhailort/examples/c/multi_network_vstream_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0) find_package(Threads REQUIRED) set(THREADS_PREFER_PTHREAD_FLAG ON) -find_package(HailoRT 4.15.0 EXACT REQUIRED) +find_package(HailoRT 4.16.0 EXACT REQUIRED) SET_SOURCE_FILES_PROPERTIES(multi_network_vstream_example.c PROPERTIES LANGUAGE C) diff --git a/hailort/libhailort/examples/c/multi_network_vstream_example/multi_network_vstream_example.c b/hailort/libhailort/examples/c/multi_network_vstream_example/multi_network_vstream_example.c index e9cdbe2..5b0f56c 100644 --- a/hailort/libhailort/examples/c/multi_network_vstream_example/multi_network_vstream_example.c +++ b/hailort/libhailort/examples/c/multi_network_vstream_example/multi_network_vstream_example.c @@ -167,6 +167,7 @@ int main() 
hailo_input_vstream input_vstreams[NET_COUNT][MAX_EDGE_LAYERS]; hailo_output_vstream output_vstreams[NET_COUNT][MAX_EDGE_LAYERS]; uint16_t batch_size[NET_COUNT] = {FIRST_NET_BATCH_SIZE, SECOND_NET_BATCH_SIZE}; + bool unused = {0}; status = hailo_init_vdevice_params(¶ms); REQUIRE_SUCCESS(status, l_exit, "Failed init vdevice_params"); @@ -201,11 +202,11 @@ int main() /* Build vstream params per network */ for (uint8_t network_index = 0; network_index < NET_COUNT; network_index++) { - status = hailo_hef_make_input_vstream_params(hef, network_info[network_index].name, true, HAILO_FORMAT_TYPE_AUTO, + status = hailo_hef_make_input_vstream_params(hef, network_info[network_index].name, unused, HAILO_FORMAT_TYPE_AUTO, input_vstream_params[network_index], &input_vstreams_size[network_index]); REQUIRE_SUCCESS(status, l_release_hef, "Failed making input virtual stream params"); - status = hailo_hef_make_output_vstream_params(hef, network_info[network_index].name, true, HAILO_FORMAT_TYPE_AUTO, + status = hailo_hef_make_output_vstream_params(hef, network_info[network_index].name, unused, HAILO_FORMAT_TYPE_AUTO, output_vstream_params[network_index], &output_vstreams_size[network_index]); REQUIRE_SUCCESS(status, l_release_hef, "Failed making output virtual stream params"); diff --git a/hailort/libhailort/examples/c/notification_callback_example/CMakeLists.txt b/hailort/libhailort/examples/c/notification_callback_example/CMakeLists.txt index f2da6a8..6f5fdbe 100644 --- a/hailort/libhailort/examples/c/notification_callback_example/CMakeLists.txt +++ b/hailort/libhailort/examples/c/notification_callback_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) -find_package(HailoRT 4.15.0 EXACT REQUIRED) +find_package(HailoRT 4.16.0 EXACT REQUIRED) SET_SOURCE_FILES_PROPERTIES(notification_callback_example.c PROPERTIES LANGUAGE C) diff --git 
a/hailort/libhailort/examples/c/power_measurement_example/CMakeLists.txt b/hailort/libhailort/examples/c/power_measurement_example/CMakeLists.txt index 46e6247..e4331d7 100644 --- a/hailort/libhailort/examples/c/power_measurement_example/CMakeLists.txt +++ b/hailort/libhailort/examples/c/power_measurement_example/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 3.0.0) -find_package(HailoRT 4.15.0 EXACT REQUIRED) +find_package(HailoRT 4.16.0 EXACT REQUIRED) SET_SOURCE_FILES_PROPERTIES(power_measurement_example.c PROPERTIES LANGUAGE C) diff --git a/hailort/libhailort/examples/c/raw_async_streams_single_thread_example/CMakeLists.txt b/hailort/libhailort/examples/c/raw_async_streams_single_thread_example/CMakeLists.txt index f6a7565..962958f 100644 --- a/hailort/libhailort/examples/c/raw_async_streams_single_thread_example/CMakeLists.txt +++ b/hailort/libhailort/examples/c/raw_async_streams_single_thread_example/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 3.0.0) -find_package(HailoRT 4.15.0 EXACT REQUIRED) +find_package(HailoRT 4.16.0 EXACT REQUIRED) SET_SOURCE_FILES_PROPERTIES(raw_async_streams_single_thread_example.c PROPERTIES LANGUAGE C) diff --git a/hailort/libhailort/examples/c/raw_async_streams_single_thread_example/raw_async_streams_single_thread_example.c b/hailort/libhailort/examples/c/raw_async_streams_single_thread_example/raw_async_streams_single_thread_example.c index b5ce769..1fa7838 100644 --- a/hailort/libhailort/examples/c/raw_async_streams_single_thread_example/raw_async_streams_single_thread_example.c +++ b/hailort/libhailort/examples/c/raw_async_streams_single_thread_example/raw_async_streams_single_thread_example.c @@ -50,7 +50,7 @@ static void output_done_callback(const hailo_stream_read_async_completion_info_t // Real applications can forward the buffer to post-process/display. Here we just re-launch new async reads. 
status = hailo_stream_read_raw_buffer_async(stream, completion_info->buffer_addr, completion_info->buffer_size, output_done_callback, stream); - if ((HAILO_SUCCESS != status) && (HAILO_STREAM_NOT_ACTIVATED != status)) { + if ((HAILO_SUCCESS != status) && (HAILO_STREAM_ABORTED_BY_USER != status)) { fprintf(stderr, "Failed read async with status=%d\n", status); } break; @@ -73,7 +73,7 @@ static void input_done_callback(const hailo_stream_write_async_completion_info_t // new async writes. status = hailo_stream_write_raw_buffer_async(stream, completion_info->buffer_addr, completion_info->buffer_size, input_done_callback, stream); - if ((HAILO_SUCCESS != status) && (HAILO_STREAM_NOT_ACTIVATED != status)) { + if ((HAILO_SUCCESS != status) && (HAILO_STREAM_ABORTED_BY_USER != status)) { fprintf(stderr, "Failed write async with status=%d\n", status); } break; @@ -90,7 +90,6 @@ static hailo_status infer(hailo_configured_network_group network_group, size_t n size_t ongoing_transfers) { hailo_status status = HAILO_UNINITIALIZED; - hailo_activated_network_group activated_network_group = NULL; size_t i = 0; size_t frame_index = 0; size_t frame_size = 0; @@ -99,9 +98,6 @@ static hailo_status infer(hailo_configured_network_group network_group, size_t n void *buffers[MAX_EDGE_LAYERS * MAX_ONGOING_TRANSFERS] = {0}; size_t allocated_buffers = 0; - status = hailo_activate_network_group(network_group, NULL, &activated_network_group); - REQUIRE_SUCCESS(status, l_exit, "Failed activate network group status=%d", status); - // We launch "ongoing_transfers" async operations for both input and output streams. On each async callback, we launch // some new operation with the same buffer. 
for (stream_index = 0; stream_index < number_output_streams; stream_index++) { @@ -111,12 +107,12 @@ static hailo_status infer(hailo_configured_network_group network_group, size_t n for (frame_index = 0; frame_index < ongoing_transfers; frame_index++) { // Buffers read from async operation must be page aligned. current_buffer = page_aligned_alloc(frame_size); - REQUIRE_ACTION(INVALID_ADDR != current_buffer, status=HAILO_OUT_OF_HOST_MEMORY, l_deactivate, "allocation failed"); + REQUIRE_ACTION(INVALID_ADDR != current_buffer, status=HAILO_OUT_OF_HOST_MEMORY, l_shutdown, "allocation failed"); buffers[allocated_buffers++] = current_buffer; status = hailo_stream_read_raw_buffer_async(output_streams[stream_index], current_buffer, frame_size, output_done_callback, output_streams[stream_index]); - REQUIRE_SUCCESS(status, l_deactivate, "Failed read async with status=%d", status); + REQUIRE_SUCCESS(status, l_shutdown, "Failed read async with status=%d", status); } } @@ -127,28 +123,27 @@ static hailo_status infer(hailo_configured_network_group network_group, size_t n for (frame_index = 0; frame_index < ongoing_transfers; frame_index++) { // Buffers written to async operation must be page aligned. current_buffer = page_aligned_alloc(frame_size); - REQUIRE_ACTION(INVALID_ADDR != current_buffer, status=HAILO_OUT_OF_HOST_MEMORY, l_deactivate, "allocation failed"); + REQUIRE_ACTION(INVALID_ADDR != current_buffer, status=HAILO_OUT_OF_HOST_MEMORY, l_shutdown, "allocation failed"); buffers[allocated_buffers++] = current_buffer; status = hailo_stream_write_raw_buffer_async(input_streams[stream_index], current_buffer, frame_size, input_done_callback, input_streams[stream_index]); - REQUIRE_SUCCESS(status, l_deactivate, "Failed write async with status=%d", status); + REQUIRE_SUCCESS(status, l_shutdown, "Failed write async with status=%d", status); } } - // After all async operations are launched, the inference will continue until we deactivate the network. 
+ // After all async operations are launched, the inference will continue until we shutdown the network. hailo_sleep(INFER_TIME_SECONDS); status = HAILO_SUCCESS; -l_deactivate: - // Calling hailo_deactivate_network_group will make sure that all async operations are done. All pending async I/O +l_shutdown: + // Calling hailo_shutdown_network_group will ensure that all async operations are done. All pending async I/O // operations will be canceled and their callbacks called with status=HAILO_STREAM_ABORTED_BY_USER. - (void) hailo_deactivate_network_group(activated_network_group); + (void) hailo_shutdown_network_group(network_group); // There are no async I/O operations ongoing so it is safe to free the buffers now. for (i = 0; i < allocated_buffers; i++) page_aligned_free(buffers[i], frame_size); -l_exit: return status; } @@ -200,6 +195,7 @@ int main() size_t index = 0; size_t queue_size = 0; size_t ongoing_transfers = MAX_ONGOING_TRANSFERS; + hailo_activated_network_group activated_network_group = NULL; // Create device object. status = hailo_create_device_by_id(NULL, &device); @@ -238,14 +234,20 @@ int main() ongoing_transfers = MIN(queue_size, ongoing_transfers); } + // Activate network group + status = hailo_activate_network_group(network_group, NULL, &activated_network_group); + REQUIRE_SUCCESS(status, l_release_device, "Failed activate network group"); + // Run infer. 
status = infer(network_group, number_input_streams, input_streams, number_output_streams, output_streams, ongoing_transfers); - REQUIRE_SUCCESS(status, l_release_device, "Failed performing inference"); + REQUIRE_SUCCESS(status, l_deactivate, "Failed performing inference"); status = HAILO_SUCCESS; printf("Inference ran successfully\n"); +l_deactivate: + (void) hailo_deactivate_network_group(activated_network_group); l_release_device: (void) hailo_release_device(device); l_exit: diff --git a/hailort/libhailort/examples/c/raw_streams_example/CMakeLists.txt b/hailort/libhailort/examples/c/raw_streams_example/CMakeLists.txt index 87e76ac..2586abf 100644 --- a/hailort/libhailort/examples/c/raw_streams_example/CMakeLists.txt +++ b/hailort/libhailort/examples/c/raw_streams_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) -find_package(HailoRT 4.15.0 EXACT REQUIRED) +find_package(HailoRT 4.16.0 EXACT REQUIRED) SET_SOURCE_FILES_PROPERTIES(raw_streams_example.c PROPERTIES LANGUAGE C) diff --git a/hailort/libhailort/examples/c/switch_network_groups_example/CMakeLists.txt b/hailort/libhailort/examples/c/switch_network_groups_example/CMakeLists.txt index 44e2816..a00d43e 100644 --- a/hailort/libhailort/examples/c/switch_network_groups_example/CMakeLists.txt +++ b/hailort/libhailort/examples/c/switch_network_groups_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) -find_package(HailoRT 4.15.0 EXACT REQUIRED) +find_package(HailoRT 4.16.0 EXACT REQUIRED) SET_SOURCE_FILES_PROPERTIES(switch_network_groups_example.c PROPERTIES LANGUAGE C) diff --git a/hailort/libhailort/examples/c/switch_network_groups_example/switch_network_groups_example.c b/hailort/libhailort/examples/c/switch_network_groups_example/switch_network_groups_example.c index 7b10525..8b29020 100644 --- 
a/hailort/libhailort/examples/c/switch_network_groups_example/switch_network_groups_example.c +++ b/hailort/libhailort/examples/c/switch_network_groups_example/switch_network_groups_example.c @@ -102,17 +102,18 @@ hailo_status build_vstreams(hailo_configured_network_group network_group, hailo_status status = HAILO_UNINITIALIZED; hailo_input_vstream_params_by_name_t input_vstream_params[MAX_EDGE_LAYERS]; hailo_output_vstream_params_by_name_t output_vstream_params[MAX_EDGE_LAYERS]; + bool unused = {0}; // Make sure it can hold amount of vstreams for hailo_make_input/output_vstream_params size_t input_vstream_size = MAX_EDGE_LAYERS; size_t output_vstream_size = MAX_EDGE_LAYERS; - status = hailo_make_input_vstream_params(network_group, true, HAILO_FORMAT_TYPE_AUTO, + status = hailo_make_input_vstream_params(network_group, unused, HAILO_FORMAT_TYPE_AUTO, input_vstream_params, &input_vstream_size); REQUIRE_SUCCESS(status, l_exit, "Failed making input virtual stream params"); *num_input_vstreams = input_vstream_size; - status = hailo_make_output_vstream_params(network_group, true, HAILO_FORMAT_TYPE_AUTO, + status = hailo_make_output_vstream_params(network_group, unused, HAILO_FORMAT_TYPE_AUTO, output_vstream_params, &output_vstream_size); REQUIRE_SUCCESS(status, l_exit, "Failed making output virtual stream params"); *num_output_vstreams = output_vstream_size; diff --git a/hailort/libhailort/examples/c/switch_network_groups_manually_example/CMakeLists.txt b/hailort/libhailort/examples/c/switch_network_groups_manually_example/CMakeLists.txt index 47536df..2eb0b1d 100644 --- a/hailort/libhailort/examples/c/switch_network_groups_manually_example/CMakeLists.txt +++ b/hailort/libhailort/examples/c/switch_network_groups_manually_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) -find_package(HailoRT 4.15.0 EXACT REQUIRED) +find_package(HailoRT 4.16.0 EXACT REQUIRED) 
SET_SOURCE_FILES_PROPERTIES(switch_network_groups_manually_example.c PROPERTIES LANGUAGE C) diff --git a/hailort/libhailort/examples/c/switch_network_groups_manually_example/switch_network_groups_manually_example.c b/hailort/libhailort/examples/c/switch_network_groups_manually_example/switch_network_groups_manually_example.c index 07bc2c7..7c14532 100644 --- a/hailort/libhailort/examples/c/switch_network_groups_manually_example/switch_network_groups_manually_example.c +++ b/hailort/libhailort/examples/c/switch_network_groups_manually_example/switch_network_groups_manually_example.c @@ -176,6 +176,7 @@ int main() hailo_thread output_vstream_thread = {0}; input_vstream_thread_args_t input_args = {0}; output_vstream_thread_args_t output_args = {0}; + bool unused = {0}; char HEF_FILES[HEF_COUNT][250] = {"hefs/shortcut_net.hef","hefs/shortcut_net.hef"}; @@ -201,13 +202,13 @@ int main() "Unexpected network group size"); // Mae sure each hef is single input single output - status = hailo_make_input_vstream_params(network_groups[hef_index], true, HAILO_FORMAT_TYPE_AUTO, + status = hailo_make_input_vstream_params(network_groups[hef_index], unused, HAILO_FORMAT_TYPE_AUTO, &input_vstream_params[hef_index], &input_vstream_size); REQUIRE_SUCCESS(status, l_release_hef, "Failed making input virtual stream params"); REQUIRE_ACTION(input_vstream_size == 1, status = HAILO_INVALID_ARGUMENT, l_release_hef, "INVALID HEF - Only hefs with single input vstream are allowed"); - status = hailo_make_output_vstream_params(network_groups[hef_index], true, HAILO_FORMAT_TYPE_AUTO, + status = hailo_make_output_vstream_params(network_groups[hef_index], unused, HAILO_FORMAT_TYPE_AUTO, &output_vstream_params[hef_index], &output_vstream_size); REQUIRE_SUCCESS(status, l_release_hef, "Failed making output virtual stream params"); REQUIRE_ACTION(output_vstream_size == 1, status = HAILO_INVALID_ARGUMENT, l_release_hef, diff --git a/hailort/libhailort/examples/c/vstreams_example/CMakeLists.txt 
b/hailort/libhailort/examples/c/vstreams_example/CMakeLists.txt index a7fa785..3eea54f 100644 --- a/hailort/libhailort/examples/c/vstreams_example/CMakeLists.txt +++ b/hailort/libhailort/examples/c/vstreams_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) -find_package(HailoRT 4.15.0 EXACT REQUIRED) +find_package(HailoRT 4.16.0 EXACT REQUIRED) SET_SOURCE_FILES_PROPERTIES(vstreams_example.c PROPERTIES LANGUAGE C) diff --git a/hailort/libhailort/examples/c/vstreams_example/vstreams_example.c b/hailort/libhailort/examples/c/vstreams_example/vstreams_example.c index 6338492..30ce1a5 100644 --- a/hailort/libhailort/examples/c/vstreams_example/vstreams_example.c +++ b/hailort/libhailort/examples/c/vstreams_example/vstreams_example.c @@ -137,7 +137,7 @@ int main() size_t output_vstreams_size = MAX_EDGE_LAYERS; hailo_input_vstream input_vstreams[MAX_EDGE_LAYERS] = {NULL}; hailo_output_vstream output_vstreams[MAX_EDGE_LAYERS] = {NULL}; - bool quantized = true; + bool unused = {0}; status = hailo_create_vdevice(NULL, &vdevice); REQUIRE_SUCCESS(status, l_exit, "Failed to create vdevice"); @@ -154,9 +154,8 @@ int main() "Invalid network group size"); - // Set input format type to auto, and mark the data as quantized - libhailort will not scale the data before writing to the HW - quantized = true; - status = hailo_make_input_vstream_params(network_group, quantized, HAILO_FORMAT_TYPE_AUTO, + // Set input format type to auto - libhailort will not scale the data before writing to the HW + status = hailo_make_input_vstream_params(network_group, unused, HAILO_FORMAT_TYPE_AUTO, input_vstream_params, &input_vstreams_size); REQUIRE_SUCCESS(status, l_release_hef, "Failed making input virtual stream params"); @@ -166,10 +165,9 @@ int main() input_vstream_params[i].params.user_buffer_format.order = HAILO_FORMAT_ORDER_NCHW; } - // Set output format type to float32, and mark the data as not 
quantized - libhailort will de-quantize the data after reading from the HW + // Set output format type to float32 - libhailort will de-quantize the data after reading from the HW // Note: this process might affect the overall performance - quantized = false; - status = hailo_make_output_vstream_params(network_group, quantized, HAILO_FORMAT_TYPE_FLOAT32, + status = hailo_make_output_vstream_params(network_group, unused, HAILO_FORMAT_TYPE_FLOAT32, output_vstream_params, &output_vstreams_size); REQUIRE_SUCCESS(status, l_release_hef, "Failed making output virtual stream params"); diff --git a/hailort/libhailort/examples/cpp/CMakeLists.txt b/hailort/libhailort/examples/cpp/CMakeLists.txt index 66e3171..a39e286 100644 --- a/hailort/libhailort/examples/cpp/CMakeLists.txt +++ b/hailort/libhailort/examples/cpp/CMakeLists.txt @@ -2,6 +2,8 @@ cmake_minimum_required(VERSION 3.0.0) add_subdirectory(vstreams_example) add_subdirectory(infer_pipeline_example) +add_subdirectory(async_infer_example) +add_subdirectory(async_infer_functionality_example) add_subdirectory(raw_streams_example) add_subdirectory(multi_network_vstream_example) add_subdirectory(switch_network_groups_example) @@ -15,6 +17,8 @@ add_subdirectory(notification_callback_example) set(CPP_EXAMPLE_TARGETS cpp_vstreams_example cpp_infer_pipeline_example + cpp_async_infer_example + cpp_async_infer_functionality_example cpp_raw_streams_example cpp_multi_network_vstream_example cpp_switch_network_groups_example diff --git a/hailort/libhailort/examples/cpp/async_infer_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/async_infer_example/CMakeLists.txt new file mode 100644 index 0000000..9335a99 --- /dev/null +++ b/hailort/libhailort/examples/cpp/async_infer_example/CMakeLists.txt @@ -0,0 +1,16 @@ +cmake_minimum_required(VERSION 3.0.0) + +find_package(HailoRT 4.16.0 EXACT REQUIRED) + +add_executable(cpp_async_infer_example async_infer_example.cpp) +target_link_libraries(cpp_async_infer_example PRIVATE 
HailoRT::libhailort) + +if(WIN32) + target_compile_options(cpp_async_infer_example PRIVATE + /DWIN32_LEAN_AND_MEAN + /DNOMINMAX # NOMINMAX is required in order to play nice with std::min/std::max (otherwise Windows.h defines it's own) + /wd4201 /wd4251 + ) +endif() + +set_target_properties(cpp_async_infer_example PROPERTIES CXX_STANDARD 14) diff --git a/hailort/libhailort/examples/cpp/async_infer_example/async_infer_example.cpp b/hailort/libhailort/examples/cpp/async_infer_example/async_infer_example.cpp new file mode 100644 index 0000000..30c744d --- /dev/null +++ b/hailort/libhailort/examples/cpp/async_infer_example/async_infer_example.cpp @@ -0,0 +1,93 @@ +/** + * Copyright (c) 2020-2023 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file async_infer_example.cpp + * This example demonstrates the Async Infer API usage and assumes the model has only one input and output. + **/ + +#include "hailo/hailort.hpp" + +#include + +#if defined(__unix__) +#include +#endif + +#define HEF_FILE ("hefs/shortcut_net.hef") + +using namespace hailort; + +static std::shared_ptr page_aligned_alloc(size_t size) +{ +#if defined(__unix__) + auto addr = mmap(NULL, size, PROT_WRITE | PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + if (MAP_FAILED == addr) throw std::bad_alloc(); + return std::shared_ptr(reinterpret_cast(addr), [size](void *addr) { munmap(addr, size); }); +#elif defined(_MSC_VER) + auto addr = VirtualAlloc(NULL, size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE); + if (!addr) throw std::bad_alloc(); + return std::shared_ptr(reinterpret_cast(addr), [](void *addr){ VirtualFree(addr, 0, MEM_RELEASE); }); +#else +#pragma error("Aligned alloc not supported") +#endif +} + +int main() +{ + auto vdevice = VDevice::create(); + if (!vdevice) { + std::cerr << "Failed create vdevice, status = " << vdevice.status() << std::endl; + return vdevice.status(); + } + + auto infer_model_exp = 
vdevice.value()->create_infer_model(HEF_FILE); + if (!infer_model_exp) { + std::cerr << "Failed to create infer model, status = " << infer_model_exp.status() << std::endl; + return infer_model_exp.status(); + } + auto infer_model = infer_model_exp.release(); + + auto configured_infer_model = infer_model->configure(); + if (!configured_infer_model) { + std::cerr << "Failed to create configured infer model, status = " << configured_infer_model.status() << std::endl; + return configured_infer_model.status(); + } + + auto bindings = configured_infer_model->create_bindings(); + if (!bindings) { + std::cerr << "Failed to create infer bindings, status = " << bindings.status() << std::endl; + return bindings.status(); + } + + size_t input_frame_size = infer_model->input()->get_frame_size(); + auto input_buffer = page_aligned_alloc(input_frame_size); + auto status = bindings->input()->set_buffer(MemoryView(input_buffer.get(), input_frame_size)); + if (HAILO_SUCCESS != status) { + std::cerr << "Failed to set infer input buffer, status = " << status << std::endl; + return status; + } + + size_t output_frame_size = infer_model->output()->get_frame_size(); + auto output_buffer = page_aligned_alloc(output_frame_size); + status = bindings->output()->set_buffer(MemoryView(output_buffer.get(), output_frame_size)); + if (HAILO_SUCCESS != status) { + std::cerr << "Failed to set infer output buffer, status = " << status << std::endl; + return status; + } + + auto job = configured_infer_model->run_async(bindings.value()); + if (!job) { + std::cerr << "Failed to start async infer job, status = " << job.status() << std::endl; + return job.status(); + } + + status = job->wait(std::chrono::milliseconds(1000)); + if (HAILO_SUCCESS != status) { + std::cerr << "Failed to wait for infer to finish, status = " << status << std::endl; + return status; + } + + return HAILO_SUCCESS; +} diff --git a/hailort/libhailort/examples/cpp/async_infer_functionality_example/CMakeLists.txt 
b/hailort/libhailort/examples/cpp/async_infer_functionality_example/CMakeLists.txt new file mode 100644 index 0000000..bbc6736 --- /dev/null +++ b/hailort/libhailort/examples/cpp/async_infer_functionality_example/CMakeLists.txt @@ -0,0 +1,16 @@ +cmake_minimum_required(VERSION 3.0.0) + +find_package(HailoRT 4.16.0 EXACT REQUIRED) + +add_executable(cpp_async_infer_functionality_example async_infer_functionality_example.cpp) +target_link_libraries(cpp_async_infer_functionality_example PRIVATE HailoRT::libhailort) + +if(WIN32) + target_compile_options(cpp_async_infer_functionality_example PRIVATE + /DWIN32_LEAN_AND_MEAN + /DNOMINMAX # NOMINMAX is required in order to play nice with std::min/std::max (otherwise Windows.h defines it's own) + /wd4201 /wd4251 + ) +endif() + +set_target_properties(cpp_async_infer_functionality_example PROPERTIES CXX_STANDARD 14) diff --git a/hailort/libhailort/examples/cpp/async_infer_functionality_example/async_infer_functionality_example.cpp b/hailort/libhailort/examples/cpp/async_infer_functionality_example/async_infer_functionality_example.cpp new file mode 100644 index 0000000..e2a5e22 --- /dev/null +++ b/hailort/libhailort/examples/cpp/async_infer_functionality_example/async_infer_functionality_example.cpp @@ -0,0 +1,129 @@ +/** + * Copyright (c) 2020-2023 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file async_infer_functionality_example.cpp + * This example demonstrates the Async Infer API usage with a specific model with multiple inputs and outputs + * and changes configurations of the streams. 
+ **/ + +#include "hailo/hailort.hpp" + +#include + +#if defined(__unix__) +#include +#endif + +#define FRAMES_COUNT (100) + +using namespace hailort; + +static std::shared_ptr page_aligned_alloc(size_t size) +{ +#if defined(__unix__) + auto addr = mmap(NULL, size, PROT_WRITE | PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + if (MAP_FAILED == addr) throw std::bad_alloc(); + return std::shared_ptr(reinterpret_cast(addr), [size](void *addr) { munmap(addr, size); }); +#elif defined(_MSC_VER) + auto addr = VirtualAlloc(NULL, size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE); + if (!addr) throw std::bad_alloc(); + return std::shared_ptr(reinterpret_cast(addr), [](void *addr){ VirtualFree(addr, 0, MEM_RELEASE); }); +#else +#pragma error("Aligned alloc not supported") +#endif +} + +int main() +{ + auto vdevice = VDevice::create(); + if (!vdevice) { + std::cerr << "Failed create vdevice, status = " << vdevice.status() << std::endl; + return vdevice.status(); + } + + auto infer_model_exp = vdevice.value()->create_infer_model("hefs/multi_network_shortcut_net.hef"); + if (!infer_model_exp) { + std::cerr << "Failed to create infer model, status = " << infer_model_exp.status() << std::endl; + return infer_model_exp.status(); + } + auto infer_model = infer_model_exp.release(); + + infer_model->input("multi_network_shortcut_net_scope1/input_layer_0")->set_format_type(HAILO_FORMAT_TYPE_FLOAT32); + infer_model->output("multi_network_shortcut_net_scope1/shortcut0")->set_format_type(HAILO_FORMAT_TYPE_FLOAT32); + infer_model->input("multi_network_shortcut_net_scope2/input_layer_1")->set_format_type(HAILO_FORMAT_TYPE_FLOAT32); + infer_model->output("multi_network_shortcut_net_scope2/shortcut1")->set_format_type(HAILO_FORMAT_TYPE_FLOAT32); + + auto configured_infer_model = infer_model->configure(); + if (!configured_infer_model) { + std::cerr << "Failed to create configured infer model, status = " << configured_infer_model.status() << std::endl; + return configured_infer_model.status(); 
+ } + + // We store buffers vector here as a guard for the memory. The buffer will be freed only after + // configured_infer_model will be released. + std::vector> buffer_guards; + + auto bindings = configured_infer_model->create_bindings(); + if (!bindings) { + std::cerr << "Failed to create infer bindings, status = " << bindings.status() << std::endl; + return bindings.status(); + } + + for (const auto &input_name : infer_model->get_input_names()) { + size_t input_frame_size = infer_model->input(input_name)->get_frame_size(); + auto input_buffer = page_aligned_alloc(input_frame_size); + auto status = bindings->input(input_name)->set_buffer(MemoryView(input_buffer.get(), input_frame_size)); + if (HAILO_SUCCESS != status) { + std::cerr << "Failed to set infer input buffer, status = " << status << std::endl; + return status; + } + + buffer_guards.push_back(input_buffer); + } + + for (const auto &output_name : infer_model->get_output_names()) { + size_t output_frame_size = infer_model->output(output_name)->get_frame_size(); + auto output_buffer = page_aligned_alloc(output_frame_size); + auto status = bindings->output(output_name)->set_buffer(MemoryView(output_buffer.get(), output_frame_size)); + if (HAILO_SUCCESS != status) { + std::cerr << "Failed to set infer output buffer, status = " << status << std::endl; + return status; + } + + buffer_guards.push_back(output_buffer); + } + + AsyncInferJob last_infer_job; + for (uint32_t i = 0; i < FRAMES_COUNT; i++) { + // Waiting for available requests in the pipeline + auto status = configured_infer_model->wait_for_async_ready(std::chrono::milliseconds(1000)); + if (HAILO_SUCCESS != status) { + std::cerr << "Failed to wait for async ready, status = " << status << std::endl; + return status; + } + + auto job = configured_infer_model->run_async(bindings.value(), [] (const AsyncInferCompletionInfo &/*completion_info*/) { + // Use completion_info to get the job status and the corresponding bindings + }); + if (!job) { + 
std::cerr << "Failed to start async infer job, status = " << job.status() << std::endl; + return job.status(); + } + job->detach(); + + if (i == FRAMES_COUNT - 1) { + last_infer_job = job.release(); + } + } + + // Wait for last infer to finish + auto status = last_infer_job.wait(std::chrono::milliseconds(1000)); + if (HAILO_SUCCESS != status) { + std::cerr << "Failed to wait for infer to finish, status = " << status << std::endl; + return status; + } + + return HAILO_SUCCESS; +} diff --git a/hailort/libhailort/examples/cpp/infer_pipeline_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/infer_pipeline_example/CMakeLists.txt index bbaa826..1ffe855 100644 --- a/hailort/libhailort/examples/cpp/infer_pipeline_example/CMakeLists.txt +++ b/hailort/libhailort/examples/cpp/infer_pipeline_example/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 3.0.0) -find_package(HailoRT 4.15.0 EXACT REQUIRED) +find_package(HailoRT 4.16.0 EXACT REQUIRED) add_executable(cpp_infer_pipeline_example infer_pipeline_example.cpp) target_link_libraries(cpp_infer_pipeline_example PRIVATE HailoRT::libhailort) diff --git a/hailort/libhailort/examples/cpp/infer_pipeline_example/infer_pipeline_example.cpp b/hailort/libhailort/examples/cpp/infer_pipeline_example/infer_pipeline_example.cpp index 02d7d13..2700e02 100644 --- a/hailort/libhailort/examples/cpp/infer_pipeline_example/infer_pipeline_example.cpp +++ b/hailort/libhailort/examples/cpp/infer_pipeline_example/infer_pipeline_example.cpp @@ -109,13 +109,13 @@ int main(int argc, char **argv) return network_group.status(); } - auto input_params = network_group.value()->make_input_vstream_params(true, FORMAT_TYPE, HAILO_DEFAULT_VSTREAM_TIMEOUT_MS, HAILO_DEFAULT_VSTREAM_QUEUE_SIZE); + auto input_params = network_group.value()->make_input_vstream_params({}, FORMAT_TYPE, HAILO_DEFAULT_VSTREAM_TIMEOUT_MS, HAILO_DEFAULT_VSTREAM_QUEUE_SIZE); if (!input_params) { std::cerr << "Failed make_input_vstream_params " << input_params.status() << 
std::endl; return input_params.status(); } - auto output_params = network_group.value()->make_output_vstream_params(true, FORMAT_TYPE, HAILO_DEFAULT_VSTREAM_TIMEOUT_MS, HAILO_DEFAULT_VSTREAM_QUEUE_SIZE); + auto output_params = network_group.value()->make_output_vstream_params({}, FORMAT_TYPE, HAILO_DEFAULT_VSTREAM_TIMEOUT_MS, HAILO_DEFAULT_VSTREAM_QUEUE_SIZE); if (!output_params) { std::cerr << "Failed make_output_vstream_params " << output_params.status() << std::endl; return output_params.status(); diff --git a/hailort/libhailort/examples/cpp/multi_device_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/multi_device_example/CMakeLists.txt index 0124401..13cf2f3 100644 --- a/hailort/libhailort/examples/cpp/multi_device_example/CMakeLists.txt +++ b/hailort/libhailort/examples/cpp/multi_device_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) -find_package(HailoRT 4.15.0 EXACT REQUIRED) +find_package(HailoRT 4.16.0 EXACT REQUIRED) add_executable(cpp_multi_device_example multi_device_example.cpp) target_link_libraries(cpp_multi_device_example PRIVATE HailoRT::libhailort Threads::Threads) diff --git a/hailort/libhailort/examples/cpp/multi_device_example/multi_device_example.cpp b/hailort/libhailort/examples/cpp/multi_device_example/multi_device_example.cpp index 5bb6b47..ccf7d0e 100644 --- a/hailort/libhailort/examples/cpp/multi_device_example/multi_device_example.cpp +++ b/hailort/libhailort/examples/cpp/multi_device_example/multi_device_example.cpp @@ -16,7 +16,6 @@ #define HEF_FILE ("hefs/shortcut_net.hef") constexpr size_t BATCH_SIZE = 1; constexpr size_t FRAMES_COUNT = 100; -constexpr bool QUANTIZED = true; constexpr hailo_format_type_t FORMAT_TYPE = HAILO_FORMAT_TYPE_AUTO; constexpr size_t MAX_LAYER_EDGES = 16; @@ -166,7 +165,7 @@ int main() return network_group.status(); } - auto vstreams = VStreamsBuilder::create_vstreams(*network_group.value(), QUANTIZED, 
FORMAT_TYPE); + auto vstreams = VStreamsBuilder::create_vstreams(*network_group.value(), {}, FORMAT_TYPE); if (!vstreams) { std::cerr << "Failed creating vstreams " << vstreams.status() << std::endl; return vstreams.status(); diff --git a/hailort/libhailort/examples/cpp/multi_network_vstream_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/multi_network_vstream_example/CMakeLists.txt index 8374a42..d5c93a4 100644 --- a/hailort/libhailort/examples/cpp/multi_network_vstream_example/CMakeLists.txt +++ b/hailort/libhailort/examples/cpp/multi_network_vstream_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0) find_package(Threads REQUIRED) set(THREADS_PREFER_PTHREAD_FLAG ON) -find_package(HailoRT 4.15.0 EXACT REQUIRED) +find_package(HailoRT 4.16.0 EXACT REQUIRED) add_executable(cpp_multi_network_vstream_example multi_network_vstream_example.cpp) target_link_libraries(cpp_multi_network_vstream_example PRIVATE HailoRT::libhailort Threads::Threads) diff --git a/hailort/libhailort/examples/cpp/multi_network_vstream_example/multi_network_vstream_example.cpp b/hailort/libhailort/examples/cpp/multi_network_vstream_example/multi_network_vstream_example.cpp index 078bba2..3a1d69d 100644 --- a/hailort/libhailort/examples/cpp/multi_network_vstream_example/multi_network_vstream_example.cpp +++ b/hailort/libhailort/examples/cpp/multi_network_vstream_example/multi_network_vstream_example.cpp @@ -15,7 +15,6 @@ #define HEF_FILE ("hefs/multi_network_shortcut_net.hef") constexpr size_t INFER_FRAME_COUNT = 100; -constexpr bool QUANTIZED = true; constexpr hailo_format_type_t FORMAT_TYPE = HAILO_FORMAT_TYPE_AUTO; constexpr size_t MAX_LAYER_EDGES = 16; constexpr size_t NET_GROUPS_COUNT = 1; @@ -81,7 +80,7 @@ Expected> create_vstreams_per_network(Confi // Create vstreams for each network std::map networks_vstreams; for (auto &network_info : networks_infos) { - auto vstreams = VStreamsBuilder::create_vstreams(net_group, QUANTIZED, FORMAT_TYPE, 
network_info.name); + auto vstreams = VStreamsBuilder::create_vstreams(net_group, {}, FORMAT_TYPE, network_info.name); if (!vstreams) { std::cerr << "Failed to create vstreams for network " << network_info.name << std::endl; return make_unexpected(vstreams.status()); diff --git a/hailort/libhailort/examples/cpp/multi_process_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/multi_process_example/CMakeLists.txt index b5257bb..100fb66 100644 --- a/hailort/libhailort/examples/cpp/multi_process_example/CMakeLists.txt +++ b/hailort/libhailort/examples/cpp/multi_process_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) -find_package(HailoRT 4.15.0 EXACT REQUIRED) +find_package(HailoRT 4.16.0 EXACT REQUIRED) add_executable(cpp_multi_process_example multi_process_example.cpp) target_link_libraries(cpp_multi_process_example PRIVATE HailoRT::libhailort Threads::Threads) diff --git a/hailort/libhailort/examples/cpp/multi_process_example/multi_process_example.cpp b/hailort/libhailort/examples/cpp/multi_process_example/multi_process_example.cpp index 932cb66..891f034 100644 --- a/hailort/libhailort/examples/cpp/multi_process_example/multi_process_example.cpp +++ b/hailort/libhailort/examples/cpp/multi_process_example/multi_process_example.cpp @@ -17,7 +17,6 @@ constexpr size_t FRAMES_COUNT = 100; -constexpr bool QUANTIZED = true; constexpr hailo_format_type_t FORMAT_TYPE = HAILO_FORMAT_TYPE_AUTO; constexpr size_t MAX_LAYER_EDGES = 16; constexpr uint32_t DEVICE_COUNT = 1; @@ -156,7 +155,7 @@ int main(int argc, char **argv) return network_group.status(); } - auto vstreams = VStreamsBuilder::create_vstreams(*network_group.value(), QUANTIZED, FORMAT_TYPE); + auto vstreams = VStreamsBuilder::create_vstreams(*network_group.value(), {}, FORMAT_TYPE); if (!vstreams) { std::cerr << "Failed creating vstreams " << vstreams.status() << std::endl; return vstreams.status(); diff --git 
a/hailort/libhailort/examples/cpp/notification_callback_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/notification_callback_example/CMakeLists.txt index e01b717..d123da9 100644 --- a/hailort/libhailort/examples/cpp/notification_callback_example/CMakeLists.txt +++ b/hailort/libhailort/examples/cpp/notification_callback_example/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 3.0.0) -find_package(HailoRT 4.15.0 EXACT REQUIRED) +find_package(HailoRT 4.16.0 EXACT REQUIRED) add_executable(cpp_notification_callback_example notification_callback_example.cpp) target_link_libraries(cpp_notification_callback_example PRIVATE HailoRT::libhailort) diff --git a/hailort/libhailort/examples/cpp/power_measurement_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/power_measurement_example/CMakeLists.txt index 2db7d96..b6ebeb0 100644 --- a/hailort/libhailort/examples/cpp/power_measurement_example/CMakeLists.txt +++ b/hailort/libhailort/examples/cpp/power_measurement_example/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 3.0.0) -find_package(HailoRT 4.15.0 EXACT REQUIRED) +find_package(HailoRT 4.16.0 EXACT REQUIRED) add_executable(cpp_power_measurement_example power_measurement_example.cpp) target_link_libraries(cpp_power_measurement_example PRIVATE HailoRT::libhailort) diff --git a/hailort/libhailort/examples/cpp/raw_async_streams_multi_thread_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/raw_async_streams_multi_thread_example/CMakeLists.txt index db6e185..428d8c1 100644 --- a/hailort/libhailort/examples/cpp/raw_async_streams_multi_thread_example/CMakeLists.txt +++ b/hailort/libhailort/examples/cpp/raw_async_streams_multi_thread_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) -find_package(HailoRT 4.15.0 EXACT REQUIRED) +find_package(HailoRT 4.16.0 EXACT REQUIRED) add_executable(cpp_raw_async_streams_multi_thread_example 
raw_async_streams_multi_thread_example.cpp) target_link_libraries(cpp_raw_async_streams_multi_thread_example PRIVATE HailoRT::libhailort Threads::Threads) diff --git a/hailort/libhailort/examples/cpp/raw_async_streams_multi_thread_example/raw_async_streams_multi_thread_example.cpp b/hailort/libhailort/examples/cpp/raw_async_streams_multi_thread_example/raw_async_streams_multi_thread_example.cpp index c423a3a..cbc99fa 100644 --- a/hailort/libhailort/examples/cpp/raw_async_streams_multi_thread_example/raw_async_streams_multi_thread_example.cpp +++ b/hailort/libhailort/examples/cpp/raw_async_streams_multi_thread_example/raw_async_streams_multi_thread_example.cpp @@ -86,43 +86,21 @@ static void input_async_callback(const InputStream::CompletionInfo &completion_i } } -int main() +static hailo_status infer(ConfiguredNetworkGroup &network_group) { - auto device = Device::create(); - if (!device) { - std::cerr << "Failed create device " << device.status() << std::endl; - return EXIT_FAILURE; - } - - static const auto HEF_FILE = "hefs/shortcut_net.hef"; - auto network_group = configure_network_group(*device.value(), HEF_FILE); - if (!network_group) { - std::cerr << "Failed to configure network group " << HEF_FILE << std::endl; - return EXIT_FAILURE; - } - // Assume one input and output - auto &output = network_group->get()->get_output_streams()[0].get(); - auto &input = network_group->get()->get_input_streams()[0].get(); + auto &output = network_group.get_output_streams()[0].get(); + auto &input = network_group.get_input_streams()[0].get(); // Allocate buffers. The buffers sent to the async API must be page aligned. // For simplicity, in this example, we pass one buffer for each stream (It may be problematic in output since the // buffer will be overridden on each read). - // Note - the buffers are allocated before we activate the network group. This will ensure that they won't be freed - // until the network group will become inactive. 
+ // Note - the buffers can be freed only after all callbacks are called. The user can either wait for all + // callbacks, or as done in this example, call ConfiguredNetworkGroup::shutdown that will make sure all callbacks + // are called. auto output_buffer = page_aligned_alloc(output.get_frame_size()); auto input_buffer = page_aligned_alloc(input.get_frame_size()); - // The destructor of activated_network_group will make sure that all async operations are done. All pending - // operations will be canceled and their callbacks will be called with status=HAILO_STREAM_ABORTED_BY_USER. - // Be sure to capture variables in the callbacks that will be destructed after the activated_network_group. - // Otherwise, the lambda would have access an uninitialized data. - auto activated_network_group = network_group.value()->activate(); - if (!activated_network_group) { - std::cerr << "Failed to activate network group " << activated_network_group.status() << std::endl; - return EXIT_FAILURE; - } - std::atomic output_status(HAILO_UNINITIALIZED); std::thread output_thread([&]() { while (true) { @@ -148,14 +126,47 @@ int main() // After all async operations are launched, the inference is running. std::this_thread::sleep_for(std::chrono::seconds(5)); - // Make it stop. We explicitly destruct activated_network_group to stop all async I/O. - activated_network_group->reset(); + // Calling shutdown on a network group will ensure that all async operations are done. All pending + // operations will be canceled and their callbacks will be called with status=HAILO_STREAM_ABORTED_BY_USER. + // Only after the shutdown is called, we can safely free the buffers and any variable captured inside the async + // callback lambda. + network_group.shutdown(); - // Thread should be stopped with HAILO_STREAM_NOT_ACTIVATED status. + // Thread should be stopped with HAILO_STREAM_ABORTED_BY_USER status. 
output_thread.join(); input_thread.join(); - if ((HAILO_STREAM_NOT_ACTIVATED != output_status) || (HAILO_STREAM_NOT_ACTIVATED != input_status)) { + + if ((HAILO_STREAM_ABORTED_BY_USER != output_status) || (HAILO_STREAM_ABORTED_BY_USER != input_status)) { std::cerr << "Got unexpected statues from thread: " << output_status << ", " << input_status << std::endl; + return HAILO_INTERNAL_FAILURE; + } + + return HAILO_SUCCESS; +} + +int main() +{ + auto device = Device::create(); + if (!device) { + std::cerr << "Failed create device " << device.status() << std::endl; + return EXIT_FAILURE; + } + + static const auto HEF_FILE = "hefs/shortcut_net.hef"; + auto network_group = configure_network_group(*device.value(), HEF_FILE); + if (!network_group) { + std::cerr << "Failed to configure network group " << HEF_FILE << std::endl; + return EXIT_FAILURE; + } + + auto activated_network_group = network_group.value()->activate(); + if (!activated_network_group) { + std::cerr << "Failed to activate network group " << activated_network_group.status() << std::endl; + return EXIT_FAILURE; + } + + auto status = infer(*network_group.value()); + if (HAILO_SUCCESS != status) { return EXIT_FAILURE; } diff --git a/hailort/libhailort/examples/cpp/raw_async_streams_single_thread_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/raw_async_streams_single_thread_example/CMakeLists.txt index ce8dc32..3d20fcc 100644 --- a/hailort/libhailort/examples/cpp/raw_async_streams_single_thread_example/CMakeLists.txt +++ b/hailort/libhailort/examples/cpp/raw_async_streams_single_thread_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) -find_package(HailoRT 4.15.0 EXACT REQUIRED) +find_package(HailoRT 4.16.0 EXACT REQUIRED) add_executable(cpp_raw_async_streams_single_thread_example raw_async_streams_single_thread_example.cpp) target_link_libraries(cpp_raw_async_streams_single_thread_example PRIVATE 
HailoRT::libhailort Threads::Threads) diff --git a/hailort/libhailort/examples/cpp/raw_async_streams_single_thread_example/raw_async_streams_single_thread_example.cpp b/hailort/libhailort/examples/cpp/raw_async_streams_single_thread_example/raw_async_streams_single_thread_example.cpp index 219b2da..e402a8d 100644 --- a/hailort/libhailort/examples/cpp/raw_async_streams_single_thread_example/raw_async_streams_single_thread_example.cpp +++ b/hailort/libhailort/examples/cpp/raw_async_streams_single_thread_example/raw_async_streams_single_thread_example.cpp @@ -36,8 +36,12 @@ static AlignedBuffer page_aligned_alloc(size_t size) #endif } -static hailo_status infer(ConfiguredNetworkGroup &network_group, InputStream &input, OutputStream &output) +static hailo_status infer(ConfiguredNetworkGroup &network_group) { + // Assume one input and output + auto &output = network_group.get_output_streams()[0].get(); + auto &input = network_group.get_input_streams()[0].get(); + auto input_queue_size = input.get_async_max_queue_size(); auto output_queue_size = output.get_async_max_queue_size(); if (!input_queue_size || !output_queue_size) { @@ -45,8 +49,10 @@ static hailo_status infer(ConfiguredNetworkGroup &network_group, InputStream &in return HAILO_INTERNAL_FAILURE; } - // We store buffers vector here as a guard for the memory. The buffer will be freed only after - // activated_network_group will be released. + // Allocate buffers. The buffers sent to the async API must be page aligned. + // Note - the buffers can be freed only after all callbacks are called. The user can either wait for all + // callbacks, or as done in this example, call ConfiguredNetworkGroup::shutdown that will make sure all callbacks + // are called. 
std::vector buffer_guards; OutputStream::TransferDoneCallback read_done = [&output, &read_done](const OutputStream::CompletionInfo &completion_info) { @@ -55,7 +61,7 @@ static hailo_status infer(ConfiguredNetworkGroup &network_group, InputStream &in case HAILO_SUCCESS: // Real applications can forward the buffer to post-process/display. Here we just re-launch new async read. status = output.read_async(completion_info.buffer_addr, completion_info.buffer_size, read_done); - if ((HAILO_SUCCESS != status) && (HAILO_STREAM_NOT_ACTIVATED != status)) { + if ((HAILO_SUCCESS != status) && (HAILO_STREAM_ABORTED_BY_USER != status)) { std::cerr << "Failed read async with status=" << status << std::endl; } break; @@ -74,7 +80,7 @@ static hailo_status infer(ConfiguredNetworkGroup &network_group, InputStream &in // Real applications may free the buffer and replace it with new buffer ready to be sent. Here we just // re-launch new async write. status = input.write_async(completion_info.buffer_addr, completion_info.buffer_size, write_done); - if ((HAILO_SUCCESS != status) && (HAILO_STREAM_NOT_ACTIVATED != status)) { + if ((HAILO_SUCCESS != status) && (HAILO_STREAM_ABORTED_BY_USER != status)) { std::cerr << "Failed read async with status=" << status << std::endl; } break; @@ -86,16 +92,6 @@ static hailo_status infer(ConfiguredNetworkGroup &network_group, InputStream &in } }; - // The destructor of activated_network_group will make sure that all async operations are done. All pending - // operations will be canceled and their callbacks will be called with status=HAILO_STREAM_ABORTED_BY_USER. - // Be sure to capture variables in the callbacks that will be destructed after the activated_network_group. - // Otherwise, the lambda would have access an uninitialized data. 
- auto activated_network_group = network_group.activate(); - if (!activated_network_group) { - std::cerr << "Failed to activate network group " << activated_network_group.status() << std::endl; - return activated_network_group.status(); - } - // We launch "*output_queue_size" async read operation. On each async callback, we launch a new async read operation. for (size_t i = 0; i < *output_queue_size; i++) { // Buffers read from async operation must be page aligned. @@ -122,10 +118,14 @@ static hailo_status infer(ConfiguredNetworkGroup &network_group, InputStream &in buffer_guards.emplace_back(buffer); } - // After all async operations are launched, the inference will continue until the activated_network_group - // destructor is called. std::this_thread::sleep_for(std::chrono::seconds(5)); + // Calling shutdown on a network group will ensure that all async operations are done. All pending + // operations will be canceled and their callbacks will be called with status=HAILO_STREAM_ABORTED_BY_USER. + // Only after the shutdown is called, we can safely free the buffers and any variable captured inside the async + // callback lambda. 
+ network_group.shutdown(); + return HAILO_SUCCESS; } @@ -167,27 +167,29 @@ int main() auto device = Device::create(); if (!device) { std::cerr << "Failed to create device " << device.status() << std::endl; - return device.status(); + return EXIT_FAILURE; } static const auto HEF_FILE = "hefs/shortcut_net.hef"; auto network_group = configure_network_group(*device.value(), HEF_FILE); if (!network_group) { std::cerr << "Failed to configure network group" << HEF_FILE << std::endl; - return network_group.status(); + return EXIT_FAILURE; } - // Assume one input and output - auto output = network_group->get()->get_output_streams()[0]; - auto input = network_group->get()->get_input_streams()[0]; + auto activated_network_group = network_group.value()->activate(); + if (!activated_network_group) { + std::cerr << "Failed to activate network group " << activated_network_group.status() << std::endl; + return EXIT_FAILURE; + } // Now start the inference - auto status = infer(*network_group.value(), input.get(), output.get()); + auto status = infer(*network_group.value()); if (HAILO_SUCCESS != status) { std::cerr << "Inference failed with " << status << std::endl; - return status; + return EXIT_FAILURE; } std::cout << "Inference finished successfully" << std::endl; - return HAILO_SUCCESS; + return EXIT_SUCCESS; } diff --git a/hailort/libhailort/examples/cpp/raw_streams_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/raw_streams_example/CMakeLists.txt index 8bd3678..1debea8 100644 --- a/hailort/libhailort/examples/cpp/raw_streams_example/CMakeLists.txt +++ b/hailort/libhailort/examples/cpp/raw_streams_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) -find_package(HailoRT 4.15.0 EXACT REQUIRED) +find_package(HailoRT 4.16.0 EXACT REQUIRED) add_executable(cpp_raw_streams_example raw_streams_example.cpp) target_link_libraries(cpp_raw_streams_example PRIVATE HailoRT::libhailort 
Threads::Threads) diff --git a/hailort/libhailort/examples/cpp/raw_streams_example/raw_streams_example.cpp b/hailort/libhailort/examples/cpp/raw_streams_example/raw_streams_example.cpp index c780cad..6d6b7ee 100644 --- a/hailort/libhailort/examples/cpp/raw_streams_example/raw_streams_example.cpp +++ b/hailort/libhailort/examples/cpp/raw_streams_example/raw_streams_example.cpp @@ -15,7 +15,6 @@ #define HEF_FILE ("hefs/shortcut_net.hef") constexpr size_t FRAMES_COUNT = 100; -constexpr bool QUANTIZED = true; constexpr hailo_format_type_t FORMAT_TYPE = HAILO_FORMAT_TYPE_AUTO; constexpr size_t MAX_LAYER_EDGES = 16; @@ -48,7 +47,7 @@ Expected> configure_network_group(Device void write_all(InputStream &input, hailo_status &status) { - auto transform_context = InputTransformContext::create(input.get_info(), QUANTIZED, FORMAT_TYPE); + auto transform_context = InputTransformContext::create(input.get_info(), {}, FORMAT_TYPE); if (!transform_context) { status = transform_context.status(); return; @@ -74,7 +73,7 @@ void write_all(InputStream &input, hailo_status &status) void read_all(OutputStream &output, hailo_status &status) { - auto transform_context = OutputTransformContext::create(output.get_info(), QUANTIZED, FORMAT_TYPE); + auto transform_context = OutputTransformContext::create(output.get_info(), {}, FORMAT_TYPE); if (!transform_context) { status = transform_context.status(); return; diff --git a/hailort/libhailort/examples/cpp/switch_network_groups_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/switch_network_groups_example/CMakeLists.txt index 5c0ca8f..4dea0c1 100644 --- a/hailort/libhailort/examples/cpp/switch_network_groups_example/CMakeLists.txt +++ b/hailort/libhailort/examples/cpp/switch_network_groups_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) -find_package(HailoRT 4.15.0 EXACT REQUIRED) +find_package(HailoRT 4.16.0 EXACT REQUIRED) 
add_executable(cpp_switch_network_groups_example switch_network_groups_example.cpp) target_link_libraries(cpp_switch_network_groups_example PRIVATE HailoRT::libhailort Threads::Threads) diff --git a/hailort/libhailort/examples/cpp/switch_network_groups_example/switch_network_groups_example.cpp b/hailort/libhailort/examples/cpp/switch_network_groups_example/switch_network_groups_example.cpp index b035fef..08d6502 100644 --- a/hailort/libhailort/examples/cpp/switch_network_groups_example/switch_network_groups_example.cpp +++ b/hailort/libhailort/examples/cpp/switch_network_groups_example/switch_network_groups_example.cpp @@ -16,7 +16,6 @@ #include -constexpr bool QUANTIZED = true; constexpr hailo_format_type_t FORMAT_TYPE = HAILO_FORMAT_TYPE_AUTO; constexpr size_t INFER_FRAME_COUNT = 100; constexpr uint32_t DEVICE_COUNT = 1; @@ -66,7 +65,7 @@ Expected, std::vector, std::vector>> vstreams_per_network_group; for (auto &network_group : configured_network_groups) { - auto vstreams_exp = VStreamsBuilder::create_vstreams(*network_group, QUANTIZED, FORMAT_TYPE); + auto vstreams_exp = VStreamsBuilder::create_vstreams(*network_group, {}, FORMAT_TYPE); if (!vstreams_exp) { return make_unexpected(vstreams_exp.status()); } diff --git a/hailort/libhailort/examples/cpp/switch_network_groups_manually_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/switch_network_groups_manually_example/CMakeLists.txt index 7777a94..fd319e3 100644 --- a/hailort/libhailort/examples/cpp/switch_network_groups_manually_example/CMakeLists.txt +++ b/hailort/libhailort/examples/cpp/switch_network_groups_manually_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0) find_package(Threads REQUIRED) set(THREADS_PREFER_PTHREAD_FLAG ON) -find_package(HailoRT 4.15.0 EXACT REQUIRED) +find_package(HailoRT 4.16.0 EXACT REQUIRED) add_executable(cpp_switch_network_groups_manually_example switch_network_groups_manually_example.cpp) 
target_link_libraries(cpp_switch_network_groups_manually_example PRIVATE HailoRT::libhailort Threads::Threads) diff --git a/hailort/libhailort/examples/cpp/switch_network_groups_manually_example/switch_network_groups_manually_example.cpp b/hailort/libhailort/examples/cpp/switch_network_groups_manually_example/switch_network_groups_manually_example.cpp index ddbe2f8..2ace0c0 100644 --- a/hailort/libhailort/examples/cpp/switch_network_groups_manually_example/switch_network_groups_manually_example.cpp +++ b/hailort/libhailort/examples/cpp/switch_network_groups_manually_example/switch_network_groups_manually_example.cpp @@ -17,7 +17,6 @@ #include -constexpr bool QUANTIZED = true; constexpr hailo_format_type_t FORMAT_TYPE = HAILO_FORMAT_TYPE_AUTO; constexpr size_t INFER_FRAME_COUNT = 100; @@ -151,7 +150,7 @@ void network_group_thread_main(std::shared_ptr network_g std::shared_ptr should_threads_run, hailo_status &status_out) { // Create VStreams - auto vstreams_exp = VStreamsBuilder::create_vstreams(*network_group, QUANTIZED, FORMAT_TYPE); + auto vstreams_exp = VStreamsBuilder::create_vstreams(*network_group, {}, FORMAT_TYPE); if (!vstreams_exp) { std::cerr << "Failed to create vstreams, status = " << vstreams_exp.status() << std::endl; status_out = vstreams_exp.status(); diff --git a/hailort/libhailort/examples/cpp/vstreams_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/vstreams_example/CMakeLists.txt index 7ea2566..ee7a669 100644 --- a/hailort/libhailort/examples/cpp/vstreams_example/CMakeLists.txt +++ b/hailort/libhailort/examples/cpp/vstreams_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) -find_package(HailoRT 4.15.0 EXACT REQUIRED) +find_package(HailoRT 4.16.0 EXACT REQUIRED) add_executable(cpp_vstreams_example vstreams_example.cpp) target_link_libraries(cpp_vstreams_example PRIVATE HailoRT::libhailort Threads::Threads) diff --git 
a/hailort/libhailort/examples/cpp/vstreams_example/vstreams_example.cpp b/hailort/libhailort/examples/cpp/vstreams_example/vstreams_example.cpp index 5baae18..09d9dfb 100644 --- a/hailort/libhailort/examples/cpp/vstreams_example/vstreams_example.cpp +++ b/hailort/libhailort/examples/cpp/vstreams_example/vstreams_example.cpp @@ -138,9 +138,8 @@ int main() return network_group.status(); } - // Set input format type to auto, and mark the data as quantized - libhailort will not scale the data before writing to the HW - bool quantized = true; - auto input_vstream_params = network_group.value()->make_input_vstream_params(quantized, HAILO_FORMAT_TYPE_AUTO, HAILO_DEFAULT_VSTREAM_TIMEOUT_MS, + // Set input format type to auto - libhailort will not scale the data before writing to the HW + auto input_vstream_params = network_group.value()->make_input_vstream_params({}, HAILO_FORMAT_TYPE_AUTO, HAILO_DEFAULT_VSTREAM_TIMEOUT_MS, HAILO_DEFAULT_VSTREAM_QUEUE_SIZE); if (!input_vstream_params) { std::cerr << "Failed creating input vstreams params " << input_vstream_params.status() << std::endl; @@ -159,10 +158,9 @@ int main() return input_vstreams.status(); } - // Set output format type to float32, and mark the data as not quantized - libhailort will de-quantize the data after reading from the HW + // Set output format type to float32 - libhailort will de-quantize the data after reading from the HW // Note: this process might affect the overall performance - quantized = false; - auto output_vstream_params = network_group.value()->make_output_vstream_params(quantized, HAILO_FORMAT_TYPE_FLOAT32, HAILO_DEFAULT_VSTREAM_TIMEOUT_MS, + auto output_vstream_params = network_group.value()->make_output_vstream_params({}, HAILO_FORMAT_TYPE_FLOAT32, HAILO_DEFAULT_VSTREAM_TIMEOUT_MS, HAILO_DEFAULT_VSTREAM_QUEUE_SIZE); if (!output_vstream_params) { std::cerr << "Failed creating output vstreams params " << output_vstream_params.status() << std::endl; diff --git a/hailort/libhailort/hef.proto 
b/hailort/libhailort/hef.proto index 7c231c3..5250493 100644 --- a/hailort/libhailort/hef.proto +++ b/hailort/libhailort/hef.proto @@ -50,6 +50,7 @@ enum ProtoHEFExtensionType { HAILO_NET_FLOW_YOLOV5_SEG_NMS = 24; OUTPUT_SCALE_PER_FEATURE = 25; PERIPH_CALCULATION_IN_HAILORT = 26; + HAILO_NET_FLOW_YOLOV8_NMS = 27; UNUSED = 0XFFFF; } @@ -83,12 +84,13 @@ enum ProtoHEFHwArch { PROTO__HW_ARCH__HAILO8P = 1; PROTO__HW_ARCH__HAILO8R = 2; PROTO__HW_ARCH__HAILO8L = 3; + PROTO__HW_ARCH__HAILO15H = 103; + PROTO__HW_ARCH__HAILO15M = 4; // Reserving low numbers to public hw archs PROTO__HW_ARCH__SAGE_A0 = 100; PROTO__HW_ARCH__SAGE_B0 = 101; PROTO__HW_ARCH__PAPRIKA_B0 = 102; - PROTO__HW_ARCH__HAILO15H = 103; PROTO__HW_ARCH__GINGER = 104; PROTO__HW_ARCH__LAVENDER = 105; PROTO__HW_ARCH__PLUTO = 106; @@ -245,6 +247,31 @@ message ProtoHEFYoloSegNmsOp { double mask_threshold = 5; }; +message ProtoHEFYolov8BboxDecoder { + // Pixels stride for given bbox + uint32 stride = 1; + + // Index of the pad connected to the encoded layer in the decoder (reg layer) + uint32 reg_pad_index = 2; + + // Index of the pad connected to the classes scores layer in the decoder (cls layer) + uint32 cls_pad_index = 3; +}; + +message ProtoHEFYolov8NmsOp { + // Input image dimensions + double image_height = 1; + double image_width = 2; + + // Division factor of proposals sent to the NMS per class, instead of running NMS on all proposal together + uint32 input_division_factor = 3; + + // List of bbox decoders (anchors) for the NMS layer. 
Each model has its own number of boxes per anchor + repeated ProtoHEFYolov8BboxDecoder bbox_decoders = 4; + + uint32 regression_length = 5; +}; + message ProtoHEFNmsOp { // NMS score threshold double nms_score_th = 1; @@ -271,6 +298,7 @@ message ProtoHEFNmsOp { ProtoHEFSSDNmsOp ssd_nms_op = 9; // SSD post process ProtoHEFIOUNmsOp iou_op = 10; // IoU only ProtoHEFYoloSegNmsOp yolo_seg_op = 11; // YOLOv5 seg post process + ProtoHEFYolov8NmsOp yolov8_nms_op = 12; // YOLOv8 post process } }; diff --git a/hailort/libhailort/include/hailo/buffer_storage.hpp b/hailort/libhailort/include/hailo/buffer_storage.hpp index 6a9bd45..ce227e4 100644 --- a/hailort/libhailort/include/hailo/buffer_storage.hpp +++ b/hailort/libhailort/include/hailo/buffer_storage.hpp @@ -28,10 +28,13 @@ namespace hailort // Forward declarations class Device; class VDevice; +class VdmaDevice; class BufferStorage; class HeapStorage; class DmaStorage; +class UserBufferStorage; class HailoRTDriver; +class Buffer; namespace vdma { class DmaAbleBuffer; @@ -98,7 +101,8 @@ class HAILORTAPI BufferStorage public: enum class Type { HEAP, - DMA + DMA, + USER_BUFFER }; static Expected create(size_t size, const BufferStorageParams ¶ms); @@ -122,12 +126,11 @@ public: // - If the mapping is new - true is returned. // - If the mapping already exists - false is returned. // - Otherwise - Unexpected with a failure status is returned. + // Note: This buffer storage must be destroyed before the device it is mapped to is destroyed! + // Failing to do so will lead to unexpected results + // TODO: resolve this issue (HRT-12361) virtual Expected dma_map(Device &device, hailo_dma_buffer_direction_t data_direction) = 0; - // Maps the backing buffer to a device via driver in data_direction, returning a pointer to it. - // - If the mapping is new - true is returned. - // - If the mapping already exists - false is returned. - // - Otherwise - Unexpected with a failure status is returned. 
- virtual Expected dma_map(HailoRTDriver &driver, hailo_dma_buffer_direction_t data_direction) = 0; + virtual Expected dma_map(VdmaDevice &device, hailo_dma_buffer_direction_t data_direction) = 0; // Internal functions virtual Expected get_dma_mapped_buffer(const std::string &device_id) = 0; @@ -155,7 +158,7 @@ public: virtual void *user_address() override; virtual Expected release() noexcept override; virtual Expected dma_map(Device &device, hailo_dma_buffer_direction_t data_direction) override; - virtual Expected dma_map(HailoRTDriver &driver, hailo_dma_buffer_direction_t data_direction) override; + virtual Expected dma_map(VdmaDevice &device, hailo_dma_buffer_direction_t data_direction) override; // Internal functions virtual Expected get_dma_mapped_buffer(const std::string &device_id) override; @@ -199,38 +202,80 @@ public: // The buffer is mapped to vdevice.get_physical_devices() in data_direction. static Expected create_from_user_address(void *user_address, size_t size, hailo_dma_buffer_direction_t data_direction, VDevice &device); + // Creates a DMA-able buffer from given user buffer at address given of size length if possible, + // Otherwise allocates new one length of size + static Expected> create_dma_able_buffer_from_user_size(void *addr, size_t size); DmaStorage(const DmaStorage &other) = delete; DmaStorage &operator=(const DmaStorage &other) = delete; DmaStorage(DmaStorage &&other) noexcept = default; DmaStorage &operator=(DmaStorage &&other) = delete; - virtual ~DmaStorage() = default; + virtual ~DmaStorage(); virtual size_t size() const override; virtual void *user_address() override; virtual Expected release() noexcept override; // TODO: thread safety (HRT-10669) virtual Expected dma_map(Device &device, hailo_dma_buffer_direction_t data_direction) override; - virtual Expected dma_map(HailoRTDriver &driver, hailo_dma_buffer_direction_t data_direction) override; + virtual Expected dma_map(VdmaDevice &device, hailo_dma_buffer_direction_t 
data_direction) override; // Internal functions DmaStorage(vdma::DmaAbleBufferPtr &&dma_able_buffer); virtual Expected get_dma_mapped_buffer(const std::string &device_id) override; private: - // Creates a backing dma-able buffer (either user or hailort allocated). - // Maps said buffer to physical_devices in data_direction. - // By default (if physical_devices is empty), no mapping will occur + // - Creates a backing DmaAbleBuffer: + // - If user_address is null, it'll be allocated by hailort + // - Otherwise, it'll be a non-owning wrapper of the user's buffer + // - The said buffer is mapped to physical_devices in data_direction. + // - By default (if physical_devices is empty), no mapping will occur static Expected create(void *user_address, size_t size, hailo_dma_buffer_direction_t data_direction = HAILO_DMA_BUFFER_DIRECTION_MAX_ENUM, std::vector> &&physical_devices = {}); + // Initialization dependency vdma::DmaAbleBufferPtr m_dma_able_buffer; - // For each device (key is device_id), we store some vdma mapping. 
- // TODO: use (device_id, direction) as key - HRT-10656 - std::unordered_map m_mappings; + // TODO: use (device_id, direction) as key or have two dicts (HRT-10656) + using UnmappingCallback = std::function; + std::unordered_map> m_mappings; +}; + + +using UserBufferStoragePtr = std::shared_ptr; +class HAILORTAPI UserBufferStorage : public BufferStorage +{ +public: + static Expected create(void *user_address, const size_t size); + + UserBufferStorage(void *user_address, const size_t size); + UserBufferStorage(const UserBufferStorage &other) = delete; + UserBufferStorage &operator=(const UserBufferStorage &other) = delete; + UserBufferStorage(UserBufferStorage &&other) noexcept = default; + UserBufferStorage &operator=(UserBufferStorage &&other) = delete; + virtual ~UserBufferStorage() = default; + + virtual size_t size() const override; + virtual void *user_address() override; + virtual Expected release() noexcept override; + virtual Expected dma_map(Device &device, hailo_dma_buffer_direction_t data_direction) override; + virtual Expected dma_map(VdmaDevice &device, hailo_dma_buffer_direction_t data_direction) override; + + // Internal functions + virtual Expected get_dma_mapped_buffer(const std::string &device_id) override; + + // Create storage for user buffer to store mappings. Used internally, not by the user. 
+ static Expected> create_storage_from_user_buffer(void *addr, size_t size); + +private: + + void * m_user_address; + const size_t m_size; + + using UnmappingCallback = std::function; + std::unordered_map> m_mappings; }; + // ************************************** NOTE - END ************************************** // // DmaStorage isn't currently supported and is for internal use only // // **************************************************************************************** // diff --git a/hailort/libhailort/include/hailo/device.hpp b/hailort/libhailort/include/hailo/device.hpp index 8ad3e9d..1109791 100644 --- a/hailort/libhailort/include/hailo/device.hpp +++ b/hailort/libhailort/include/hailo/device.hpp @@ -34,6 +34,14 @@ namespace hailort class Device; using NotificationCallback = std::function; +namespace vdma { + class DmaAbleBuffer; + using DmaAbleBufferPtr = std::shared_ptr; + + class MappedBuffer; + using MappedBufferPtr = std::shared_ptr; +} + /** @} */ // end of group_type_definitions /*! Represents the Hailo device (chip). */ @@ -392,8 +400,8 @@ public: * * @param[in] averaging_factor Number of samples per time period, sensor configuration value. * @param[in] sampling_period Related conversion time, sensor configuration value. - * The sensor samples the power every sampling_period {ms} and averages every - * averaging_factor samples. The sensor provides a new value every: 2 * sampling_period * averaging_factor {ms}. + * The sensor samples the power every sampling_period {us} and averages every + * averaging_factor samples. The sensor provides a new value every: (2 * sampling_period * averaging_factor){ms}. * The firmware wakes up every interval_milliseconds {ms} and checks the sensor. * If there is a new value to read from the sensor, the firmware reads it. 
 * Note that the average calculated by the firmware is 'average of averages', @@ -692,6 +700,42 @@ public: */ virtual bool is_stream_interface_supported(const hailo_stream_interface_t &stream_interface) const = 0; + // TODO: Also link to async infer - ConfiguredInferModel, Bindings etc. Just like we did for + // InputStream::write_async and OutputStream::read_async (HRT-11039) + /** + * Maps the buffer pointed to by @a address for DMA transfers to/from this device, in the specified + * @a direction. + * DMA mapping of buffers in advance may improve the performance of `InputStream::write_async()` or + * `OutputStream::read_async()`. This improvement will be realized if the buffer is reused multiple times + * across different async operations. + * - For buffers that will be written to the device via `InputStream::write_async()`, use `HAILO_H2D_STREAM` + * for the @a direction parameter. + * - For buffers that will be read from the device via `OutputStream::read_async()`, use `HAILO_D2H_STREAM` + * for the @a direction parameter. + * + * @param[in] address The address of the buffer to be mapped + * @param[in] size The buffer's size in bytes + * @param[in] direction The direction of the mapping + * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error. + * @note The DMA mapping will be freed upon calling dma_unmap() with @a address and @a direction, or when the + * @a Device object is destroyed. + * @note The buffer pointed to by @a address cannot be freed until it is unmapped (via dma_unmap() or @a Device + * destruction). + */ + virtual hailo_status dma_map(void *address, size_t size, hailo_stream_direction_t direction); + + /** + * Un-maps a buffer pointed to by @a address for DMA transfers to/from this device, in the direction + * @a direction. + * + * @param[in] address The address of the buffer to be un-mapped + * @param[in] direction The direction of the mapping + * @return Upon success, returns ::HAILO_SUCCESS. 
Otherwise, returns a ::hailo_status error. + */ + virtual hailo_status dma_unmap(void *address, hailo_stream_direction_t direction); + + virtual Expected> try_dma_map(vdma::DmaAbleBufferPtr buffer, + hailo_stream_direction_t direction); virtual hailo_status direct_write_memory(uint32_t address, const void *buffer, uint32_t size); virtual hailo_status direct_read_memory(uint32_t address, void *buffer, uint32_t size); hailo_status set_overcurrent_state(bool should_activate); diff --git a/hailort/libhailort/include/hailo/hailort.h b/hailort/libhailort/include/hailo/hailort.h index c33b94e..d218b22 100644 --- a/hailort/libhailort/include/hailo/hailort.h +++ b/hailort/libhailort/include/hailo/hailort.h @@ -58,6 +58,8 @@ extern "C" { #define HAILO_PCIE_ANY_DOMAIN (UINT32_MAX) #define HAILO_DEFAULT_VSTREAM_QUEUE_SIZE (2) #define HAILO_DEFAULT_VSTREAM_TIMEOUT_MS (10000) +#define HAILO_DEFAULT_ASYNC_INFER_TIMEOUT_MS (10000) +#define HAILO_DEFAULT_ASYNC_INFER_QUEUE_SIZE (2) #define HAILO_DEFAULT_DEVICE_COUNT (1) #define HAILO_SOC_ID_LENGTH (32) @@ -415,6 +417,7 @@ typedef enum hailo_device_architecture_e { HAILO_ARCH_HAILO8L, HAILO_ARCH_HAILO15H, HAILO_ARCH_PLUTO, + HAILO_ARCH_HAILO15M, /** Max enum value to maintain ABI Integrity */ HAILO_ARCH_MAX_ENUM = HAILO_MAX_ENUM @@ -716,21 +719,18 @@ typedef enum { HAILO_FORMAT_ORDER_HAILO_YYYYUV = 19, /** - * NMS bbox - * - Host side + * NMS_WITH_BYTE_MASK format * - * For each class (::hailo_nms_shape_t.number_of_classes), the layout is - * \code - * struct (packed) { - * float32_t bbox_count; - * hailo_bbox_with_byte_mask_t bbox_with_byte_mask[bbox_count]; - * }; - * \endcode + * - Host side + * \code + * struct (packed) { + * uint16_t detections_count; + * hailo_detection_with_byte_mask_t[detections_count]; + * }; + * \endcode * * The host format type supported ::HAILO_FORMAT_TYPE_FLOAT32. * - * Maximum amount of bboxes per class is ::hailo_nms_shape_t.max_bboxes_per_class. 
- * * - Not used for device side */ HAILO_FORMAT_ORDER_HAILO_NMS_WITH_BYTE_MASK = 20, @@ -818,6 +818,7 @@ typedef enum { // ************************************* NOTE - START ************************************* // // Dma buffer allocation isn't currently supported and is for internal use only // // **************************************************************************************** // +// TODO: remove hailo_dma_buffer_direction_t (HRT-12391) /** Hailo dma buffer direction */ typedef enum { HAILO_DMA_BUFFER_DIRECTION_H2D = 0, @@ -1311,15 +1312,38 @@ typedef struct { } hailo_bbox_float32_t; typedef struct { - hailo_bbox_float32_t bbox; + float32_t y_min; + float32_t x_min; + float32_t y_max; + float32_t x_max; +} hailo_rectangle_t; + +typedef struct { + /** Detection's box coordinates */ + hailo_rectangle_t box; + + /** Detection's score */ + float32_t score; + + /** Detection's class id */ + uint16_t class_id; /** Mask size in bytes */ - uint32_t mask_size; + size_t mask_size; - /** Mask */ - // TODO: HRT-11413 - Add documentation on byte mask + /** + * Byte Mask: + * The mask is a binary mask that defines a region of interest (ROI) of the image. + * Mask pixel values of 1 indicate image pixels that belong to the ROI. + * Mask pixel values of 0 indicate image pixels that are part of the background. + * + * The size of the mask is the size of the box, in the original input image's dimensions. + * Mask width = ceil((box.x_max - box.x_min) * image_width) + * Mask height = ceil((box.y_max - box.y_min) * image_height) + * First pixel represents the pixel (x_min * image_width, y_min * image_height) in the original input image. + */ uint8_t *mask; -} hailo_bbox_with_byte_mask_t; +} hailo_detection_with_byte_mask_t; #pragma pack(pop) /** @@ -2370,8 +2394,8 @@ HAILORTAPI hailo_status hailo_power_measurement(hailo_device device, hailo_dvm_o * @param[in] device A ::hailo_device object. 
* @param[in] averaging_factor Number of samples per time period, sensor configuration value. * @param[in] sampling_period Related conversion time, sensor configuration value. - * The sensor samples the power every sampling_period {ms} and averages every - * averaging_factor samples. The sensor provides a new value every: 2 * sampling_period * averaging_factor {ms}. + * The sensor samples the power every sampling_period {us} and averages every + * averaging_factor samples. The sensor provides a new value every: (2 * sampling_period * averaging_factor) {ms}. * The firmware wakes up every interval_milliseconds {ms} and checks the sensor. * If there is a new value to read from the sensor, the firmware reads it. * Note that the average calculated by the firmware is 'average of averages', @@ -2785,6 +2809,19 @@ HAILORTAPI hailo_status hailo_network_group_get_input_stream_infos(hailo_configu HAILORTAPI hailo_status hailo_network_group_get_output_stream_infos(hailo_configured_network_group network_group, hailo_stream_info_t *stream_infos, size_t stream_infos_length, size_t *number_of_streams); +/** + * Shutdown a given network group. Makes sure all ongoing async operations are canceled. All async callbacks + * of transfers that have not been completed will be called with status ::HAILO_STREAM_ABORTED_BY_USER. + * Any resources attached to the network group may be released after function returns. + * + * @param[in] network_group NetworkGroup to be shutdown. + * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error. + * + * @note Calling this function is optional, and it is used to shutdown network group while there is still ongoing + * inference. + */ +HAILORTAPI hailo_status hailo_shutdown_network_group(hailo_configured_network_group network_group); + /** * Activates hailo_device inner-resources for context_switch inference. 
* @@ -2851,7 +2888,7 @@ HAILORTAPI hailo_status hailo_get_latency_measurement(hailo_configured_network_g * @param[in] network_name Network name for which to set the timeout. * If NULL is passed, the timeout will be set for all the networks in the network group. * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error. - * @note Using this function is only allowed when scheduling_algorithm is not ::HAILO_SCHEDULING_ALGORITHM_NONE, and before the creation of any vstreams. + * @note Using this function is only allowed when scheduling_algorithm is not ::HAILO_SCHEDULING_ALGORITHM_NONE. * @note The default timeout is 0ms. * @note Currently, setting the timeout for a specific network is not supported. * @note The timeout may be ignored to prevent idle time from the device. @@ -2869,7 +2906,7 @@ HAILORTAPI hailo_status hailo_set_scheduler_timeout(hailo_configured_network_gro * @param[in] network_name Network name for which to set the threshold. * If NULL is passed, the threshold will be set for all the networks in the network group. * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error. - * @note Using this function is only allowed when scheduling_algorithm is not ::HAILO_SCHEDULING_ALGORITHM_NONE, and before the creation of any vstreams. + * @note Using this function is only allowed when scheduling_algorithm is not ::HAILO_SCHEDULING_ALGORITHM_NONE. * @note The default threshold is 0, which means HailoRT will apply an automatic heuristic to choose the threshold. * @note Currently, setting the threshold for a specific network is not supported. * @note The threshold may be ignored to prevent idle time from the device. @@ -3440,18 +3477,15 @@ HAILORTAPI hailo_status hailo_fuse_nms_frames(const hailo_nms_fuse_input_t *nms_ * the function returns input virtual stream params of the given network. 
* If NULL is passed, the function returns the input virtual stream params of * all the networks of the first network group. - * @param[in] quantized Deprecated parameter that will be ignored. Determine whether to quantize (scale) - * the data will be decided by the src-data and dst-data types. + * @param[in] unused Unused. * @param[in] format_type The default format type for all input virtual streams. * @param[out] input_params List of params for input virtual streams. * @param[inout] input_params_count On input: Amount of @a input_params array. * On output: Will be filled with the detected amount of input vstreams on the @a network or @a network_group. * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error. - * @note The argument @a quantized is deprecated and its usage is ignored. Determine whether to quantize (scale) the data will be decided by - * the src-data and dst-data types. */ HAILORTAPI hailo_status hailo_hef_make_input_vstream_params(hailo_hef hef, const char *name, - bool quantized, hailo_format_type_t format_type, + bool unused, hailo_format_type_t format_type, hailo_input_vstream_params_by_name_t *input_params, size_t *input_params_count); /** @@ -3465,52 +3499,43 @@ HAILORTAPI hailo_status hailo_hef_make_input_vstream_params(hailo_hef hef, const * the function returns output virtual stream params of the given network. * If NULL is passed, the function returns the output virtual stream params of * all the networks of the first network group. - * @param[in] quantized Deprecated parameter that will be ignored. Determine whether to de-quantize (rescale) - * the data will be decided by the src-data and dst-data types. + * @param[in] unused Unused. * @param[in] format_type The default format type for all output virtual streams. * @param[out] output_params List of params for output virtual streams. * @param[inout] output_params_count On input: Amount of @a output_params array. 
* On output: Will be filled with the detected amount of output vstreams on the @a network or @a network_group. * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error. - * @note The argument @a quantized is deprecated and its usage is ignored. Determine whether to de-quantize (rescale) the data will be decided by - * the src-data and dst-data types. */ HAILORTAPI hailo_status hailo_hef_make_output_vstream_params(hailo_hef hef, const char *name, - bool quantized, hailo_format_type_t format_type, + bool unused, hailo_format_type_t format_type, hailo_output_vstream_params_by_name_t *output_params, size_t *output_params_count); /** * Creates input virtual stream params for a given network_group. * * @param[in] network_group Network group that owns the streams. - * @param[in] quantized Deprecated parameter that will be ignored. Determine whether to quantize (scale) - * the data will be decided by the src-data and dst-data types. + * @param[in] unused Unused. * @param[in] format_type The default format type for all input virtual streams. * @param[out] input_params List of params for input virtual streams. * @param[inout] input_params_count On input: Amount of @a input_params array. * On output: Will be filled with the detected amount of input vstreams on the @a network_group. * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error. - * @note The argument @a quantized is deprecated and its usage is ignored. Determine whether to quantize (scale) the data will be decided by - * the src-data and dst-data types. 
*/ -HAILORTAPI hailo_status hailo_make_input_vstream_params(hailo_configured_network_group network_group, bool quantized, +HAILORTAPI hailo_status hailo_make_input_vstream_params(hailo_configured_network_group network_group, bool unused, hailo_format_type_t format_type, hailo_input_vstream_params_by_name_t *input_params, size_t *input_params_count); /** * Creates output virtual stream params for given network_group. * * @param[in] network_group Network group that owns the streams. - * @param[in] quantized Deprecated parameter that will be ignored. Determine whether to de-quantize (rescale) - * the data will be decided by the src-data and dst-data types. + * @param[in] unused Unused. * @param[in] format_type The default format type for all output virtual streams. * @param[out] output_params List of params for output virtual streams. * @param[inout] output_params_count On input: Amount of @a output_params array. * On output: Will be filled with the detected amount of output vstreams on the @a network_group. * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error. - * @note The argument @a quantized is deprecated and its usage is ignored. Determine whether to de-quantize (rescale) the data will be decided by - * the src-data and dst-data types. 
*/ -HAILORTAPI hailo_status hailo_make_output_vstream_params(hailo_configured_network_group network_group, bool quantized, +HAILORTAPI hailo_status hailo_make_output_vstream_params(hailo_configured_network_group network_group, bool unused, hailo_format_type_t format_type, hailo_output_vstream_params_by_name_t *output_params, size_t *output_params_count); diff --git a/hailort/libhailort/include/hailo/hailort_common.hpp b/hailort/libhailort/include/hailo/hailort_common.hpp index 00259a7..203c9a6 100644 --- a/hailort/libhailort/include/hailo/hailort_common.hpp +++ b/hailort/libhailort/include/hailo/hailort_common.hpp @@ -42,6 +42,8 @@ public: static const uint16_t ETH_INPUT_BASE_PORT = 32401; static const uint16_t ETH_OUTPUT_BASE_PORT = 32501; static const uint32_t MAX_NMS_BURST_SIZE = 65536; + static const size_t DMA_ABLE_ALIGNMENT_WRITE_HW_LIMITATION = 64; + static const size_t DMA_ABLE_ALIGNMENT_READ_HW_LIMITATION = 4096; /** * Gets the NMS host shape size (number of elements) from NMS info. @@ -82,7 +84,8 @@ public: * @param[in] alignment Returned number should be aligned to this parameter. * @return aligned number */ - static constexpr uint32_t align_to(uint32_t num, uint32_t alignment) { + template + static constexpr T align_to(T num, T alignment) { auto remainder = num % alignment; return remainder == 0 ? 
num : num + (alignment - remainder); } @@ -182,6 +185,8 @@ public: return "HAILO15H"; case HAILO_ARCH_PLUTO: return "PLUTO"; + case HAILO_ARCH_HAILO15M: + return "HAILO15M"; default: return "UNKNOWN ARCHITECTURE"; } @@ -362,6 +367,10 @@ public: format.type = vstream_info.format.type; } + if (HAILO_FORMAT_ORDER_AUTO == format.order) { + format.order = vstream_info.format.order; + } + if (HailoRTCommon::is_nms(vstream_info)) { return get_nms_host_frame_size(vstream_info.nms_shape, format); } else { @@ -369,6 +378,19 @@ public: } } + /** + * Gets periph frame size in bytes by image shape and format - periph frame size is amount of bytes transferred + * through peripherals which must be aligned to HW_DATA_ALIGNMENT (8). Note: this function always aligns to next largest HW_DATA_ALIGNMENT + * + * @param[in] shape A ::hailo_3d_image_shape_t object. + * @param[in] format A ::hailo_format_t object. + * @return The periph frame's size in bytes. + */ + static constexpr uint32_t get_periph_frame_size(const hailo_3d_image_shape_t &shape, const hailo_format_t &format) + { + return align_to(get_frame_size(shape, format), static_cast(HW_DATA_ALIGNMENT)); + } + static constexpr bool is_vdma_stream_interface(hailo_stream_interface_t stream_interface) { return (HAILO_STREAM_INTERFACE_PCIE == stream_interface) || (HAILO_STREAM_INTERFACE_INTEGRATED == stream_interface); diff --git a/hailort/libhailort/include/hailo/hailort_defaults.hpp b/hailort/libhailort/include/hailo/hailort_defaults.hpp index c0edbac..58bc8d0 100644 --- a/hailort/libhailort/include/hailo/hailort_defaults.hpp +++ b/hailort/libhailort/include/hailo/hailort_defaults.hpp @@ -31,14 +31,14 @@ public: static hailo_format_t expand_auto_format(const hailo_format_t &host_format, const hailo_format_t &hw_format); static hailo_format_t get_user_buffer_format(); - static hailo_format_t get_user_buffer_format(bool quantized, hailo_format_type_t format_type); + static hailo_format_t get_user_buffer_format(bool unused, 
hailo_format_type_t format_type); - static hailo_transform_params_t get_transform_params(bool quantized, hailo_format_type_t format_type); + static hailo_transform_params_t get_transform_params(bool unused, hailo_format_type_t format_type); static hailo_transform_params_t get_transform_params(const hailo_stream_info_t &stream_info); static hailo_transform_params_t get_transform_params(); static hailo_vstream_params_t get_vstreams_params(); - static hailo_vstream_params_t get_vstreams_params(bool quantized, hailo_format_type_t format_type); + static hailo_vstream_params_t get_vstreams_params(bool unused, hailo_format_type_t format_type); static Expected get_stream_parameters(hailo_stream_interface_t interface, hailo_stream_direction_t direction); diff --git a/hailort/libhailort/include/hailo/hef.hpp b/hailort/libhailort/include/hailo/hef.hpp index 9127857..3ddd8b2 100644 --- a/hailort/libhailort/include/hailo/hef.hpp +++ b/hailort/libhailort/include/hailo/hef.hpp @@ -89,10 +89,10 @@ public: * the function returns the input stream infos of the given network. * If NULL is passed, the function returns the input stream infos of * all the networks of the first network group. - * @return Upon success, returns a vector of ::hailo_stream_info_t, containing each stream's informmation. + * @return Upon success, returns a vector of ::hailo_stream_info_t, containing each stream's information. * Otherwise, returns a ::hailo_status error. */ - Expected> get_input_stream_infos(const std::string &name=""); + Expected> get_input_stream_infos(const std::string &name="") const; /** * Gets output streams informations. @@ -104,10 +104,10 @@ public: * the function returns the output stream infos of the given network. * If NULL is passed, the function returns the output stream infos of * all the networks of the first network group. - * @return Upon success, returns a vector of ::hailo_stream_info_t, containing each stream's informmation. 
+ * @return Upon success, returns a vector of ::hailo_stream_info_t, containing each stream's information. * Otherwise, returns a ::hailo_status error. */ - Expected> get_output_stream_infos(const std::string &name=""); + Expected> get_output_stream_infos(const std::string &name="") const; /** * Gets all streams informations. @@ -119,10 +119,10 @@ public: * the function returns all stream infos of the given network. * If NULL is passed, the function returns all the stream infos of * all the networks of the first network group. - * @return Upon success, returns Expected of a vector of ::hailo_stream_info_t, containing each stream's informmation. + * @return Upon success, returns Expected of a vector of ::hailo_stream_info_t, containing each stream's information. * Otherwise, returns Unexpected of ::hailo_status error. */ - Expected> get_all_stream_infos(const std::string &name=""); + Expected> get_all_stream_infos(const std::string &name="") const; /** * Gets stream's information from it's name. @@ -135,7 +135,7 @@ public: * */ Expected get_stream_info_by_name(const std::string &stream_name, - hailo_stream_direction_t stream_direction, const std::string &net_group_name=""); + hailo_stream_direction_t stream_direction, const std::string &net_group_name="") const; /** * Gets input virtual streams infos. @@ -150,7 +150,7 @@ public: * @return Upon success, returns Expected of a vector of ::hailo_vstream_info_t. * Otherwise, returns Unexpected of ::hailo_status error. */ - Expected> get_input_vstream_infos(const std::string &name=""); + Expected> get_input_vstream_infos(const std::string &name="") const; /** * Gets output virtual streams infos. @@ -166,7 +166,7 @@ public: * @return Upon success, returns Expected of a vector of ::hailo_vstream_info_t. * Otherwise, returns Unexpected of ::hailo_status error. 
*/ - Expected> get_output_vstream_infos(const std::string &name=""); + Expected> get_output_vstream_infos(const std::string &name="") const; /** * Gets all virtual streams infos. @@ -182,7 +182,7 @@ public: * @return Upon success, returns Expected of a vector of ::hailo_vstream_info_t. * Otherwise, returns Unexpected of ::hailo_status error. */ - Expected> get_all_vstream_infos(const std::string &name=""); + Expected> get_all_vstream_infos(const std::string &name="") const; /** * Gets sorted output vstreams names. @@ -192,7 +192,7 @@ public: * @return Upon success, returns Expected of a sorted vector of output vstreams names. * Otherwise, returns Unexpected of ::hailo_status error. */ - Expected> get_sorted_output_names(const std::string &net_group_name=""); + Expected> get_sorted_output_names(const std::string &net_group_name="") const; /** * Gets the number of low-level input streams. @@ -202,7 +202,7 @@ public: * @return Upon success, returns Expected containing the number of low-level input streams. * Otherwise, returns Unexpected of ::hailo_status error. */ - Expected get_number_of_input_streams(const std::string &net_group_name=""); + Expected get_number_of_input_streams(const std::string &net_group_name="") const; /** * Gets the number of low-level output streams. @@ -212,7 +212,7 @@ public: * @return Upon success, returns Expected containing the number of low-level output streams. * Otherwise, returns Unexpected of ::hailo_status error. */ - Expected get_number_of_output_streams(const std::string &net_group_name=""); + Expected get_number_of_output_streams(const std::string &net_group_name="") const; /** * Gets bottleneck FPS. @@ -222,7 +222,7 @@ public: * @return Upon success, returns Expected containing the bottleneck FPS number. * Otherwise, returns Unexpected of ::hailo_status error. 
*/ - Expected get_bottleneck_fps(const std::string &net_group_name=""); + Expected get_bottleneck_fps(const std::string &net_group_name="") const; /** * Get device Architecture HEF was compiled for. @@ -230,7 +230,7 @@ public: * @return Upon success, returns Expected containing the device architecture the HEF was compiled for. * Otherwise, returns Unexpected of ::hailo_status error. */ - Expected get_hef_device_arch(); + Expected get_hef_device_arch() const; /** * Get string of device architecture HEF was compiled for. @@ -251,7 +251,7 @@ public: * Otherwise, returns Unexpected of ::hailo_status error. */ Expected> get_stream_names_from_vstream_name(const std::string &vstream_name, - const std::string &net_group_name=""); + const std::string &net_group_name="") const; /** * Get all vstream names under the given stream name @@ -264,7 +264,7 @@ public: * Otherwise, returns Unexpected of ::hailo_status error. */ Expected> get_vstream_names_from_stream_name(const std::string &stream_name, - const std::string &net_group_name=""); + const std::string &net_group_name="") const; /** * Gets vstream name from original layer name. @@ -276,7 +276,7 @@ public: * Otherwise, returns Unexpected of ::hailo_status error. */ Expected get_vstream_name_from_original_name(const std::string &original_name, - const std::string &net_group_name=""); + const std::string &net_group_name="") const; /** * Gets original names from vstream name. @@ -288,14 +288,14 @@ public: * Otherwise, returns Unexpected of ::hailo_status error. */ Expected> get_original_names_from_vstream_name(const std::string &vstream_name, - const std::string &net_group_name=""); + const std::string &net_group_name="") const; /** * Gets all network groups names in the Hef. * * @return Returns a vector of all network groups names. */ - std::vector get_network_groups_names(); + std::vector get_network_groups_names() const; /** * Gets all network groups infos in the Hef. 
@@ -303,7 +303,7 @@ public: * @return Upon success, returns Expected of a vector of ::hailo_network_group_info_t. * Otherwise, returns Unexpected of ::hailo_status error. */ - Expected> get_network_groups_infos(); + Expected> get_network_groups_infos() const; /** * Creates the default configure params for the Hef. The user can modify the given params before @@ -398,18 +398,15 @@ public: * the function returns the input virtual stream params of the given network. * If NULL is passed, the function returns the input virtual stream params of * all the networks of the first network group. - * @param[in] quantized Deprecated parameter that will be ignored. Determine whether to quantize (scale) - * the data will be decided by the src-data and dst-data types. + * @param[in] unused Unused. * @param[in] format_type The default format type for all input virtual streams. * @param[in] timeout_ms The default timeout in milliseconds for all input virtual streams. * @param[in] queue_size The default queue size for all input virtual streams. * @return Upon success, returns Expected of a map of input virtual stream name to params. * Otherwise, returns Unexpected of ::hailo_status error. - * @note The argument @a quantized is deprecated and its usage is ignored. Determine whether to quantize (scale) the data will be decided by - * the src-data and dst-data types. */ Expected> make_input_vstream_params( - const std::string &name, bool quantized, hailo_format_type_t format_type, + const std::string &name, bool unused, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size); /** @@ -422,18 +419,15 @@ public: * the function returns the output virtual stream params of the given network. * If NULL is passed, the function returns the output virtual stream params of * all the networks of the first network group. - * @param[in] quantized Deprecated parameter that will be ignored. 
Determine whether to de-quantize (rescale) - the data will be decided by the src-data and dst-data types. + * @param[in] unused Unused. * @param[in] format_type The default format type for all output virtual streams. * @param[in] timeout_ms The default timeout in milliseconds for all output virtual streams. * @param[in] queue_size The default queue size for all output virtual streams. * @return Upon success, returns Expected of a map of output virtual stream name to params. * Otherwise, returns Unexpected of ::hailo_status error. - * @note The argument @a quantized is deprecated and its usage is ignored. Determine whether to de-quantize (rescale) the data will be decided by - * the src-data and dst-data types. */ Expected> make_output_vstream_params( - const std::string &name, bool quantized, hailo_format_type_t format_type, + const std::string &name, bool unused, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size); /** @@ -442,10 +436,10 @@ public: * @param[in] net_group_name The name of the network_group which contains the network information. * If NULL is passed, the function returns the network infos of * all the networks of the first network group. - * @return Upon success, returns Expected of a vector of ::hailo_network_info_t, containing each networks's informmation. + * @return Upon success, returns Expected of a vector of ::hailo_network_info_t, containing each network's information. * Otherwise, returns Unexpected of ::hailo_status error. */ - Expected> get_network_infos(const std::string &net_group_name=""); + Expected> get_network_infos(const std::string &net_group_name="") const; /** * Returns a unique hash for the specific Hef. 
@@ -454,7 +448,7 @@ public: */ std::string hash() const; - Expected get_description(bool stream_infos, bool vstream_infos); + Expected get_description(bool stream_infos, bool vstream_infos) const; ~Hef(); Hef(Hef &&); diff --git a/hailort/libhailort/include/hailo/infer_model.hpp b/hailort/libhailort/include/hailo/infer_model.hpp index 4a20423..258cc89 100644 --- a/hailort/libhailort/include/hailo/infer_model.hpp +++ b/hailort/libhailort/include/hailo/infer_model.hpp @@ -22,6 +22,7 @@ namespace hailort { class ConfiguredInferModelImpl; +class AsyncInferRunnerImpl; class HAILORTAPI AsyncInferJob { public: @@ -45,18 +46,24 @@ private: bool m_should_wait_in_dtor; }; -struct CompletionInfoAsyncInfer; +struct AsyncInferCompletionInfo; class HAILORTAPI ConfiguredInferModel { public: + ConfiguredInferModel() = default; + class HAILORTAPI Bindings { public: + Bindings() = default; + class HAILORTAPI InferStream { public: hailo_status set_buffer(MemoryView view); - MemoryView get_buffer(); + Expected get_buffer(); + hailo_status set_pix_buffer(const hailo_pix_buffer_t &pix_buffer); + Expected get_pix_buffer(); private: friend class ConfiguredInferModelImpl; @@ -87,7 +94,12 @@ public: void deactivate(); hailo_status run(Bindings bindings, std::chrono::milliseconds timeout); Expected run_async(Bindings bindings, - std::function callback = [] (const CompletionInfoAsyncInfer &) {}); + std::function callback = [] (const AsyncInferCompletionInfo &) {}); + Expected get_hw_latency_measurement(const std::string &network_name = ""); + hailo_status set_scheduler_timeout(const std::chrono::milliseconds &timeout); + hailo_status set_scheduler_threshold(uint32_t threshold); + hailo_status set_scheduler_priority(uint8_t priority); + Expected get_async_queue_size(); private: friend class InferModel; @@ -97,9 +109,9 @@ private: std::shared_ptr m_pimpl; }; -struct HAILORTAPI CompletionInfoAsyncInfer +struct HAILORTAPI AsyncInferCompletionInfo { - 
CompletionInfoAsyncInfer(ConfiguredInferModel::Bindings _bindings, hailo_status _status) : bindings(_bindings), status(_status) + AsyncInferCompletionInfo(ConfiguredInferModel::Bindings _bindings, hailo_status _status) : bindings(_bindings), status(_status) { } @@ -115,22 +127,36 @@ public: class HAILORTAPI InferStream { public: + // TODO: explain that the getters return what the user defined with set_ functions const std::string name() const; + hailo_3d_image_shape_t shape() const; + hailo_format_t format() const; size_t get_frame_size() const; + Expected get_nms_shape() const; + void set_format_type(hailo_format_type_t type); void set_format_order(hailo_format_order_t order); + std::vector get_quant_infos() const; + bool is_nms() const; + void set_nms_score_threshold(float32_t threshold); + void set_nms_iou_threshold(float32_t threshold); + void set_nms_max_proposals_per_class(uint32_t max_proposals_per_class); private: friend class InferModel; - friend class VDeviceBase; + friend class VDevice; class Impl; InferStream(std::shared_ptr pimpl); - hailo_format_t get_user_buffer_format(); std::shared_ptr m_pimpl; }; + const Hef &hef() const; + void set_batch_size(uint16_t batch_size); + void set_power_mode(hailo_power_mode_t power_mode); + void set_hw_latency_measurement_flags(hailo_latency_measurement_flags_t latency); + Expected configure(const std::string &network_name = ""); Expected input(); Expected output(); @@ -143,8 +169,11 @@ public: InferModel(InferModel &&); + Expected configure_for_ut(std::shared_ptr async_infer_runner, + const std::vector &input_names, const std::vector &output_names); + private: - friend class VDeviceBase; + friend class VDevice; InferModel(VDevice &vdevice, Hef &&hef, std::unordered_map &&inputs, std::unordered_map &&outputs); @@ -157,6 +186,7 @@ private: std::vector m_outputs_vector; std::vector m_input_names; std::vector m_output_names; + ConfigureNetworkParams m_config_params; }; } /* namespace hailort */ diff --git 
a/hailort/libhailort/include/hailo/network_group.hpp b/hailort/libhailort/include/hailo/network_group.hpp index d80a062..9b76547 100644 --- a/hailort/libhailort/include/hailo/network_group.hpp +++ b/hailort/libhailort/include/hailo/network_group.hpp @@ -17,13 +17,24 @@ #include #include #include +#include +#include /** hailort namespace */ namespace hailort { +namespace net_flow { +class OpMetadata; +using PostProcessOpMetadataPtr = std::shared_ptr; +} + +using NamedBuffersCallbacks = std::unordered_map>>; + class InputVStream; class OutputVStream; +struct LayerInfo; + /** @addtogroup group_type_definitions */ /*@{*/ @@ -93,7 +104,7 @@ public: ConfiguredNetworkGroup(const ConfiguredNetworkGroup &other) = delete; ConfiguredNetworkGroup &operator=(const ConfiguredNetworkGroup &other) = delete; ConfiguredNetworkGroup &operator=(ConfiguredNetworkGroup &&other) = delete; - ConfiguredNetworkGroup(ConfiguredNetworkGroup &&other) noexcept = default; + ConfiguredNetworkGroup(ConfiguredNetworkGroup &&other) noexcept = delete; /** * @return The network group name. @@ -226,11 +237,22 @@ public: */ virtual hailo_status wait_for_activation(const std::chrono::milliseconds &timeout) = 0; + /** + * Shutdown the network group. Makes sure all ongoing async operations are canceled. All async callbacks + * of transfers that have not been completed will be called with status ::HAILO_STREAM_ABORTED_BY_USER. + * Any resources attached to the network group may be released after function returns. + * + * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error. + * + * @note Calling this function is optional, and it is used to shutdown network group while there is still ongoing + * inference. + */ + virtual hailo_status shutdown() = 0; + /** * Creates input virtual stream params. * - * @param[in] quantized Deprecated parameter that will be ignored. Determine whether to quantize (scale) - * the data will be decided by the src-data and dst-data types. 
+ * @param[in] unused Unused. * @param[in] format_type The default format type for all input virtual streams. * @param[in] timeout_ms The default timeout in milliseconds for all input virtual streams. * @param[in] queue_size The default queue size for all input virtual streams. @@ -238,18 +260,15 @@ public: * If not passed, all the networks in the network group will be addressed. * @return Upon success, returns Expected of a map of name to vstream params. * Otherwise, returns Unexpected of ::hailo_status error. - * @note The argument @a quantized is deprecated and its usage is ignored. Determine whether to quantize (scale) the data will be decided by - * the src-data and dst-data types. */ virtual Expected> make_input_vstream_params( - bool quantized, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size, + bool unused, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size, const std::string &network_name="") = 0; /** * Creates output virtual stream params. * - * @param[in] quantized Deprecated parameter that will be ignored. Determine whether to de-quantize (rescale) - * the data will be decided by the src-data and dst-data types. + * @param[in] unused Unused. * @param[in] format_type The default format type for all output virtual streams. * @param[in] timeout_ms The default timeout in milliseconds for all output virtual streams. * @param[in] queue_size The default queue size for all output virtual streams. @@ -257,28 +276,23 @@ public: * If not passed, all the networks in the network group will be addressed. * @return Upon success, returns Expected of a map of name to vstream params. * Otherwise, returns Unexpected of ::hailo_status error. - * @note The argument @a quantized is deprecated and its usage is ignored. Determine whether to de-quantize (rescale) the data will be decided by - * the src-data and dst-data types. 
*/ virtual Expected> make_output_vstream_params( - bool quantized, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size, + bool unused, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size, const std::string &network_name="") = 0; /** * Creates output virtual stream params. The groups are splitted with respect to their low-level streams. * - * @param[in] quantized Deprecated parameter that will be ignored. Determine whether to de-quantize (rescale) - * the data will be decided by the src-data and dst-data types. + * @param[in] unused Unused. * @param[in] format_type The default format type for all output virtual streams. * @param[in] timeout_ms The default timeout in milliseconds for all output virtual streams. * @param[in] queue_size The default queue size for all output virtual streams. * @return Upon success, returns Expected of a vector of maps, mapping name to vstream params, where each map represents a params group. * Otherwise, returns Unexpected of ::hailo_status error. - * @note The argument @a quantized is deprecated and its usage is ignored. Determine whether to de-quantize (rescale) the data will be decided by - * the src-data and dst-data types. */ virtual Expected>> make_output_vstream_params_groups( - bool quantized, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size) = 0; + bool unused, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size) = 0; /** * Gets output virtual stream groups for given network_group. The groups are splitted with respect to their low-level streams. @@ -343,7 +357,7 @@ public: * @param[in] network_name Network name for which to set the timeout. * If not passed, the timeout will be set for all the networks in the network group. * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error. 
- * @note Using this function is only allowed when scheduling_algorithm is not ::HAILO_SCHEDULING_ALGORITHM_NONE, and before the creation of any vstreams. + * @note Using this function is only allowed when scheduling_algorithm is not ::HAILO_SCHEDULING_ALGORITHM_NONE. * @note The default timeout is 0ms. * @note Currently, setting the timeout for a specific network is not supported. */ @@ -358,7 +372,7 @@ public: * @param[in] network_name Network name for which to set the threshold. * If not passed, the threshold will be set for all the networks in the network group. * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error. - * @note Using this function is only allowed when scheduling_algorithm is not ::HAILO_SCHEDULING_ALGORITHM_NONE, and before the creation of any vstreams. + * @note Using this function is only allowed when scheduling_algorithm is not ::HAILO_SCHEDULING_ALGORITHM_NONE. * @note The default threshold is 1. * @note Currently, setting the threshold for a specific network is not supported. 
*/ @@ -394,6 +408,7 @@ public: virtual Expected> create_input_vstreams(const std::map &inputs_params) = 0; virtual Expected> create_output_vstreams(const std::map &outputs_params) = 0; + virtual Expected get_min_buffer_pool_size() = 0; virtual Expected run_hw_infer_estimator() = 0; @@ -410,11 +425,28 @@ public: virtual hailo_status after_fork_in_parent(); virtual hailo_status after_fork_in_child(); + virtual hailo_status infer_async(const NamedBuffersCallbacks &named_buffers_callbacks, + const std::function &infer_request_done_cb) = 0; + virtual Expected> get_ops_metadata() = 0; + virtual Expected> get_layer_info(const std::string &stream_name) = 0; + hailo_status wait_for_callbacks_finish(); + hailo_status wait_for_callbacks_to_maintain_below_threshold(size_t threshold); + void decrease_ongoing_callbacks(); + void increase_ongoing_callbacks(); + + virtual hailo_status set_nms_score_threshold(const std::string &edge_name, float32_t nms_score_threshold) = 0; + virtual hailo_status set_nms_iou_threshold(const std::string &edge_name, float32_t iou_threshold) = 0; + virtual hailo_status set_nms_max_bboxes_per_class(const std::string &edge_name, uint32_t max_bboxes_per_class) = 0; + protected: - ConfiguredNetworkGroup() = default; + ConfiguredNetworkGroup(); + std::mutex m_infer_requests_mutex; + std::atomic_size_t m_ongoing_transfers; + std::condition_variable m_cv; private: friend class ActivatedNetworkGroup; + friend class PipelineBuilder; }; using ConfiguredNetworkGroupVector = std::vector>; diff --git a/hailort/libhailort/include/hailo/stream.hpp b/hailort/libhailort/include/hailo/stream.hpp index d5ee93e..3f21bdf 100644 --- a/hailort/libhailort/include/hailo/stream.hpp +++ b/hailort/libhailort/include/hailo/stream.hpp @@ -77,15 +77,19 @@ public: * Aborting the stream. * * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error. + * @note This function is deprecated. 
One should use ConfiguredNetworkGroup::shutdown() */ - virtual hailo_status abort() = 0; + virtual hailo_status abort() + DEPRECATED("InputStream::abort is deprecated. One should use ConfiguredNetworkGroup::shutdown()") = 0; /** * Clearing the aborted state of the stream. * * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error. + * @note This function is deprecated. To reuse network after shutdown, reconfigure it. */ - virtual hailo_status clear_abort() = 0; + virtual hailo_status clear_abort() + DEPRECATED("InputStream::clear_abort() is deprecated. To reuse network after shutdown, reconfigure it") = 0; /** * Writes all pending data to the underlying stream. @@ -179,6 +183,8 @@ public: * @note The address provided must be aligned to the system's page size, and the rest of the page should not be in * use by any other part of the program to ensure proper functioning of the DMA operation. Memory for the * provided address can be allocated using `mmap` on Unix-like systems or `VirtualAlloc` on Windows. + * @note Pre-mapping @a buffer to DMA via `Device::dma_map()` may improve performance, if @a buffer is used for + * multiple async transfers. */ virtual hailo_status write_async(const MemoryView &buffer, const TransferDoneCallback &user_callback) = 0; @@ -212,6 +218,8 @@ public: * @note The address provided must be aligned to the system's page size, and the rest of the page should not be in * use by any other part of the program to ensure proper functioning of the DMA operation. Memory for the * provided address can be allocated using `mmap` on Unix-like systems or `VirtualAlloc` on Windows. + * @note Pre-mapping @a buffer to DMA via `Device::dma_map()` may improve performance, if @a buffer is used for + * multiple async transfers. */ virtual hailo_status write_async(const void *buffer, size_t size, const TransferDoneCallback &user_callback) = 0; @@ -316,15 +324,19 @@ public: * Aborting the stream. 
* * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error. + * @note This function is deprecated. One should use ConfiguredNetworkGroup::shutdown() */ - virtual hailo_status abort() = 0; + virtual hailo_status abort() + DEPRECATED("OutputStream::abort is deprecated. One should use ConfiguredNetworkGroup::shutdown()")= 0; /** * Clearing the abort flag of the stream. * * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error. + * @note This function is deprecated. To reuse network after shutdown, reconfigure it. */ - virtual hailo_status clear_abort() = 0; + virtual hailo_status clear_abort() + DEPRECATED("OutputStream::clear_abort is deprecated. To reuse network after shutdown, reconfigure it") = 0; /** * @returns a pointer for network group activated event. @@ -453,6 +465,8 @@ public: * @note The address provided must be aligned to the system's page size, and the rest of the page should not be in * use by any other part of the program to ensure proper functioning of the DMA operation. Memory for the * provided address can be allocated using `mmap` on Unix-like systems or `VirtualAlloc` on Windows. + * @note Pre-mapping @a buffer to DMA via `Device::dma_map()` may improve performance, if @a buffer is used for + * multiple async transfers. */ virtual hailo_status read_async(MemoryView buffer, const TransferDoneCallback &user_callback) = 0; @@ -486,6 +500,8 @@ public: * @note The address provided must be aligned to the system's page size, and the rest of the page should not be in * use by any other part of the program to ensure proper functioning of the DMA operation. Memory for the * provided address can be allocated using `mmap` on Unix-like systems or `VirtualAlloc` on Windows. + * @note Pre-mapping @a buffer to DMA via `Device::dma_map()` may improve performance, if @a buffer is used for + * multiple async transfers. 
*/ virtual hailo_status read_async(void *buffer, size_t size, const TransferDoneCallback &user_callback) = 0; diff --git a/hailort/libhailort/include/hailo/transform.hpp b/hailort/libhailort/include/hailo/transform.hpp index 2a77799..b5bf04f 100644 --- a/hailort/libhailort/include/hailo/transform.hpp +++ b/hailort/libhailort/include/hailo/transform.hpp @@ -77,15 +77,12 @@ public: * Creates input transform_context. * * @param[in] stream_info Creates transform_context that fits this stream info. - * @param[in] quantized Deprecated parameter that will be ignored. Determine weather to quantize (scale) - * the data will be decided by the src-data and dst-data types. + * @param[in] unused Unused. * @param[in] format_type The type of the buffer sent to the transform_context. * @return Upon success, returns Expected of a pointer to InputTransformContext. * Otherwise, returns Unexpected of ::hailo_status error. - * @note The argument @a quantized is deprecated and its usage is ignored. Determine weather to quantize (scale) the data will be decided by - * the src-data and dst-data types. 
*/ - static Expected> create(const hailo_stream_info_t &stream_info, bool quantized, + static Expected> create(const hailo_stream_info_t &stream_info, bool unused, hailo_format_type_t format_type); /** @@ -159,7 +156,8 @@ private: InputTransformContext(size_t src_frame_size, const hailo_3d_image_shape_t &src_image_shape, const hailo_format_t &src_format, size_t dst_frame_size, const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, const std::vector &dst_quant_infos, Buffer &&quant_buffer, - Buffer &&transpose_buffer, const bool should_quantize, const bool should_transpose, const bool should_reorder); + Buffer &&transpose_buffer, const bool should_quantize, const bool should_transpose, const bool should_reorder, + const bool should_pad_periph); inline MemoryView quant_buffer() { return MemoryView(m_quant_buffer); @@ -184,6 +182,7 @@ private: const bool m_should_quantize; const bool m_should_transpose; const bool m_should_reorder; + const bool m_should_pad_periph; Buffer m_quant_buffer; Buffer m_transpose_buffer; @@ -249,15 +248,12 @@ public: * Creates output transform_context with default transform parameters * * @param[in] stream_info Creates transform_context that fits this stream info. - * @param[in] quantized Deprecated parameter that will be ignored. Determine weather to de-quantize (rescale) - * the data will be decided by the src-data and dst-data types. + * @param[in] unused Unused. * @param[in] format_type The type of the buffer returned from the transform_context * @return Upon success, returns Expected of a pointer to OutputTransformContext. * Otherwise, returns Unexpected of ::hailo_status error. - * @note The argument @a quantized is deprecated and its usage is ignored. Determine weather to de-quantize (rescale) the data will be decided by - * the src-data and dst-data types. 
*/ - static Expected> create(const hailo_stream_info_t &stream_info, bool quantized, + static Expected> create(const hailo_stream_info_t &stream_info, bool unused, hailo_format_type_t format_type); /** @@ -330,7 +326,7 @@ public: protected: OutputTransformContext(size_t src_frame_size, const hailo_format_t &src_format, size_t dst_frame_size, const hailo_format_t &dst_format, const std::vector &dst_quant_infos, const bool should_quantize, - const bool should_transpose, const bool should_reorder); + const bool should_transpose, const bool should_reorder, const bool should_pad_periph); const size_t m_src_frame_size; const hailo_format_t m_src_format; @@ -340,6 +336,7 @@ protected: const bool m_should_quantize; const bool m_should_transpose; const bool m_should_reorder; + const bool m_should_pad_periph; }; /*! Object used to demux muxed stream */ diff --git a/hailort/libhailort/include/hailo/vdevice.hpp b/hailort/libhailort/include/hailo/vdevice.hpp index b8566db..fd8f40a 100644 --- a/hailort/libhailort/include/hailo/vdevice.hpp +++ b/hailort/libhailort/include/hailo/vdevice.hpp @@ -15,13 +15,13 @@ #include "hailo/hef.hpp" #include "hailo/network_group.hpp" #include "hailo/device.hpp" -#include "hailo/infer_model.hpp" /** hailort namespace */ namespace hailort { +class InferModel; /*! Represents a bundle of physical devices. */ class HAILORTAPI VDevice { @@ -64,8 +64,8 @@ public: */ virtual Expected configure(Hef &hef, const NetworkGroupsParamsMap &configure_params={}) = 0; - - virtual Expected create_infer_model(const std::string &hef_path) = 0; + + virtual Expected> create_infer_model(const std::string &hef_path); /** * Gets the underlying physical devices. @@ -111,6 +111,40 @@ public: */ Expected create_configure_params(Hef &hef, const std::string &network_group_name) const; + // TODO: Also link to async infer - ConfiguredInferModel, Bindings etc. 
Just like we did for + // InputStream::write_async and OutputStream::read_async (HRT-11039) + /** + * Maps the buffer pointed to by @a address for DMA transfers to/from this vdevice, in the specified + * @a data_direction. + * DMA mapping of buffers in advance may improve the performance of `InputStream::write_async()` or + * `OutputStream::read_async()`. This improvement will be realized if the buffer is reused multiple times + * across different async operations. + * - For buffers that will be written to the vdevice via `InputStream::write_async()`, use `HAILO_H2D_STREAM` + * for the @a direction parameter. + * - For buffers that will be read from the vdevice via `OutputStream::read_async()`, use `HAILO_D2H_STREAM` + * for the @a direction parameter. + * + * @param[in] address The address of the buffer to be mapped + * @param[in] size The buffer's size in bytes + * @param[in] direction The direction of the mapping + * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error. + * @note The DMA mapping will be freed upon calling dma_unmap() with @a address and @a data_direction, or when the + * @a VDevice object is destroyed. + * @note The buffer pointed to by @a address cannot be freed until it is unmapped (via dma_unmap() or @a VDevice + * destruction). + */ + virtual hailo_status dma_map(void *address, size_t size, hailo_stream_direction_t direction); + + /** + * Un-maps a buffer buffer pointed to by @a address for DMA transfers to/from this vdevice, in the direction + * @a direction. + * + * @param[in] address The address of the buffer to be un-mapped + * @param[in] direction The direction of the mapping + * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error. 
+ */ + virtual hailo_status dma_unmap(void *address, hailo_stream_direction_t direction); + virtual hailo_status before_fork(); virtual hailo_status after_fork_in_parent(); virtual hailo_status after_fork_in_child(); diff --git a/hailort/libhailort/include/hailo/vstream.hpp b/hailort/libhailort/include/hailo/vstream.hpp index e9d97f8..76d17f7 100644 --- a/hailort/libhailort/include/hailo/vstream.hpp +++ b/hailort/libhailort/include/hailo/vstream.hpp @@ -391,13 +391,7 @@ public: * Creates input virtual streams and output virtual streams. * * @param[in] net_group Configured network group that owns the streams. - * @param[in] quantized Default quantized parameter for all virtual streams. - * For input vstreams indicates whether the data fed into the chip is already quantized. - * True means the data is already quantized. - * False means it's HailoRT's responsibility to quantize (scale) the data. - * For output vstreams indicates whether the data returned from the device should be quantized. - * True means that the data returned to the user is still quantized. - * False means it's HailoRT's responsibility to de-quantize (rescale) the data. + * @param[in] unused Unused. * @param[in] format_type The default format type for all virtual streams. * @param[in] network_name Request to create vstreams of specific network inside the configured network group. * If not passed, all the networks in the network group will be addressed. @@ -405,7 +399,7 @@ public: * Otherwise, returns Unexpected of ::hailo_status error. 
*/ static Expected, std::vector>> create_vstreams( - ConfiguredNetworkGroup &net_group, bool quantized, hailo_format_type_t format_type, + ConfiguredNetworkGroup &net_group, bool unused, hailo_format_type_t format_type, const std::string &network_name=""); /** diff --git a/hailort/libhailort/src/core_op/CMakeLists.txt b/hailort/libhailort/src/core_op/CMakeLists.txt index a8d7a54..79c3218 100644 --- a/hailort/libhailort/src/core_op/CMakeLists.txt +++ b/hailort/libhailort/src/core_op/CMakeLists.txt @@ -9,6 +9,7 @@ set(SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/resource_manager/intermediate_buffer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/resource_manager/channel_allocator.cpp ${CMAKE_CURRENT_SOURCE_DIR}/resource_manager/context_switch_buffer_builder.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/resource_manager/periph_calculator.cpp ) set(HAILORT_CPP_SOURCES ${HAILORT_CPP_SOURCES} ${SRC_FILES} PARENT_SCOPE) diff --git a/hailort/libhailort/src/core_op/core_op.cpp b/hailort/libhailort/src/core_op/core_op.cpp index 56f512a..83fcc4f 100644 --- a/hailort/libhailort/src/core_op/core_op.cpp +++ b/hailort/libhailort/src/core_op/core_op.cpp @@ -330,6 +330,28 @@ hailo_status CoreOp::deactivate_low_level_streams() return status; } +hailo_status CoreOp::abort_low_level_streams() +{ + auto status = HAILO_SUCCESS; // Success oriented + + for (auto &name_pair : m_input_streams) { + auto abort_status = name_pair.second->abort_impl(); + if (HAILO_SUCCESS != abort_status) { + LOGGER__ERROR("Failed to abort stream {}", name_pair.first); + status = abort_status; + } + } + for (auto &name_pair : m_output_streams) { + auto abort_status = name_pair.second->abort_impl(); + if (HAILO_SUCCESS != abort_status) { + LOGGER__ERROR("Failed to abort stream {}", name_pair.first); + status = abort_status; + } + } + + return status; +} + const SupportedFeatures &CoreOp::get_supported_features() { return m_metadata->supported_features(); @@ -393,6 +415,50 @@ hailo_status CoreOp::wrap_streams_for_remote_process() return 
HAILO_SUCCESS; } +Expected CoreOp::get_async_max_queue_size() const +{ + size_t queue_size = std::numeric_limits::max(); + + for (const auto &input : m_input_streams) { + auto stream_queue_size = input.second->get_async_max_queue_size(); + CHECK_EXPECTED(stream_queue_size); + queue_size = std::min(queue_size, *stream_queue_size); + } + + for (const auto &output : m_output_streams) { + auto stream_queue_size = output.second->get_async_max_queue_size(); + CHECK_EXPECTED(stream_queue_size); + queue_size = std::min(queue_size, *stream_queue_size); + } + + return queue_size; +} + +hailo_status CoreOp::infer_async(InferRequest &&request) +{ + assert(request.transfers.size() == (m_input_streams.size() + m_output_streams.size())); + + // To optimize allocation on runtime, we can use some fixed slab-allocator + auto state = make_shared_nothrow(); + CHECK_NOT_NULL(state, HAILO_OUT_OF_HOST_MEMORY); + state->callbacks_left = request.transfers.size(); + state->status = HAILO_SUCCESS; // Success oriented, on any failure, modify this + + auto transfers_copy = request.transfers; + auto status = infer_async_impl(transfers_copy, state, request.callback); + if (HAILO_SUCCESS != status) { + // infer_async_impl remove all launched transfers from transfer_copy. 
Here, we finish all callbacks left + for (auto &transfer : transfers_copy) { + transfer.second.callback(status); + } + // Note: See `CoreOp::infer_async` docs + return HAILO_SUCCESS; + } + assert(transfers_copy.empty()); + + return HAILO_SUCCESS; +} + bool CoreOp::is_multi_context() const { return m_metadata->supported_features().multi_context; @@ -451,6 +517,65 @@ Expected> CoreOp::create_input_stream_from_conf return input_stream; } +hailo_status CoreOp::infer_async_impl(std::unordered_map &transfers, + std::shared_ptr state, TransferDoneCallback done_callback) +{ + for ( auto &name_to_transfer : transfers) { + name_to_transfer.second.callback = wrap_user_callback(std::move(name_to_transfer.second.callback), state, done_callback); + } + + for (auto &input : m_input_streams) { + auto transfer = transfers.find(input.second->name()); + CHECK(transfer != transfers.end(), HAILO_INTERNAL_FAILURE, "Invalid stream {}", input.second->name()); + + CHECK(input.second->get_frame_size() == transfer->second.get_total_transfer_size(), HAILO_INVALID_ARGUMENT, + "for input '{}', passed buffer size is {} (expected {})", input.first, transfer->second.get_total_transfer_size(), + input.second->get_frame_size()); + + auto status = input.second->write_async(std::move(transfer->second)); + if (HAILO_STREAM_ABORTED_BY_USER == status) { + return status; + } + CHECK_SUCCESS(status); + transfers.erase(transfer); + } + + for (auto &output : m_output_streams) { + auto transfer = transfers.find(output.second->name()); + CHECK(transfer != transfers.end(), HAILO_INTERNAL_FAILURE, "Invalid stream {}", output.second->name()); + + CHECK(output.second->get_frame_size() == transfer->second.get_total_transfer_size(), HAILO_INVALID_ARGUMENT, + "for output '{}', passed buffer size is {} (expected {})", output.first, transfer->second.get_total_transfer_size(), + output.second->get_frame_size()); + + auto status = output.second->read_async(std::move(transfer->second)); + if (HAILO_STREAM_ABORTED_BY_USER 
== status) { + return status; + } + CHECK_SUCCESS(status); + transfers.erase(transfer); + } + + return HAILO_SUCCESS; +} + +TransferDoneCallback CoreOp::wrap_user_callback(TransferDoneCallback &&original_callback, + std::shared_ptr state, + TransferDoneCallback infer_callback) +{ + return [original_callback, state, infer_callback](hailo_status status) { + original_callback(status); + + if (HAILO_SUCCESS != status) { + state->status = status; + } + + if (0 == (--state->callbacks_left)) { + infer_callback(state->status); + } + }; +} + Expected> CoreOp::create_vdma_input_stream(Device &device, const std::string &stream_name, const LayerInfo &layer_info, const hailo_stream_parameters_t &stream_params) { @@ -490,7 +615,7 @@ Expected> CoreOp::create_output_stream_from_co case HAILO_STREAM_INTERFACE_ETH: { - auto output_stream_exp = EthernetOutputStream::create(device, + auto output_stream_exp = EthernetOutputStream::create(device, layer_info.value(), stream_params.eth_output_params, m_core_op_activated_event); CHECK_EXPECTED(output_stream_exp); diff --git a/hailort/libhailort/src/core_op/core_op.hpp b/hailort/libhailort/src/core_op/core_op.hpp index 00f046a..6672c4b 100644 --- a/hailort/libhailort/src/core_op/core_op.hpp +++ b/hailort/libhailort/src/core_op/core_op.hpp @@ -26,13 +26,18 @@ #include "hef/hef_internal.hpp" #include "hef/core_op_metadata.hpp" #include "control_protocol.h" -#include "vdma/channel/boundary_channel.hpp" #include "core_op/active_core_op_holder.hpp" #include "stream_common/stream_internal.hpp" -namespace hailort -{ +namespace hailort { + +namespace vdma { + class BoundaryChannel; + using BoundaryChannelPtr = std::shared_ptr; +} /* namespace vdma */ + + /** Represents a vector of InputStream ptrs */ using InputStreamPtrVector = std::vector>; @@ -75,6 +80,9 @@ public: hailo_status activate(uint16_t dynamic_batch_size = CONTROL_PROTOCOL__IGNORE_DYNAMIC_BATCH_SIZE); hailo_status deactivate(); + // Shutdown the core-op, make sure all ongoing transfers 
are completed with status HAILO_STREAM_ABORTED_BY_USER + virtual hailo_status shutdown() = 0; + virtual hailo_status activate_impl(uint16_t dynamic_batch_size = CONTROL_PROTOCOL__IGNORE_DYNAMIC_BATCH_SIZE) = 0; virtual hailo_status deactivate_impl() = 0; @@ -99,16 +107,21 @@ public: void set_vdevice_core_op_handle(vdevice_core_op_handle_t handle) { m_vdevice_core_op_handle = handle;} vdevice_core_op_handle_t vdevice_core_op_handle() { return m_vdevice_core_op_handle;} + Expected get_async_max_queue_size() const; + + /** + * The function returns `HAILO_SUCCESS` if at least one of the writes or reads happened. + * This assures that all the callbacks will be called: The callbacks per transfer and the `infer_request` callback. + * + * If the function fails, then we can assume that no callback has being called. + * Neither the transfers callbacks nor the `infer_request` callback. + * + */ + hailo_status infer_async(InferRequest &&request); + std::map> m_input_streams; std::map> m_output_streams; - // This function is called when a user is creating VStreams and is only relevant for VDeviceCoreOp. - // In case a user is using VdmaConfigCoreOp or HcpConfigCoreOp this function should do nothing. 
- virtual void set_vstreams_multiplexer_callbacks(std::vector &output_vstreams) - { - (void)output_vstreams; - } - protected: CoreOp(const ConfigureNetworkParams &config_params, std::shared_ptr metadata, ActiveCoreOpHolder &active_core_op_holder, hailo_status &status); @@ -120,6 +133,7 @@ protected: hailo_status activate_low_level_streams(); hailo_status deactivate_low_level_streams(); + hailo_status abort_low_level_streams(); Expected get_layer_info(const std::string &stream_name); bool is_nms(); @@ -134,6 +148,21 @@ protected: static uint16_t get_smallest_configured_batch_size(const ConfigureNetworkParams &config_params); private: + struct OngoingInferState { + std::atomic_size_t callbacks_left; + hailo_status status; + }; + + // Launch write_async/read_async on all streams with wrapped callback. + // We remove all transfer that was launched successfully from transfers in order to call those callback + // with HAILO_STREAM_ABORTED_BY_USER status on the case of a failure. + hailo_status infer_async_impl(std::unordered_map &transfers, + std::shared_ptr state, + TransferDoneCallback done_callback); + TransferDoneCallback wrap_user_callback(TransferDoneCallback &&original_callback, + std::shared_ptr state, + TransferDoneCallback infer_callback); + const ConfigureNetworkParams m_config_params; ActiveCoreOpHolder &m_active_core_op_holder; const uint16_t m_min_configured_batch_size; // TODO: remove after HRT-6535 diff --git a/hailort/libhailort/src/core_op/resource_manager/intermediate_buffer.cpp b/hailort/libhailort/src/core_op/resource_manager/intermediate_buffer.cpp index 3329ca5..59be948 100644 --- a/hailort/libhailort/src/core_op/resource_manager/intermediate_buffer.cpp +++ b/hailort/libhailort/src/core_op/resource_manager/intermediate_buffer.cpp @@ -93,9 +93,10 @@ Expected> IntermediateBuffer::create_sg_buffer { auto const DONT_FORCE_DEFAULT_PAGE_SIZE = false; auto const FORCE_BATCH_SIZE = true; + auto const IS_VDMA_ALIGNED_BUFFER = true; auto buffer_requirements 
= vdma::BufferSizesRequirements::get_sg_buffer_requirements_single_transfer( driver.desc_max_page_size(), batch_size, batch_size, transfer_size, is_circular, DONT_FORCE_DEFAULT_PAGE_SIZE, - FORCE_BATCH_SIZE); + FORCE_BATCH_SIZE, IS_VDMA_ALIGNED_BUFFER); CHECK_EXPECTED(buffer_requirements); const auto desc_page_size = buffer_requirements->desc_page_size(); const auto descs_count = buffer_requirements->descs_count(); @@ -153,11 +154,12 @@ bool IntermediateBuffer::should_use_ccb(HailoRTDriver &driver, StreamingType str case StreamingType::CIRCULAR_CONTINUOS: // On circular_continuous (aka ddr), the buffers are relatively small and we want to verify the C2C mechanism, // therefore the CCB is the default behaviour. - if (nullptr != std::getenv("HAILO_FORCE_DDR_CHANNEL_OVER_DESC")) { - LOGGER__WARNING("Using desc instead of CCB for ddr channel is not optimal for performance.\n"); - return false; - } else { + // Due to request from the DFC group (Memory issues) - DDR buffers would run over DESC and not CCB buffers. + if (nullptr != std::getenv("HAILO_FORCE_DDR_CHANNEL_OVER_CCB")) { + LOGGER__INFO("Using Non default buffer type (CCB instead of DESC) for ddr channel. \n"); return true; + } else { + return false; } } diff --git a/hailort/libhailort/src/core_op/resource_manager/periph_calculator.cpp b/hailort/libhailort/src/core_op/resource_manager/periph_calculator.cpp new file mode 100644 index 0000000..29846f9 --- /dev/null +++ b/hailort/libhailort/src/core_op/resource_manager/periph_calculator.cpp @@ -0,0 +1,160 @@ +/** + * Copyright (c) 2020-2023 Hailo Technologies Ltd. All rights reserved. 
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file periph_calculator.cpp + * @brief Class that calculates periph register values based off layer, device and HEF information + **/ + +#include "periph_calculator.hpp" +#include "device_common/device_internal.hpp" + +namespace hailort +{ + +bool PeriphCalculator::is_valid_periph_bytes_value(const uint32_t periph_bytes_per_buffer, const uint32_t periph_frame_size, + const bool is_ddr, const uint32_t max_shmifo_size, const uint32_t desc_page_size, const uint32_t max_periph_bytes_value, + const uint16_t core_bytes_per_buffer) +{ + if (0 == periph_bytes_per_buffer) { + return false; + } + + if (is_ddr) { + // In DDR there is no residue of descriptor - but has to divide with no remainder by core_bytes_per_buffer + // Calculated by DFC, Furthermore periph is aligned to PERIPH_BYTES_PER_BUFFER_DDR_ALIGNMENT_SIZE and we cant + // force that hw_frame_size will be aligned to periph_bytes_per_buffer. + return (periph_bytes_per_buffer < max_shmifo_size) && (periph_bytes_per_buffer <= max_periph_bytes_value) && + (0 == (core_bytes_per_buffer % periph_bytes_per_buffer)); + } + return ((periph_bytes_per_buffer < (max_shmifo_size - desc_page_size)) && + (0 == (periph_frame_size % periph_bytes_per_buffer)) && (periph_bytes_per_buffer <= max_periph_bytes_value)); +} + +Expected PeriphCalculator::calculate_nms_periph_registers(const LayerInfo &layer_info) +{ + CHECK_AS_EXPECTED(IS_FIT_IN_UINT16(layer_info.nms_info.bbox_size * layer_info.nms_info.burst_size), + HAILO_INVALID_HEF, "Invalid NMS parameters"); + LayerInfo updated_layer_info = layer_info; + const auto nms_periph_bytes = static_cast(layer_info.nms_info.bbox_size * layer_info.nms_info.burst_size); + + const auto transfer_size = LayerInfoUtils::get_nms_layer_transfer_size(layer_info); + CHECK_AS_EXPECTED(IS_FIT_IN_UINT16(transfer_size / nms_periph_bytes), HAILO_INVALID_HEF, "Invalid NMS parameters"); + // Will divide with no remainder 
seeing as transfer size is multiple of (bbox_size * burst_size) + assert(0 == (transfer_size % nms_periph_bytes)); + const auto nms_periph_buffers = static_cast(transfer_size / nms_periph_bytes); + + // In NMS - update periph variables to represent size of frame in case of "interrupt per frame" (where we know frame + // size). Otherwise - size of burst / bbox (transfer size) + updated_layer_info.nn_stream_config.periph_bytes_per_buffer = nms_periph_bytes; + updated_layer_info.nn_stream_config.periph_buffers_per_frame = nms_periph_buffers; + return updated_layer_info; +} + +uint32_t PeriphCalculator::calculate_ddr_periph_buffers_per_frame(const LayerInfo &layer_info, + const uint32_t periph_bytes_per_buffer) +{ + uint32_t periph_buffers_per_frame = layer_info.nn_stream_config.core_bytes_per_buffer * + layer_info.nn_stream_config.core_buffers_per_frame / periph_bytes_per_buffer; + + // If we get a periph bytes per buffer so small that the periph buffers per frame can't fit in uint16, + // put uint16_t max - seeing as this value doesn't really affect anything and we should not fail in that case. 
+ if (!IS_FIT_IN_UINT16(periph_buffers_per_frame)) { + LOGGER__WARNING("periph buffers per frame in DDR too large - putting uint16_t max (This may affect HW infer estimator results"); + periph_buffers_per_frame = UINT16_MAX; + } + + return periph_buffers_per_frame; +} + +Expected PeriphCalculator::calculate_periph_registers_impl(const LayerInfo &layer_info, + const uint32_t desc_page_size, const uint32_t max_periph_bytes_value, const bool is_core_hw_padding_config_in_dfc, + const ProtoHEFHwArch &hw_arch) +{ + // Calculate periph according to hw shape - the shape the core is expecting to get + const hailo_3d_image_shape_t& periph_shape = layer_info.hw_shape; + + CHECK_AS_EXPECTED(IS_FIT_IN_UINT32(periph_shape.width * periph_shape.features * periph_shape.height * + layer_info.hw_data_bytes), HAILO_INVALID_HEF, "Invalid frame size"); + + LayerInfo updated_layer_info = layer_info; + const auto is_ddr = (LayerType::DDR == layer_info.type); + const uint32_t alignment = is_ddr ? PERIPH_BYTES_PER_BUFFER_DDR_ALIGNMENT_SIZE : PERIPH_BYTES_PER_BUFFER_ALIGNMENT_SIZE; + const auto row_size = static_cast(periph_shape.width * periph_shape.features * layer_info.hw_data_bytes); + auto periph_frame_size = periph_shape.height * row_size; + + // In case of core hw padding in DFC extension - hw shape might not be aligned - use aligned frame size and + // configured periph registers will add / remove the extra padding + if (is_core_hw_padding_config_in_dfc) { + if (0 != (periph_frame_size % PERIPH_FRAME_ALIGNMENT)) { + auto max_periph_padding_payload = HefConfigurator::max_periph_padding_payload_value( + DeviceBase::hef_arch_to_device_arch(hw_arch)); + CHECK_EXPECTED(max_periph_padding_payload); + + // Currently case of payload larger than max periph padding payload value - not supported + CHECK_AS_EXPECTED(max_periph_padding_payload.value() > periph_frame_size, HAILO_INVALID_HEF, + "Error, padded frame size larger than {} Currently not supported", max_periph_padding_payload.value()); + 
+ const auto padded_periph_frame_size = HailoRTCommon::align_to(periph_frame_size, + static_cast(PERIPH_FRAME_ALIGNMENT)); + // Configure periph padding registers + updated_layer_info.nn_stream_config.buffer_padding_payload = periph_frame_size; + updated_layer_info.nn_stream_config.buffer_padding = static_cast(padded_periph_frame_size - + periph_frame_size); + periph_frame_size = padded_periph_frame_size; + } + } + + // Currently takes the largest periph_bytes_per_buffer that is possible with shmifo size and desc page size + // TODO HRT-10961 : calculate optimal periph size + auto periph_bytes_per_buffer = HailoRTCommon::align_to(row_size, alignment); + while ((0 < periph_bytes_per_buffer) && !is_valid_periph_bytes_value(periph_bytes_per_buffer, periph_frame_size, + is_ddr, layer_info.max_shmifo_size, desc_page_size, max_periph_bytes_value, + layer_info.nn_stream_config.core_bytes_per_buffer)) { + periph_bytes_per_buffer -= alignment; + } + CHECK_AS_EXPECTED(0 != periph_bytes_per_buffer, HAILO_INVALID_ARGUMENT, + "Error, Could not find valid periph bytes per buffer value"); + + // In ddr - the core makes sure that row size is aligned to PERIPH_BYTES_PER_BUFFER_DDR_ALIGNMENT_SIZE but if a row + // is too large to fit in core bytes per buffer - they will divide it and put it in multiple buffers - so in order to + // get the exact size in periph buffers per frame - we must multiply core registers and divide by periph bytes per buffer + uint32_t periph_buffers_per_frame = is_ddr ? 
calculate_ddr_periph_buffers_per_frame(layer_info, periph_bytes_per_buffer): + (periph_frame_size / periph_bytes_per_buffer); + CHECK_AS_EXPECTED(IS_FIT_IN_UINT16(periph_buffers_per_frame), HAILO_INVALID_ARGUMENT); + + updated_layer_info.nn_stream_config.periph_bytes_per_buffer = static_cast(periph_bytes_per_buffer); + updated_layer_info.nn_stream_config.periph_buffers_per_frame = static_cast(periph_buffers_per_frame); + + return updated_layer_info; +} + +Expected PeriphCalculator::calculate_periph_registers(const LayerInfo &layer_info, + const uint32_t desc_page_size, const bool is_periph_calculated_in_hailort, const ProtoHEFHwArch &hw_arch, + const bool is_core_hw_padding_config_in_dfc) +{ + auto max_periph_bytes_from_hef = HefConfigurator::max_periph_bytes_value(DeviceBase::hef_arch_to_device_arch(hw_arch)); + CHECK_EXPECTED(max_periph_bytes_from_hef); + const auto max_periph_bytes = std::min(max_periph_bytes_from_hef.value(), layer_info.max_shmifo_size); + // If extension for calculating periph values in hailort is false and core hw padding is not supported - copy values from + // Core registers, otherwise calculate them according to shape and other layer information + const bool hw_padding_supported = HefConfigurator::is_core_hw_padding_supported(layer_info, max_periph_bytes, + is_core_hw_padding_config_in_dfc); + if (!is_periph_calculated_in_hailort && !hw_padding_supported) { + LayerInfo updated_layer_info = layer_info; + updated_layer_info.nn_stream_config.periph_bytes_per_buffer = layer_info.nn_stream_config.core_bytes_per_buffer; + updated_layer_info.nn_stream_config.periph_buffers_per_frame = layer_info.nn_stream_config.core_buffers_per_frame; + return updated_layer_info; + } + + if (HAILO_FORMAT_ORDER_HAILO_NMS == layer_info.format.order) { + return calculate_nms_periph_registers(layer_info); + } + + // TODO : HRT-12051 - remove max_periph_bytes from parameters and calculate in impl when remove is_core_hw_padding + return 
calculate_periph_registers_impl(layer_info, desc_page_size, max_periph_bytes, + is_core_hw_padding_config_in_dfc, hw_arch); +} + +} /* namespace hailort */ \ No newline at end of file diff --git a/hailort/libhailort/src/core_op/resource_manager/periph_calculator.hpp b/hailort/libhailort/src/core_op/resource_manager/periph_calculator.hpp new file mode 100644 index 0000000..bfa487b --- /dev/null +++ b/hailort/libhailort/src/core_op/resource_manager/periph_calculator.hpp @@ -0,0 +1,43 @@ +/** + * Copyright (c) 2020-2023 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file periph_calculator.hpp + * @brief Class that calculates periph register values based off layer, device and HEF information + **/ + +#ifndef _PERIPH_CALCULATOR_HPP_ +#define _PERIPH_CALCULATOR_HPP_ + + +#include "common/utils.hpp" +#include "hailo/hailort_common.hpp" +#include "hef/hef_internal.hpp" + +namespace hailort +{ + +static const uint64_t PERIPH_FRAME_ALIGNMENT = 8; + +class PeriphCalculator { +public: + static Expected calculate_periph_registers(const LayerInfo &layer_info, + const uint32_t desc_page_size, const bool is_periph_calculated_in_hailort, const ProtoHEFHwArch &hw_arch, + const bool is_core_hw_padding_config_in_dfc); +private: + static bool is_valid_periph_bytes_value(const uint32_t periph_bytes_per_buffer, const uint32_t hw_frame_size, + const bool is_ddr, const uint32_t max_shmifo_size, const uint32_t desc_page_size, + const uint32_t max_periph_bytes_value, const uint16_t core_bytes_per_buffer); + static Expected calculate_nms_periph_registers(const LayerInfo &layer_info); + static Expected calculate_periph_registers_impl(const LayerInfo &layer_info, + const uint32_t desc_page_size, const uint32_t max_periph_bytes_value, + const bool is_core_hw_padding_config_in_dfc, const ProtoHEFHwArch &hw_arch); + static uint32_t calculate_ddr_periph_buffers_per_frame(const LayerInfo &layer_info, + const 
uint32_t periph_bytes_per_buffer); + +}; + +} /* namespace hailort */ + +#endif /* _PERIPH_CALCULATOR_HPP_ */ \ No newline at end of file diff --git a/hailort/libhailort/src/core_op/resource_manager/resource_manager.cpp b/hailort/libhailort/src/core_op/resource_manager/resource_manager.cpp index db8bc7e..b1b7445 100644 --- a/hailort/libhailort/src/core_op/resource_manager/resource_manager.cpp +++ b/hailort/libhailort/src/core_op/resource_manager/resource_manager.cpp @@ -418,7 +418,7 @@ hailo_status ResourcesManager::create_boundary_vdma_channel(const LayerInfo &lay CHECK_EXPECTED_AS_STATUS(device_arch); /* Add error in configure phase for invalid NMS parameters */ if (layer_info.format.order == HAILO_FORMAT_ORDER_HAILO_NMS && - (device_arch.value() == HAILO_ARCH_HAILO15H || device_arch.value() == HAILO_ARCH_PLUTO)) { + (device_arch.value() == HAILO_ARCH_HAILO15H || device_arch.value() == HAILO_ARCH_HAILO15M || device_arch.value() == HAILO_ARCH_PLUTO)) { CHECK(layer_info.nms_info.number_of_classes * layer_info.nms_info.chunks_per_frame * network_batch_size.value() < HAILO15H_NMS_MAX_CLASSES, HAILO_INVALID_ARGUMENT, "Invalid NMS parameters. 
Number of classes ({}) * division factor ({}) * batch size ({}) must be under {}", layer_info.nms_info.number_of_classes, layer_info.nms_info.chunks_per_frame, network_batch_size.value(), HAILO15H_NMS_MAX_CLASSES); @@ -439,6 +439,7 @@ hailo_status ResourcesManager::create_boundary_vdma_channel(const LayerInfo &lay /* TODO - HRT-6829- page_size should be calculated inside the vDMA channel class create function */ static const bool IS_CIRCULAR = true; + static const bool IS_VDMA_ALIGNED_BUFFER = false; const auto transfer_size = LayerInfoUtils::get_layer_transfer_size(layer_info); const auto DONT_FORCE_DEFAULT_PAGE_SIZE = false; @@ -450,15 +451,15 @@ hailo_status ResourcesManager::create_boundary_vdma_channel(const LayerInfo &lay (m_driver.desc_max_page_size() / 2) : m_driver.desc_max_page_size(); auto buffer_sizes_requirements = vdma::BufferSizesRequirements::get_sg_buffer_requirements_single_transfer( max_page_size, static_cast(min_active_trans), static_cast(max_active_trans), - transfer_size, IS_CIRCULAR, DONT_FORCE_DEFAULT_PAGE_SIZE, DONT_FORCE_BATCH_SIZE); + transfer_size, IS_CIRCULAR, DONT_FORCE_DEFAULT_PAGE_SIZE, DONT_FORCE_BATCH_SIZE, IS_VDMA_ALIGNED_BUFFER); CHECK_EXPECTED_AS_STATUS(buffer_sizes_requirements); const auto page_size = buffer_sizes_requirements->desc_page_size(); const auto descs_count = (nullptr != std::getenv("HAILO_CONFIGURE_FOR_HW_INFER")) ? 
MAX_DESCS_COUNT : buffer_sizes_requirements->descs_count(); - auto channel = vdma::BoundaryChannel::create(channel_id.value(), channel_direction, m_driver, descs_count, page_size, - layer_info.name, latency_meter); + auto channel = vdma::BoundaryChannel::create(channel_id.value(), channel_direction, m_vdma_device, descs_count, + page_size, layer_info.name, latency_meter); CHECK_EXPECTED_AS_STATUS(channel); m_boundary_channels.emplace(channel_id.value(), channel.release()); @@ -636,14 +637,6 @@ hailo_status ResourcesManager::reset_state_machine() return HAILO_SUCCESS; } -hailo_status ResourcesManager::cancel_pending_transfers() -{ - for (const auto &boundary_channel : m_boundary_channels) { - boundary_channel.second->cancel_pending_transfers(); - } - return HAILO_SUCCESS; -} - hailo_status ResourcesManager::start_vdma_interrupts_dispatcher() { auto interrupts_dispatcher = m_vdma_device.get_vdma_interrupts_dispatcher(); @@ -700,7 +693,8 @@ Expected> ResourcesManager::create_mapped_b CHECK_AS_EXPECTED(IS_FIT_IN_UINT16(total_desc_count), HAILO_INVALID_ARGUMENT, "calculated total_desc_count for vdma descriptor list is out of UINT16 range"); - auto mapped_buffer = vdma::MappedBuffer::create_shared(m_driver, direction, total_desc_count * desc_list->desc_page_size()); + auto mapped_buffer = vdma::MappedBuffer::create_shared_by_allocation( + total_desc_count * desc_list->desc_page_size(), m_driver, direction); CHECK_EXPECTED(mapped_buffer); m_hw_only_boundary_buffers.emplace_back(mapped_buffer.release()); diff --git a/hailort/libhailort/src/core_op/resource_manager/resource_manager.hpp b/hailort/libhailort/src/core_op/resource_manager/resource_manager.hpp index e187529..ea8f0d1 100644 --- a/hailort/libhailort/src/core_op/resource_manager/resource_manager.hpp +++ b/hailort/libhailort/src/core_op/resource_manager/resource_manager.hpp @@ -189,7 +189,6 @@ public: hailo_status enable_state_machine(uint16_t dynamic_batch_size, uint16_t batch_count = 
CONTROL_PROTOCOL__INIFINITE_BATCH_COUNT); hailo_status reset_state_machine(); - hailo_status cancel_pending_transfers(); hailo_status start_vdma_interrupts_dispatcher(); hailo_status stop_vdma_interrupts_dispatcher(); Expected get_network_batch_size(const std::string &network_name) const; diff --git a/hailort/libhailort/src/core_op/resource_manager/resource_manager_builder.cpp b/hailort/libhailort/src/core_op/resource_manager/resource_manager_builder.cpp index bff5613..26d8a1b 100644 --- a/hailort/libhailort/src/core_op/resource_manager/resource_manager_builder.cpp +++ b/hailort/libhailort/src/core_op/resource_manager/resource_manager_builder.cpp @@ -9,6 +9,7 @@ #include "resource_manager_builder.hpp" #include "device_common/control.hpp" +#include "periph_calculator.hpp" namespace hailort @@ -75,122 +76,27 @@ static Expected calculate_credit_params(const CONTROL_PROTOCOL__hw_co return updated_layer_info; } -// NOTE: in case of ddr where periph is aligned to PERIPH_BYTES_PER_BUFFER_DDR_ALIGNMENT_SIZE we cant force that -// periph_bytes_per_buffer * periph_buffers_per_frame will equal exactly hw_frame_size. 
-static bool is_logical_periph_bytes_per_buffer(const uint32_t periph_bytes_per_buffer, const size_t hw_frame_size, const bool is_ddr, - const uint32_t max_shmifo_size, const uint32_t desc_page_size, const uint32_t max_periph_bytes_value, - const uint16_t core_bytes_per_buffer) -{ - if (0 == periph_bytes_per_buffer) { - return false; - } - - if (is_ddr) { - // In DDR there is no residue of descriptor - but has to divide with no remainder by core_bytes_per_buffer - // Calculated by DFC - return (periph_bytes_per_buffer < max_shmifo_size) && (periph_bytes_per_buffer <= max_periph_bytes_value) && - (0 == (core_bytes_per_buffer % periph_bytes_per_buffer)); - } - return ((periph_bytes_per_buffer < (max_shmifo_size - desc_page_size)) && - (0 == (hw_frame_size % periph_bytes_per_buffer)) && (periph_bytes_per_buffer <= max_periph_bytes_value)); -} - -static Expected> calculate_periph_requirements(const LayerInfo &layer_info, const uint32_t desc_page_size, - const bool is_periph_calculated_in_hailort, const uint32_t max_periph_bytes_value) -{ - // If extension for calculating periph values in hailort is false and hw padding is not supported - copy values from - // Core registers, calculate them according to shape and other layer information - const bool hw_padding_supported = HefConfigurator::is_hw_padding_supported(layer_info, max_periph_bytes_value); - if (!is_periph_calculated_in_hailort && !hw_padding_supported) { - return std::make_tuple(static_cast(layer_info.nn_stream_config.core_bytes_per_buffer), - static_cast(layer_info.nn_stream_config.core_buffers_per_frame)); - } - - if (HAILO_FORMAT_ORDER_HAILO_NMS == layer_info.format.order) { - CHECK_AS_EXPECTED(IS_FIT_IN_UINT16(layer_info.nms_info.bbox_size * layer_info.nms_info.burst_size), - HAILO_INVALID_HEF, "Invalid NMS parameters"); - const auto nms_periph_bytes = static_cast(layer_info.nms_info.bbox_size * layer_info.nms_info.burst_size); - - const auto transfer_size = 
LayerInfoUtils::get_nms_layer_transfer_size(layer_info); - CHECK_AS_EXPECTED(IS_FIT_IN_UINT16(transfer_size / nms_periph_bytes), HAILO_INVALID_HEF, "Invalid NMS parameters"); - // Will divide with no remainder seeing as transfer size is multiple of (bbox_size * burst_size) - assert(0 == (transfer_size % nms_periph_bytes)); - const auto nms_periph_buffers = static_cast(transfer_size / nms_periph_bytes); - - // In NMS - update periph variables to represent size of frame in case of "interrupt per frame" (where we know frame size) - // Otherwise - size of burst / bbox (transfer size) - return std::make_tuple(nms_periph_bytes, nms_periph_buffers); - } - - CHECK_AS_EXPECTED(IS_FIT_IN_UINT32(layer_info.hw_shape.width * layer_info.hw_shape.features * - layer_info.hw_shape.height * layer_info.hw_data_bytes), HAILO_INVALID_HEF, "Invalid core frame size"); - - const auto is_ddr = (LayerType::DDR == layer_info.type); - const uint32_t alignment = is_ddr ? PERIPH_BYTES_PER_BUFFER_DDR_ALIGNMENT_SIZE : PERIPH_BYTES_PER_BUFFER_ALIGNMENT_SIZE; - const auto row_size = static_cast(layer_info.hw_shape.width * layer_info.hw_shape.features * - layer_info.hw_data_bytes); - const auto core_frame_size = layer_info.hw_shape.height * row_size; - - // Currently takes the largest periph_bytes_per_buffer that is possible with shmifo size and desc page size - // TODO HRT-10961 : calculate optimal periph size - auto periph_bytes_per_buffer = HailoRTCommon::align_to(row_size, alignment); - while ((0 < periph_bytes_per_buffer) && !is_logical_periph_bytes_per_buffer(periph_bytes_per_buffer, core_frame_size, - is_ddr, layer_info.max_shmifo_size, desc_page_size, max_periph_bytes_value, layer_info.nn_stream_config.core_bytes_per_buffer)) { - periph_bytes_per_buffer -= alignment; - } - - CHECK_AS_EXPECTED(0 != periph_bytes_per_buffer, HAILO_INVALID_ARGUMENT, "Error, Could not find logical periph bytes per buffer value"); - - uint32_t periph_buffers_per_frame = (core_frame_size / periph_bytes_per_buffer); 
- // In ddr - the core make sure that row size is aligned to PERIPH_BYTES_PER_BUFFER_DDR_ALIGNMENT_SIZE but if a row - // Is too large to fit in core bytes per buffer - they will divide it and put it in mutliple buffers - so in order to - // Get the exact size in periph buffers per frame - we must muttiply core registers and divide by periph bytes per buffer - if (is_ddr) { - periph_buffers_per_frame = layer_info.nn_stream_config.core_bytes_per_buffer * - layer_info.nn_stream_config.core_buffers_per_frame / periph_bytes_per_buffer; - - // if we get a periph bytes per buffer so small that the periph buffers per frame cant fit in uint16 - // put uint16_t max - seeing as this value doesnt really affect anything and we should not fail in that case. - if (!IS_FIT_IN_UINT16(periph_buffers_per_frame)) { - LOGGER__WARNING("periph buffers per frame in DDR too large - putting uint16_t max (This may affect HW infer estimator results"); - periph_buffers_per_frame = UINT16_MAX; - } - } - CHECK_AS_EXPECTED(IS_FIT_IN_UINT16(periph_buffers_per_frame), HAILO_INVALID_ARGUMENT); - - return std::make_tuple(static_cast(periph_bytes_per_buffer), static_cast(periph_buffers_per_frame)); -} - static Expected update_layer_info(const LayerInfo &original_layer_info, const CONTROL_PROTOCOL__host_buffer_info_t &buffer_info, const CONTROL_PROTOCOL__hw_consts_t &hw_consts, const ProtoHEFHwArch &hw_arch, const bool should_optimize_credits, - const bool is_periph_calculated_in_hailort) + const bool is_periph_calculated_in_hailort, const bool is_core_hw_padding_config_in_dfc) { LayerInfo local_layer_info = original_layer_info; + // TODO HRT-12099 - remove when we remove support for hefs with no max_shmifo size if (local_layer_info.max_shmifo_size == 0) { local_layer_info.max_shmifo_size = hw_consts.default_initial_credit_size; } local_layer_info.nn_stream_config.is_periph_calculated_in_hailort = is_periph_calculated_in_hailort; + local_layer_info.nn_stream_config.is_core_hw_padding_config_in_dfc = 
is_core_hw_padding_config_in_dfc; - // If Hw padding supported dont update periph registers because they were updated in get_hw_padding - // TODO HRT-11006 : currently check is_hw_padding_supported and the feature_padding_payload because in MIPI Input stream - // Even if is_hw_padding_supported is true we will not use hw padding. - auto max_periph_bytes_from_hef = HefConfigurator::max_periph_bytes_value(DeviceBase::hef_arch_to_device_arch(hw_arch)); - CHECK_EXPECTED(max_periph_bytes_from_hef); - const auto max_periph_bytes = MIN(max_periph_bytes_from_hef.value(), local_layer_info.max_shmifo_size); - - const auto periph_requirements = calculate_periph_requirements(local_layer_info, buffer_info.desc_page_size, - is_periph_calculated_in_hailort, max_periph_bytes); - CHECK_EXPECTED(periph_requirements); - - // Calculate and update value of periph bytes per buffer and periph buffers per frame - local_layer_info.nn_stream_config.periph_bytes_per_buffer = std::get<0>(periph_requirements.value()); - local_layer_info.nn_stream_config.periph_buffers_per_frame = std::get<1>(periph_requirements.value()); + auto updated_periph_layer_info = PeriphCalculator::calculate_periph_registers(local_layer_info, + buffer_info.desc_page_size, is_periph_calculated_in_hailort, hw_arch, is_core_hw_padding_config_in_dfc); + CHECK_EXPECTED(updated_periph_layer_info); auto updated_local_layer_info = calculate_credit_params(hw_consts, buffer_info.desc_page_size, should_optimize_credits, - local_layer_info); + updated_periph_layer_info.release()); CHECK_EXPECTED(updated_local_layer_info); return updated_local_layer_info; @@ -207,8 +113,9 @@ static hailo_status fill_boundary_input_layer_impl(ContextResources &context_res const auto buffer_info = vdma_channel.value()->get_boundary_buffer_info(transfer_size); const bool is_periph_calculated_in_hailort = resources_manager.get_supported_features().periph_calculation_in_hailort; + const bool is_core_hw_padding_config_in_dfc = 
resources_manager.get_supported_features().core_hw_padding_config_in_dfc; auto local_layer_info = update_layer_info(layer_info, buffer_info, hw_consts, hw_arch, should_optimize_credits, - is_periph_calculated_in_hailort); + is_periph_calculated_in_hailort, is_core_hw_padding_config_in_dfc); CHECK_EXPECTED_AS_STATUS(local_layer_info); const auto channel_id = vdma_channel.value()->get_channel_id(); @@ -252,8 +159,9 @@ static hailo_status fill_inter_context_input_layer(ContextResources &context_res auto &inter_context_buffer = inter_context_buffer_exp->get(); const bool is_periph_calculated_in_hailort = resources_manager.get_supported_features().periph_calculation_in_hailort; + const bool is_core_hw_padding_config_in_dfc = resources_manager.get_supported_features().core_hw_padding_config_in_dfc; auto local_layer_info = update_layer_info(layer_info, inter_context_buffer.get_host_buffer_info(), hw_consts, - hw_arch, should_optimize_credits, is_periph_calculated_in_hailort); + hw_arch, should_optimize_credits, is_periph_calculated_in_hailort, is_core_hw_padding_config_in_dfc); CHECK_EXPECTED_AS_STATUS(local_layer_info); auto status = context_resources.add_edge_layer(local_layer_info.value(), channel_id.value(), @@ -278,8 +186,9 @@ static hailo_status fill_boundary_output_layer(ContextResources &context_resourc const auto buffer_info = vdma_channel.value()->get_boundary_buffer_info(transfer_size); const bool is_periph_calculated_in_hailort = resources_manager.get_supported_features().periph_calculation_in_hailort; + const bool is_core_hw_padding_config_in_dfc = resources_manager.get_supported_features().core_hw_padding_config_in_dfc; auto local_layer_info = update_layer_info(layer_info, buffer_info, hw_consts, hw_arch, should_optimize_credits, - is_periph_calculated_in_hailort); + is_periph_calculated_in_hailort, is_core_hw_padding_config_in_dfc); CHECK_EXPECTED_AS_STATUS(local_layer_info); const auto channel_id = vdma_channel.value()->get_channel_id(); @@ -311,8 +220,9 
@@ static hailo_status fill_inter_context_output_layer(ContextResources &context_re auto &inter_context_buffer = inter_context_buffer_exp->get(); const bool is_periph_calculated_in_hailort = resources_manager.get_supported_features().periph_calculation_in_hailort; + const bool is_core_hw_padding_config_in_dfc = resources_manager.get_supported_features().core_hw_padding_config_in_dfc; auto local_layer_info = update_layer_info(layer_info, inter_context_buffer.get_host_buffer_info(), hw_consts, - hw_arch, should_optimize_credits, is_periph_calculated_in_hailort); + hw_arch, should_optimize_credits, is_periph_calculated_in_hailort, is_core_hw_padding_config_in_dfc); CHECK_EXPECTED_AS_STATUS(local_layer_info); auto status = context_resources.add_edge_layer(local_layer_info.value(), channel_id.value(), @@ -376,8 +286,9 @@ static hailo_status fill_ddr_output_layer(ContextResources &context_resources, // optimize the credits. const bool should_optimize_credits = false; const bool is_periph_calculated_in_hailort = resources_manager.get_supported_features().periph_calculation_in_hailort; + const bool is_core_hw_padding_config_in_dfc = resources_manager.get_supported_features().core_hw_padding_config_in_dfc; auto local_layer_info = update_layer_info(layer_info, ddr_buffer->get().get_host_buffer_info(), hw_consts, - hw_arch, should_optimize_credits, is_periph_calculated_in_hailort); + hw_arch, should_optimize_credits, is_periph_calculated_in_hailort, is_core_hw_padding_config_in_dfc); CHECK_EXPECTED_AS_STATUS(local_layer_info); auto status = context_resources.add_edge_layer(local_layer_info.value(), ddr_pair_info.d2h_channel_id, @@ -405,8 +316,9 @@ static hailo_status fill_ddr_input_layer(ContextResources &context_resources, Re // optimize the credits. 
const bool should_optimize_credits = false; const bool is_periph_calculated_in_hailort = resources_manager.get_supported_features().periph_calculation_in_hailort; + const bool is_core_hw_padding_config_in_dfc = resources_manager.get_supported_features().core_hw_padding_config_in_dfc; auto local_layer_info = update_layer_info(layer_info, ddr_info->host_buffer_info, hw_consts, - hw_arch, should_optimize_credits, is_periph_calculated_in_hailort); + hw_arch, should_optimize_credits, is_periph_calculated_in_hailort, is_core_hw_padding_config_in_dfc); CHECK_EXPECTED_AS_STATUS(local_layer_info); auto status = context_resources.add_edge_layer(local_layer_info.value(), ddr_info->h2d_channel_id, @@ -681,7 +593,7 @@ static hailo_status proccess_write_ccw_action(const ContextSwitchConfigActionPtr static bool is_hailo1x_device_type(const hailo_device_architecture_t dev_arch) { // Compare with HAILO1X device archs - return (HAILO_ARCH_HAILO15H == dev_arch) || (HAILO_ARCH_PLUTO == dev_arch); + return (HAILO_ARCH_HAILO15H == dev_arch) || (HAILO_ARCH_HAILO15M == dev_arch) || (HAILO_ARCH_PLUTO == dev_arch); } static Expected find_dummy_stream(const LayerInfo &layer_info, const ContextResources &context_resources, diff --git a/hailort/libhailort/src/device_common/control.cpp b/hailort/libhailort/src/device_common/control.cpp index f4a5558..b8d6aab 100644 --- a/hailort/libhailort/src/device_common/control.cpp +++ b/hailort/libhailort/src/device_common/control.cpp @@ -13,6 +13,7 @@ #include "hef/hef_internal.hpp" #include "device_common/control.hpp" #include "hw_consts.hpp" +#include "utils/soc_utils/partial_cluster_reader.hpp" #include "control_protocol.h" #include "byte_order.h" @@ -38,6 +39,8 @@ namespace hailort "If doing continuous measurement, to enable overcurrent protection again you have to stop the power measurement on this dvm." 
\ ) +#define FORCE_LAYOUT_INTERNAL_ENV_VAR "FORCE_LAYOUT_INTERNAL" + typedef std::array, CONTROL_PROTOCOL__DVM_OPTIONS_COUNT> power_conversion_multiplier_t; @@ -3057,15 +3060,25 @@ Expected Control:: Expected Control::get_partial_clusters_layout_bitmap(Device &device) { + auto force_layout_env = std::getenv(FORCE_LAYOUT_INTERNAL_ENV_VAR); + if (force_layout_env) { + return std::stoi(std::string(force_layout_env)); + } + auto device_arch_exp = device.get_architecture(); CHECK_EXPECTED(device_arch_exp); - if (HAILO_ARCH_HAILO8L != device_arch_exp.value()) { - // Partial clusters layout is only relevant in HAILO_ARCH_HAILO8L arch + if (HAILO_ARCH_HAILO8L != device_arch_exp.value() && HAILO_ARCH_HAILO15M != device_arch_exp.value()) { + // Partial clusters layout is only relevant in HAILO_ARCH_HAILO8L and HAILO_ARCH_HAILO15M arch return Expected(PARTIAL_CLUSTERS_LAYOUT_IGNORE); } - auto extended_device_info_response = get_extended_device_info_response(device); - CHECK_EXPECTED(extended_device_info_response); - return BYTE_ORDER__ntohl(extended_device_info_response->partial_clusters_layout_bitmap); + + if (HAILO_ARCH_HAILO15M == device_arch_exp.value()) { + return PartialClusterReader::get_partial_clusters_layout_bitmap(device_arch_exp.value()); + } else { + auto extended_device_info_response = get_extended_device_info_response(device); + CHECK_EXPECTED(extended_device_info_response); + return BYTE_ORDER__ntohl(extended_device_info_response->partial_clusters_layout_bitmap); + } } Expected Control::get_extended_device_information(Device &device) diff --git a/hailort/libhailort/src/device_common/device.cpp b/hailort/libhailort/src/device_common/device.cpp index 22bb85a..57f6fb3 100644 --- a/hailort/libhailort/src/device_common/device.cpp +++ b/hailort/libhailort/src/device_common/device.cpp @@ -387,6 +387,29 @@ hailo_status Device::set_sleep_state(hailo_sleep_state_t sleep_state) return Control::set_sleep_state(*this, sleep_state); } +hailo_status Device::dma_map(void 
*address, size_t size, hailo_stream_direction_t direction) +{ + (void) address; + (void) size; + (void) direction; + return HAILO_NOT_IMPLEMENTED; +} + +hailo_status Device::dma_unmap(void *address, hailo_stream_direction_t direction) +{ + (void) address; + (void) direction; + return HAILO_NOT_IMPLEMENTED; +} + +Expected> Device::try_dma_map(vdma::DmaAbleBufferPtr buffer, + hailo_stream_direction_t direction) +{ + (void) buffer; + (void) direction; + return make_unexpected(HAILO_NOT_IMPLEMENTED); +} + hailo_status Device::direct_write_memory(uint32_t address, const void *buffer, uint32_t size) { (void) address; diff --git a/hailort/libhailort/src/device_common/device_internal.cpp b/hailort/libhailort/src/device_common/device_internal.cpp index 99c03f5..eca7a3a 100644 --- a/hailort/libhailort/src/device_common/device_internal.cpp +++ b/hailort/libhailort/src/device_common/device_internal.cpp @@ -166,7 +166,7 @@ Expected DeviceBase::get_fw_type() if ((architecture.value() == HAILO_ARCH_HAILO8) || (architecture.value() == HAILO_ARCH_HAILO8L)) { firmware_type = FIRMWARE_TYPE_HAILO8; } - else if (architecture.value() == HAILO_ARCH_HAILO15H) { + else if ((architecture.value() == HAILO_ARCH_HAILO15H ) || (architecture.value() == HAILO_ARCH_HAILO15M)) { firmware_type = FIRMWARE_TYPE_HAILO15; } else if (architecture.value() == HAILO_ARCH_PLUTO) { @@ -724,9 +724,11 @@ bool DeviceBase::is_hef_compatible(hailo_device_architecture_t device_arch, Prot case HAILO_ARCH_HAILO15H: // Compare with HW_ARCH__LAVENDER and HW_ARCH__GINGER to support hefs compiled for them return (hef_arch == PROTO__HW_ARCH__GINGER) || (hef_arch == PROTO__HW_ARCH__LAVENDER) || - (hef_arch == PROTO__HW_ARCH__HAILO15H); + (hef_arch == PROTO__HW_ARCH__HAILO15H) || (hef_arch == PROTO__HW_ARCH__HAILO15M); case HAILO_ARCH_PLUTO: return (hef_arch == PROTO__HW_ARCH__PLUTO); + case HAILO_ARCH_HAILO15M: + return (hef_arch == PROTO__HW_ARCH__HAILO15M); default: return false; } @@ -751,6 +753,8 @@ 
hailo_device_architecture_t DeviceBase::hef_arch_to_device_arch(ProtoHEFHwArch h return HAILO_ARCH_HAILO15H; case PROTO__HW_ARCH__PLUTO: return HAILO_ARCH_PLUTO; + case PROTO__HW_ARCH__HAILO15M: + return HAILO_ARCH_HAILO15M; default: return HAILO_ARCH_MAX_ENUM; diff --git a/hailort/libhailort/src/eth/eth_stream.cpp b/hailort/libhailort/src/eth/eth_stream.cpp index 56bb9bb..61164eb 100644 --- a/hailort/libhailort/src/eth/eth_stream.cpp +++ b/hailort/libhailort/src/eth/eth_stream.cpp @@ -83,7 +83,7 @@ hailo_status EthernetInputStream::deactivate_stream() // Aborting the stream to make sure all read/writes will exit. // Note - on ethernet stream there is no true "clear_abort" - one abort was called, the socket can't be reused. - status = abort(); + status = abort_impl(); CHECK_SUCCESS(status); return HAILO_SUCCESS; @@ -94,7 +94,7 @@ hailo_status EthernetInputStream::activate_stream() hailo_status status = HAILO_UNINITIALIZED; CONTROL_PROTOCOL__config_stream_params_t params = {}; - params.nn_stream_config = m_nn_stream_config; + params.nn_stream_config = m_layer_info.nn_stream_config; params.communication_type = CONTROL_PROTOCOL__COMMUNICATION_TYPE_UDP; params.is_input = true; params.stream_index = m_stream_info.index; @@ -435,7 +435,7 @@ std::chrono::milliseconds EthernetInputStream::get_timeout() const return std::chrono::milliseconds((MILLISECONDS_IN_SECOND * m_udp.m_timeout.tv_sec) + (m_udp.m_timeout.tv_usec / MICROSECONDS_IN_MILLISECOND)); } -hailo_status EthernetInputStream::abort() +hailo_status EthernetInputStream::abort_impl() { return m_udp.abort(); } @@ -466,7 +466,7 @@ hailo_status EthernetOutputStream::deactivate_stream() // Aborting the stream to make sure all read/writes will exit. // Note - on ethernet stream there is no true "clear_abort" - one abort was called, the socket can't be reused. 
- status = abort(); + status = abort_impl(); CHECK_SUCCESS(status); return HAILO_SUCCESS; @@ -477,7 +477,7 @@ hailo_status EthernetOutputStream::activate_stream() hailo_status status = HAILO_UNINITIALIZED; CONTROL_PROTOCOL__config_stream_params_t params = {}; - params.nn_stream_config = m_nn_stream_config; + params.nn_stream_config = m_layer_info.nn_stream_config; params.communication_type = CONTROL_PROTOCOL__COMMUNICATION_TYPE_UDP; params.is_input = false; params.stream_index = m_stream_info.index; @@ -733,7 +733,7 @@ std::chrono::milliseconds EthernetOutputStream::get_timeout() const return std::chrono::milliseconds((MILLISECONDS_IN_SECOND * m_udp.m_timeout.tv_sec) + (m_udp.m_timeout.tv_usec / MICROSECONDS_IN_MILLISECOND)); } -hailo_status EthernetOutputStream::abort() +hailo_status EthernetOutputStream::abort_impl() { return m_udp.abort(); } diff --git a/hailort/libhailort/src/eth/eth_stream.hpp b/hailort/libhailort/src/eth/eth_stream.hpp index c726519..d18698f 100644 --- a/hailort/libhailort/src/eth/eth_stream.hpp +++ b/hailort/libhailort/src/eth/eth_stream.hpp @@ -66,7 +66,7 @@ protected: public: EthernetInputStream(Device &device, Udp &&udp, EventPtr &&core_op_activated_event, const LayerInfo &layer_info, hailo_status &status) : - InputStreamBase(layer_info, HAILO_STREAM_INTERFACE_ETH, std::move(core_op_activated_event), status), m_udp(std::move(udp)), m_device(device) {} + InputStreamBase(layer_info, std::move(core_op_activated_event), status), m_udp(std::move(udp)), m_device(device) {} virtual ~EthernetInputStream(); static Expected> create(Device &device, @@ -83,8 +83,8 @@ public: virtual hailo_status deactivate_stream() override; virtual hailo_stream_interface_t get_interface() const override { return HAILO_STREAM_INTERFACE_ETH; } virtual std::chrono::milliseconds get_timeout() const override; - virtual hailo_status abort() override; - virtual hailo_status clear_abort() override {return HAILO_SUCCESS;}; // TODO (HRT-3799): clear abort state in the eth 
stream + virtual hailo_status abort_impl() override; + virtual hailo_status clear_abort_impl() override {return HAILO_SUCCESS;}; // TODO (HRT-3799): clear abort state in the eth stream }; class EthernetInputStreamRateLimited : public EthernetInputStream { @@ -146,7 +146,7 @@ private: Device &m_device; EthernetOutputStream(Device &device, const LayerInfo &edge_layer, Udp &&udp, EventPtr &&core_op_activated_event, hailo_status &status) : - OutputStreamBase(edge_layer, HAILO_STREAM_INTERFACE_ETH, std::move(core_op_activated_event), status), + OutputStreamBase(edge_layer, std::move(core_op_activated_event), status), leftover_buffer(), leftover_size(0), // Firmware starts sending sync sequence from 0, so treating the first previous as max value (that will be overflowed to 0) @@ -188,8 +188,8 @@ public: virtual hailo_status deactivate_stream() override; virtual hailo_stream_interface_t get_interface() const override { return HAILO_STREAM_INTERFACE_ETH; } virtual std::chrono::milliseconds get_timeout() const override; - virtual hailo_status abort() override; - virtual hailo_status clear_abort() override {return HAILO_SUCCESS;}; // TODO (HRT-3799): clear abort state in the eth stream + virtual hailo_status abort_impl() override; + virtual hailo_status clear_abort_impl() override {return HAILO_SUCCESS;}; // TODO (HRT-3799): clear abort state in the eth stream }; } /* namespace hailort */ diff --git a/hailort/libhailort/src/eth/hcp_config_core_op.cpp b/hailort/libhailort/src/eth/hcp_config_core_op.cpp index abc2f85..c410890 100644 --- a/hailort/libhailort/src/eth/hcp_config_core_op.cpp +++ b/hailort/libhailort/src/eth/hcp_config_core_op.cpp @@ -103,4 +103,9 @@ hailo_status HcpConfigCoreOp::deactivate_impl() return HAILO_SUCCESS; } +hailo_status HcpConfigCoreOp::shutdown() +{ + return abort_low_level_streams(); +} + } /* namespace hailort */ diff --git a/hailort/libhailort/src/eth/hcp_config_core_op.hpp b/hailort/libhailort/src/eth/hcp_config_core_op.hpp index 
a580a90..a273e96 100644 --- a/hailort/libhailort/src/eth/hcp_config_core_op.hpp +++ b/hailort/libhailort/src/eth/hcp_config_core_op.hpp @@ -51,6 +51,7 @@ public: virtual hailo_status activate_impl(uint16_t dynamic_batch_size) override; virtual hailo_status deactivate_impl() override; + virtual hailo_status shutdown() override; virtual Expected run_hw_infer_estimator() override; virtual ~HcpConfigCoreOp() = default; diff --git a/hailort/libhailort/src/hailort.cpp b/hailort/libhailort/src/hailort.cpp index 68c2f58..6f1faa8 100644 --- a/hailort/libhailort/src/hailort.cpp +++ b/hailort/libhailort/src/hailort.cpp @@ -51,20 +51,6 @@ using namespace hailort; // the storage. using ExportedBufferManager = ExportedResourceManager; -struct ThreeTupleHash { - template - std::size_t operator()(const T& tuple) const { - auto hash = std::hash::type>()(std::get<0>(tuple)); - hash ^= std::hash::type>()(std::get<1>(tuple)); - hash ^= std::hash::type>()(std::get<2>(tuple)); - return hash; - } -}; - -// (buffer_addr, device_id, mapping_direction) -using DmaMappingKey = std::tuple; -using DmaMappingManager = ExportedResourceManager; - COMPAT__INITIALIZER(hailort__initialize_logger) { // Init logger singleton if compiling only HailoRT @@ -1126,15 +1112,6 @@ hailo_status hailo_free_buffer(void *buffer) return ExportedBufferManager::unregister_resource(buffer); } -static Expected get_mapping_key(void *buffer, hailo_device device, hailo_dma_buffer_direction_t direction) -{ - hailo_device_id_t device_id{}; - auto status = hailo_get_device_id(device, &device_id); - CHECK_SUCCESS_AS_EXPECTED(status); - - return std::make_tuple(buffer, std::string(device_id.id), direction); -} - // TODO: hailo_dma_map_buffer_to_device/hailo_dma_unmap_buffer_from_device aren't thread safe when crossed with // hailo_allocate_buffer/hailo_free_buffer (HRT-10669) hailo_status hailo_dma_map_buffer_to_device(void *buffer, size_t size, hailo_device device, hailo_dma_buffer_direction_t direction) @@ -1149,39 +1126,17 
@@ hailo_status hailo_dma_map_buffer_to_device(void *buffer, size_t size, hailo_dev // The mapping is held by the Buffer object auto mapping_result = hailort_allocated_buffer->get()->storage().dma_map(*reinterpret_cast(device), direction); CHECK_EXPECTED_AS_STATUS(mapping_result); - const auto new_mapping = mapping_result.value(); - - if (!new_mapping) { - return HAILO_DMA_MAPPING_ALREADY_EXISTS; - } - } else { - // The buffer has been allocated by the user - // Create dma storage - auto dma_mapped_buffer = DmaStorage::create_from_user_address(buffer, size, direction, *reinterpret_cast(device)); - CHECK_EXPECTED_AS_STATUS(dma_mapped_buffer); - assert(buffer == dma_mapped_buffer.value()->user_address()); - auto dma_mapped_buffer_ptr = dma_mapped_buffer.release(); - - // Store the mapping in manager (otherwise it'll be freed at the end of this func) - auto key = get_mapping_key(dma_mapped_buffer_ptr->user_address(), device, direction); - CHECK_EXPECTED_AS_STATUS(key); - const auto status = DmaMappingManager::register_resource(dma_mapped_buffer_ptr, key.release()); - if (HAILO_INVALID_ARGUMENT == status) { - // TODO: This will change once we allow mapping the same buffer in different directions (HRT-10656). - // Checking that the mapping exists will need to be at DmaStorage's level - return HAILO_DMA_MAPPING_ALREADY_EXISTS; - } - CHECK_SUCCESS(status); + const auto is_new_mapping = mapping_result.value(); + return is_new_mapping ? HAILO_SUCCESS : HAILO_DMA_MAPPING_ALREADY_EXISTS; } - return HAILO_SUCCESS; + // The buffer has been allocated by the user + return reinterpret_cast(device)->dma_map(buffer, size, + (HAILO_DMA_BUFFER_DIRECTION_H2D == direction) ? 
HAILO_H2D_STREAM : HAILO_D2H_STREAM); } hailo_status hailo_dma_unmap_buffer_from_device(void *buffer, hailo_device device, hailo_dma_buffer_direction_t direction) { - // TODO: support mapping the same buffer in different directions (HRT-10656) - (void)direction; - CHECK_ARG_NOT_NULL(buffer); CHECK_ARG_NOT_NULL(device); @@ -1193,9 +1148,9 @@ hailo_status hailo_dma_unmap_buffer_from_device(void *buffer, hailo_device devic return HAILO_SUCCESS; } - auto key = get_mapping_key(buffer, device, direction); - CHECK_EXPECTED_AS_STATUS(key); - return DmaMappingManager::unregister_resource(key.release()); + // The buffer has been allocated by the user + return reinterpret_cast(device)->dma_unmap(buffer, + (HAILO_DMA_BUFFER_DIRECTION_H2D == direction) ? HAILO_H2D_STREAM : HAILO_D2H_STREAM); } hailo_status hailo_calculate_eth_input_rate_limits(hailo_hef hef, const char *network_group_name, uint32_t fps, @@ -1439,10 +1394,16 @@ hailo_status hailo_deactivate_network_group(hailo_activated_network_group activa auto net_group_casted = reinterpret_cast(activated_network_group); delete net_group_casted; - + return HAILO_SUCCESS; } +hailo_status hailo_shutdown_network_group(hailo_configured_network_group network_group) +{ + CHECK_ARG_NOT_NULL(network_group); + return reinterpret_cast(network_group)->shutdown(); +} + hailo_status hailo_set_notification_callback(hailo_device device, hailo_notification_callback callback, hailo_notification_id_t notification_id, void *opaque) { @@ -2015,7 +1976,7 @@ hailo_status hailo_hef_get_bottleneck_fps(hailo_hef hef, const char *network_gro return HAILO_SUCCESS; } -hailo_status hailo_make_input_vstream_params(hailo_configured_network_group network_group, bool quantized, +hailo_status hailo_make_input_vstream_params(hailo_configured_network_group network_group, bool /*unused*/, hailo_format_type_t format_type, hailo_input_vstream_params_by_name_t *input_params, size_t *input_params_count) { @@ -2024,7 +1985,7 @@ hailo_status 
hailo_make_input_vstream_params(hailo_configured_network_group netw CHECK_ARG_NOT_NULL(input_params_count); auto net_group_ptr = reinterpret_cast(network_group); - auto input_params_map = net_group_ptr->make_input_vstream_params(quantized, format_type, + auto input_params_map = net_group_ptr->make_input_vstream_params({}, format_type, HAILO_DEFAULT_VSTREAM_TIMEOUT_MS , HAILO_DEFAULT_VSTREAM_QUEUE_SIZE); CHECK_EXPECTED_AS_STATUS(input_params_map); @@ -2050,7 +2011,7 @@ hailo_status hailo_make_input_vstream_params(hailo_configured_network_group netw return HAILO_SUCCESS; } -hailo_status hailo_make_output_vstream_params(hailo_configured_network_group network_group, bool quantized, +hailo_status hailo_make_output_vstream_params(hailo_configured_network_group network_group, bool /*unused*/, hailo_format_type_t format_type, hailo_output_vstream_params_by_name_t *output_vstream_params, size_t *output_params_count) { @@ -2059,7 +2020,7 @@ hailo_status hailo_make_output_vstream_params(hailo_configured_network_group net CHECK_ARG_NOT_NULL(output_params_count); auto net_group_ptr = reinterpret_cast(network_group); - auto output_params_map = net_group_ptr->make_output_vstream_params(quantized, format_type, + auto output_params_map = net_group_ptr->make_output_vstream_params({}, format_type, HAILO_DEFAULT_VSTREAM_TIMEOUT_MS , HAILO_DEFAULT_VSTREAM_QUEUE_SIZE); CHECK_EXPECTED_AS_STATUS(output_params_map); @@ -2597,14 +2558,14 @@ HAILORTAPI hailo_status hailo_get_network_infos(hailo_configured_network_group n } HAILORTAPI hailo_status hailo_hef_make_input_vstream_params(hailo_hef hef, const char *name, - bool quantized, hailo_format_type_t format_type, + bool /*unused*/, hailo_format_type_t format_type, hailo_input_vstream_params_by_name_t *input_params, size_t *input_params_count) { CHECK_ARG_NOT_NULL(input_params); CHECK_ARG_NOT_NULL(input_params_count); const auto name_str = get_name_as_str(name); - auto input_params_map = 
(reinterpret_cast(hef))->make_input_vstream_params(name_str, quantized, format_type, + auto input_params_map = (reinterpret_cast(hef))->make_input_vstream_params(name_str, {}, format_type, HAILO_DEFAULT_VSTREAM_TIMEOUT_MS , HAILO_DEFAULT_VSTREAM_QUEUE_SIZE); CHECK_EXPECTED_AS_STATUS(input_params_map); @@ -2631,14 +2592,14 @@ HAILORTAPI hailo_status hailo_hef_make_input_vstream_params(hailo_hef hef, const } hailo_status hailo_hef_make_output_vstream_params(hailo_hef hef, const char *name, - bool quantized, hailo_format_type_t format_type, + bool /*unused*/, hailo_format_type_t format_type, hailo_output_vstream_params_by_name_t *output_vstream_params, size_t *output_params_count) { CHECK_ARG_NOT_NULL(output_vstream_params); CHECK_ARG_NOT_NULL(output_params_count); const auto name_str = get_name_as_str(name); - auto output_params_map = (reinterpret_cast(hef))->make_output_vstream_params(name_str, quantized, format_type, + auto output_params_map = (reinterpret_cast(hef))->make_output_vstream_params(name_str, {}, format_type, HAILO_DEFAULT_VSTREAM_TIMEOUT_MS , HAILO_DEFAULT_VSTREAM_QUEUE_SIZE); CHECK_EXPECTED_AS_STATUS(output_params_map); diff --git a/hailort/libhailort/src/hailort_defaults.cpp b/hailort/libhailort/src/hailort_defaults.cpp index 9424b7c..5c819a8 100644 --- a/hailort/libhailort/src/hailort_defaults.cpp +++ b/hailort/libhailort/src/hailort_defaults.cpp @@ -143,26 +143,21 @@ hailo_format_t HailoRTDefaults::get_user_buffer_format() return get_user_buffer_format(true, HAILO_FORMAT_TYPE_AUTO); } -hailo_format_t HailoRTDefaults::get_user_buffer_format(bool quantized, hailo_format_type_t format_type) +hailo_format_t HailoRTDefaults::get_user_buffer_format(bool /*unused*/, hailo_format_type_t format_type) { hailo_format_t user_buffer_format{}; user_buffer_format.type = format_type; user_buffer_format.order = HAILO_FORMAT_ORDER_AUTO; + user_buffer_format.flags = HAILO_FORMAT_FLAGS_NONE; - hailo_format_flags_t flags = HAILO_FORMAT_FLAGS_NONE; - if (quantized) { - 
flags = static_cast(flags | HAILO_FORMAT_FLAGS_QUANTIZED); - } - - user_buffer_format.flags = flags; return user_buffer_format; } -hailo_transform_params_t HailoRTDefaults::get_transform_params(bool quantized, hailo_format_type_t format_type) +hailo_transform_params_t HailoRTDefaults::get_transform_params(bool /*unused*/, hailo_format_type_t format_type) { hailo_transform_params_t params{}; params.transform_mode = HAILO_STREAM_TRANSFORM_COPY; - params.user_buffer_format = get_user_buffer_format(quantized, format_type); + params.user_buffer_format = get_user_buffer_format({}, format_type); return params; } @@ -176,10 +171,10 @@ hailo_vstream_params_t HailoRTDefaults::get_vstreams_params() return get_vstreams_params(true, HAILO_FORMAT_TYPE_AUTO); } -hailo_vstream_params_t HailoRTDefaults::get_vstreams_params(bool quantized, hailo_format_type_t format_type) +hailo_vstream_params_t HailoRTDefaults::get_vstreams_params(bool /*unused*/, hailo_format_type_t format_type) { hailo_vstream_params_t params{}; - params.user_buffer_format = get_user_buffer_format(quantized, format_type); + params.user_buffer_format = get_user_buffer_format({}, format_type); params.queue_size = HAILO_DEFAULT_VSTREAM_QUEUE_SIZE; params.timeout_ms = HAILO_DEFAULT_VSTREAM_TIMEOUT_MS; params.vstream_stats_flags = HAILO_VSTREAM_STATS_NONE; @@ -193,9 +188,7 @@ hailo_transform_params_t HailoRTDefaults::get_transform_params(const hailo_strea params.transform_mode = HAILO_STREAM_TRANSFORM_COPY; params.user_buffer_format.type = stream_info.format.type; params.user_buffer_format.order = get_default_host_format_order(stream_info.format); - params.user_buffer_format.flags = static_cast( - HAILO_FORMAT_FLAGS_QUANTIZED & - ~HAILO_FORMAT_FLAGS_TRANSPOSED); + params.user_buffer_format.flags = HAILO_FORMAT_FLAGS_NONE; return params; } diff --git a/hailort/libhailort/src/hef/context_switch_actions.cpp b/hailort/libhailort/src/hef/context_switch_actions.cpp index 7c8b031..b060466 100644 --- 
a/hailort/libhailort/src/hef/context_switch_actions.cpp +++ b/hailort/libhailort/src/hef/context_switch_actions.cpp @@ -906,6 +906,7 @@ static CONTEXT_SWITCH_DEFS__stream_reg_info_t parse_nn_config(const CONTROL_PROT reg_info.periph_bytes_per_buffer = nn_config.periph_bytes_per_buffer; reg_info.periph_buffers_per_frame = nn_config.periph_buffers_per_frame; reg_info.is_periph_calculated_in_hailort = nn_config.is_periph_calculated_in_hailort; + reg_info.is_core_hw_padding_config_in_dfc = nn_config.is_core_hw_padding_config_in_dfc; return reg_info; } diff --git a/hailort/libhailort/src/hef/core_op_metadata.cpp b/hailort/libhailort/src/hef/core_op_metadata.cpp index c7ef727..c600123 100644 --- a/hailort/libhailort/src/hef/core_op_metadata.cpp +++ b/hailort/libhailort/src/hef/core_op_metadata.cpp @@ -328,7 +328,6 @@ Expected> CoreOpMetadata::get_all_stream_infos( return res; } - size_t CoreOpMetadata::get_contexts_count() { return (m_dynamic_contexts.size() + CONTROL_PROTOCOL__CONTEXT_SWITCH_NUMBER_OF_NON_DYNAMIC_CONTEXTS); @@ -524,7 +523,7 @@ Expected> NetworkGroupMetadata::get_stream_names_from_v auto all_layers_infos = get_all_layer_infos(m_core_ops_metadata_per_arch); CHECK_EXPECTED(all_layers_infos); - for (auto &layer_info : all_layers_infos.release()) { + for (auto &layer_info : all_layers_infos.value()) { if (layer_info.is_mux) { if (is_edge_under_mux(layer_info, vstream_name)) { // vstream_name is a demux of the layer info @@ -538,7 +537,12 @@ Expected> NetworkGroupMetadata::get_stream_names_from_v } else if (vstream_name == layer_info.name) { // Multi planar case if (layer_info.is_multi_planar) { - for (auto &plane : layer_info.planes) { + auto planes = layer_info.planes; + // In multi-planar case we need to sort the streams based on their plane index -> we count on order to know which plane belongs to which stream + std::sort(planes.begin(), planes.end(), [](const auto &a, const auto & b) { + return a.plane_index < b.plane_index; + }); + for (const auto 
&plane : planes) { results.push_back(plane.name); } } else { @@ -548,6 +552,7 @@ Expected> NetworkGroupMetadata::get_stream_names_from_v } } CHECK_AS_EXPECTED(0 < results.size(), HAILO_NOT_FOUND, "Did not found vstream {}", vstream_name); + return results; } diff --git a/hailort/libhailort/src/hef/core_op_metadata.hpp b/hailort/libhailort/src/hef/core_op_metadata.hpp index 365164f..73a3f4d 100644 --- a/hailort/libhailort/src/hef/core_op_metadata.hpp +++ b/hailort/libhailort/src/hef/core_op_metadata.hpp @@ -30,6 +30,7 @@ struct SupportedFeatures { bool nms_burst_mode = false; bool output_scale_by_feature = false; bool periph_calculation_in_hailort = false; + bool core_hw_padding_config_in_dfc = false; }; // For each config_stream_index we store vector of all ccw write length. The vector is used to build the config buffer.g diff --git a/hailort/libhailort/src/hef/hef.cpp b/hailort/libhailort/src/hef/hef.cpp index 38130ef..7e959ec 100644 --- a/hailort/libhailort/src/hef/hef.cpp +++ b/hailort/libhailort/src/hef/hef.cpp @@ -29,6 +29,7 @@ #include "net_flow/ops/argmax_post_process.hpp" #include "net_flow/ops/softmax_post_process.hpp" #include "net_flow/ops/yolov5_seg_post_process.hpp" +#include "net_flow/ops/yolov8_post_process.hpp" #include "hef/hef_internal.hpp" #include "vdma/pcie/pcie_device.hpp" #include "vdma/vdma_config_manager.hpp" @@ -175,7 +176,7 @@ Hef::Hef(std::unique_ptr pimpl) : pimpl(std::move(pimpl)) {} -Expected> Hef::get_input_stream_infos(const std::string &name) +Expected> Hef::get_input_stream_infos(const std::string &name) const { auto network_pair = pimpl->get_network_group_and_network_name(name); CHECK_EXPECTED(network_pair); @@ -183,7 +184,7 @@ Expected> Hef::get_input_stream_infos(const std return pimpl->get_input_stream_infos(network_pair.value().first, network_pair.value().second); } -Expected> Hef::get_output_stream_infos(const std::string &name) +Expected> Hef::get_output_stream_infos(const std::string &name) const { auto network_pair = 
pimpl->get_network_group_and_network_name(name); CHECK_EXPECTED(network_pair); @@ -191,7 +192,7 @@ Expected> Hef::get_output_stream_infos(const st return pimpl->get_output_stream_infos(network_pair.value().first, network_pair.value().second); } -Expected> Hef::get_all_stream_infos(const std::string &name) +Expected> Hef::get_all_stream_infos(const std::string &name) const { auto network_pair = pimpl->get_network_group_and_network_name(name); CHECK_EXPECTED(network_pair); @@ -199,7 +200,7 @@ Expected> Hef::get_all_stream_infos(const std:: return pimpl->get_all_stream_infos(network_pair.value().first, network_pair.value().second); } -Expected> Hef::get_network_infos(const std::string &net_group_name) +Expected> Hef::get_network_infos(const std::string &net_group_name) const { auto names_pair = pimpl->get_network_group_and_network_name(net_group_name); CHECK_EXPECTED(names_pair); @@ -207,7 +208,7 @@ Expected> Hef::get_network_infos(const std::st } Expected Hef::get_stream_info_by_name(const std::string &stream_name, - hailo_stream_direction_t stream_direction, const std::string &net_group_name) + hailo_stream_direction_t stream_direction, const std::string &net_group_name) const { // Addressing the situation where net_group_name == "" auto net_group_name_pair = pimpl->get_network_group_and_network_name(net_group_name); @@ -217,7 +218,7 @@ Expected Hef::get_stream_info_by_name(const std::string &st return pimpl->get_stream_info_by_name(stream_name, stream_direction, net_group_name_str); } -Expected> Hef::get_input_vstream_infos(const std::string &name) +Expected> Hef::get_input_vstream_infos(const std::string &name) const { auto network_pair = pimpl->get_network_group_and_network_name(name); CHECK_EXPECTED(network_pair); @@ -225,7 +226,7 @@ Expected> Hef::get_input_vstream_infos(const s return pimpl->get_input_vstream_infos(network_pair.value().first, network_pair.value().second); } -Expected> Hef::get_output_vstream_infos(const std::string &name) +Expected> 
Hef::get_output_vstream_infos(const std::string &name) const { auto network_pair = pimpl->get_network_group_and_network_name(name); CHECK_EXPECTED(network_pair); @@ -233,7 +234,7 @@ Expected> Hef::get_output_vstream_infos(const return pimpl->get_output_vstream_infos(network_pair.value().first, network_pair.value().second); } -Expected> Hef::get_all_vstream_infos(const std::string &name) +Expected> Hef::get_all_vstream_infos(const std::string &name) const { auto network_pair = pimpl->get_network_group_and_network_name(name); CHECK_EXPECTED(network_pair); @@ -241,7 +242,7 @@ Expected> Hef::get_all_vstream_infos(const std return pimpl->get_all_vstream_infos(network_pair.value().first, network_pair.value().second); } -Expected> Hef::get_sorted_output_names(const std::string &net_group_name) +Expected> Hef::get_sorted_output_names(const std::string &net_group_name) const { // Addressing the situation where net_group_name == "" auto net_group_name_pair = pimpl->get_network_group_and_network_name(net_group_name); @@ -251,7 +252,7 @@ Expected> Hef::get_sorted_output_names(const std::strin return pimpl->get_sorted_output_names(net_group_name_str); } -Expected Hef::get_number_of_input_streams(const std::string &net_group_name) +Expected Hef::get_number_of_input_streams(const std::string &net_group_name) const { // Addressing the situation where net_group_name == "" auto net_group_name_pair = pimpl->get_network_group_and_network_name(net_group_name); @@ -261,7 +262,7 @@ Expected Hef::get_number_of_input_streams(const std::string &net_group_n return pimpl->get_number_of_input_streams(net_group_name_str); } -Expected Hef::get_number_of_output_streams(const std::string &net_group_name) +Expected Hef::get_number_of_output_streams(const std::string &net_group_name) const { // Addressing the situation where net_group_name == "" auto net_group_name_pair = pimpl->get_network_group_and_network_name(net_group_name); @@ -271,13 +272,13 @@ Expected Hef::get_number_of_output_streams(const 
std::string &net_group_ return pimpl->get_number_of_output_streams(net_group_name_str); } -Expected Hef::get_bottleneck_fps(const std::string &net_group_name) +Expected Hef::get_bottleneck_fps(const std::string &net_group_name) const { return pimpl->get_bottleneck_fps(net_group_name); } -Expected Hef::get_hef_device_arch() +Expected Hef::get_hef_device_arch() const { return DeviceBase::hef_arch_to_device_arch(pimpl->get_device_arch()); } @@ -288,19 +289,19 @@ Expected Hef::device_arch_to_string(const hailo_device_architecture } Expected Hef::get_vstream_name_from_original_name(const std::string &original_name, - const std::string &net_group_name) + const std::string &net_group_name) const { return pimpl->get_vstream_name_from_original_name(original_name, net_group_name); } Expected> Hef::get_original_names_from_vstream_name(const std::string &stream_name, - const std::string &net_group_name) + const std::string &net_group_name) const { return pimpl->get_original_names_from_vstream_name(stream_name, net_group_name); } Expected> Hef::get_stream_names_from_vstream_name(const std::string &vstream_name, - const std::string &net_group_name) + const std::string &net_group_name) const { auto network_group_name_pair = pimpl->get_network_group_and_network_name(net_group_name); CHECK_EXPECTED(network_group_name_pair); @@ -310,7 +311,7 @@ Expected> Hef::get_stream_names_from_vstream_name(const } Expected> Hef::get_vstream_names_from_stream_name(const std::string &stream_name, - const std::string &net_group_name) + const std::string &net_group_name) const { auto network_group_name_pair = pimpl->get_network_group_and_network_name(net_group_name); CHECK_EXPECTED(network_group_name_pair); @@ -493,6 +494,11 @@ hailo_status Hef::Impl::parse_hef_memview(const MemoryView &hef_memview) return HAILO_SUCCESS; } + +bool is_multi_layout(const ProtoHEFHwArch &hw_arch) { + return (hw_arch == ProtoHEFHwArch::PROTO__HW_ARCH__HAILO8L) || (hw_arch == ProtoHEFHwArch::PROTO__HW_ARCH__HAILO15M); +} 
+ hailo_status Hef::Impl::fill_networks_metadata() { fill_extensions_bitset(); @@ -529,7 +535,7 @@ hailo_status Hef::Impl::fill_networks_metadata() sorted_network_names.push_back(HailoRTDefaults::get_network_name(network_group_name)); } - if (ProtoHEFHwArch::PROTO__HW_ARCH__HAILO8L == get_device_arch()) { + if (is_multi_layout(get_device_arch())) { if (m_supported_features.hailo_net_flow) { for (auto &partial_core_op : core_op.partial_core_ops) { partial_clusters_layout_bitmap = partial_core_op->layout.partial_clusters_layout_bitmap(); @@ -844,6 +850,8 @@ SupportedFeatures Hef::Impl::get_supported_features(const ProtoHEFHeader &header header, hef_extensions, included_features); supported_features.periph_calculation_in_hailort = check_hef_extension(ProtoHEFExtensionType::PERIPH_CALCULATION_IN_HAILORT, header, hef_extensions, included_features); + supported_features.core_hw_padding_config_in_dfc = check_hef_optional_extension(ProtoHEFExtensionType::HW_PADDING, + header, hef_optional_extensions); return supported_features; } @@ -957,6 +965,51 @@ Expected create_yolov5_seg_op_metadata(const yolov5_seg_config, network_name); } +Expected create_yolov8_op_metadata(const ProtoHEFOp &op_proto, + const std::map &pad_index_to_streams_info, const std::map &input_to_output_pads, + const std::string &network_name) +{ + auto nms_config = create_post_process_nms_config(op_proto); + + net_flow::Yolov8PostProcessConfig yolov8_config{}; + yolov8_config.image_height = (float32_t)op_proto.nms_op().yolov8_nms_op().image_height(); + yolov8_config.image_width = (float32_t)op_proto.nms_op().yolov8_nms_op().image_width(); + + std::unordered_map inputs_metadata; + std::unordered_map outputs_metadata; + net_flow::BufferMetaData output_metadata{}; + output_metadata.format = net_flow::NmsOpMetadata::expand_output_format_autos_by_op_type( + { HAILO_FORMAT_TYPE_AUTO, HAILO_FORMAT_ORDER_AUTO, HAILO_FORMAT_FLAGS_NONE }, net_flow::OperationType::YOLOV8); + 
outputs_metadata.insert({op_proto.output_pads()[0].name(), output_metadata}); + + for (auto &bbox_proto : op_proto.nms_op().yolov8_nms_op().bbox_decoders()) { + assert(contains(pad_index_to_streams_info, static_cast(bbox_proto.reg_pad_index()))); + auto reg_name = pad_index_to_streams_info.at(bbox_proto.reg_pad_index()).name; + assert(contains(pad_index_to_streams_info, static_cast(bbox_proto.cls_pad_index()))); + auto cls_name = pad_index_to_streams_info.at(bbox_proto.cls_pad_index()).name; + yolov8_config.reg_to_cls_inputs.emplace_back(net_flow::Yolov8MatchingLayersNames{reg_name, cls_name, bbox_proto.stride()}); + } + + for (auto &input_pad : op_proto.input_pads()) { + CHECK_AS_EXPECTED(contains(input_to_output_pads, static_cast(input_pad.index())), HAILO_INVALID_HEF, + "NMS op is not connected to core op"); + auto output_pad_index = input_to_output_pads.at(input_pad.index()); + CHECK_AS_EXPECTED(contains(pad_index_to_streams_info, output_pad_index), HAILO_INVALID_HEF, + "Pad {} of post-process {} is not connected to any core output stream", + input_pad.index(), op_proto.name()); + const auto &op_input_stream = pad_index_to_streams_info.at(output_pad_index); + net_flow::BufferMetaData input_metadata{}; + input_metadata.format = op_input_stream.format; + input_metadata.quant_info = op_input_stream.quant_info; + input_metadata.shape = op_input_stream.shape; + input_metadata.padded_shape = op_input_stream.hw_shape; + inputs_metadata.insert({op_input_stream.name, input_metadata}); + } + + return net_flow::Yolov8OpMetadata::create(inputs_metadata, outputs_metadata, nms_config, yolov8_config, + network_name); +} + Expected create_yolox_op_metadata(const ProtoHEFOp &op_proto, const std::map &pad_index_to_streams_info, const std::map &input_to_output_pads, const std::string &network_name) @@ -974,17 +1027,17 @@ Expected create_yolox_op_metadata(const Prot { HAILO_FORMAT_TYPE_AUTO, HAILO_FORMAT_ORDER_AUTO, HAILO_FORMAT_FLAGS_NONE }, net_flow::OperationType::YOLOX); 
outputs_metadata.insert({op_proto.output_pads()[0].name(), output_metadata}); - for (auto &bbox_proto : op_proto.nms_op().yolox_nms_op().bbox_decoders()) { + for (const auto &bbox_proto : op_proto.nms_op().yolox_nms_op().bbox_decoders()) { assert(contains(pad_index_to_streams_info, static_cast(bbox_proto.reg_pad_index()))); auto reg_name = pad_index_to_streams_info.at(bbox_proto.reg_pad_index()).name; assert(contains(pad_index_to_streams_info, static_cast(bbox_proto.cls_pad_index()))); auto cls_name = pad_index_to_streams_info.at(bbox_proto.cls_pad_index()).name; assert(contains(pad_index_to_streams_info, static_cast(bbox_proto.obj_pad_index()))); auto obj_name = pad_index_to_streams_info.at(bbox_proto.obj_pad_index()).name; - yolox_config.input_names.emplace_back(net_flow::MatchingLayersNames{reg_name, obj_name, cls_name}); + yolox_config.input_names.emplace_back(net_flow::YoloxMatchingLayersNames{reg_name, obj_name, cls_name}); } - for (auto &input_pad : op_proto.input_pads()) { + for (const auto &input_pad : op_proto.input_pads()) { CHECK_AS_EXPECTED(contains(input_to_output_pads, static_cast(input_pad.index())), HAILO_INVALID_HEF, "NMS op is not connected to core op"); auto output_pad_index = input_to_output_pads.at(input_pad.index()); @@ -1070,14 +1123,14 @@ Expected create_ssd_op_metadata(const ProtoH } Expected> create_argmax_op_metadata(const LayerInfo &op_input_layer_info, const ProtoHEFPad &output_pad, - const std::string &output_name, const bool &is_hw_padding_supported, const std::string &network_name) + const std::string &output_name, const bool &is_core_hw_padding_supported, const std::string &network_name) { // create input meta std::unordered_map inputs_metadata; hailort::net_flow::BufferMetaData input_metadata{}; input_metadata.shape = op_input_layer_info.shape; // If padding is done in HW, the padded shape is as the shape (TODO: Remove once HRT support hw_padding from DFC) - if (is_hw_padding_supported) { + if (is_core_hw_padding_supported) { 
input_metadata.padded_shape = input_metadata.shape; } else { input_metadata.padded_shape = op_input_layer_info.hw_shape; @@ -1169,7 +1222,7 @@ Expected> create_softmax_op_metadata(const Expected> create_logits_op_metadata(const ProtoHEFOp &op_proto, const std::map &pad_index_to_streams_info, const std::map &input_to_output_pads, - const ProtoHEFHwArch &hef_arch, const std::string &network_name) + const ProtoHEFHwArch &hef_arch, const std::string &network_name, const bool is_core_hw_padding_config_in_dfc) { // connect input_streams to net_flow element CHECK_AS_EXPECTED(op_proto.input_pads().size() == 1, HAILO_INVALID_HEF, "Logits op must have 1 input only"); @@ -1188,13 +1241,17 @@ Expected> create_logits_op_metadata(const const auto &op_input_layer_info = pad_index_to_streams_info.at(output_pad_index); auto max_periph_bytes_from_hef = HefConfigurator::max_periph_bytes_value(DeviceBase::hef_arch_to_device_arch(hef_arch)); CHECK_EXPECTED(max_periph_bytes_from_hef); - const auto max_periph_bytes = (0 == op_input_layer_info.max_shmifo_size) ? max_periph_bytes_from_hef.value(): - MIN(max_periph_bytes_from_hef.value(), op_input_layer_info.max_shmifo_size); - const auto is_hw_padding_supported = HefConfigurator::is_hw_padding_supported(op_input_layer_info, max_periph_bytes); + + // TODO HRT-12099 - return invalid hef error when remove support for hefs with no max_shmifo size + const auto max_periph_bytes = (0 == op_input_layer_info.max_shmifo_size) ? 
max_periph_bytes_from_hef.value() : + std::min(max_periph_bytes_from_hef.value(), op_input_layer_info.max_shmifo_size); + const auto is_core_hw_padding_supported = HefConfigurator::is_core_hw_padding_supported(op_input_layer_info, + max_periph_bytes, is_core_hw_padding_config_in_dfc); switch (op_proto.logits_op().logits_type()) { case ProtoHEFLogitsType::PROTO_HEF_ARGMAX_TYPE: { - return create_argmax_op_metadata(op_input_layer_info, output_pad, output_pad.name(), is_hw_padding_supported, network_name); + return create_argmax_op_metadata(op_input_layer_info, output_pad, output_pad.name(), + is_core_hw_padding_supported, network_name); } case ProtoHEFLogitsType::PROTO_HEF_SOFTMAX_TYPE: { return create_softmax_op_metadata(op_input_layer_info, output_pad, output_pad.name(), network_name); @@ -1280,6 +1337,13 @@ Expected> Hef::Impl::create_ops_ post_process_op_metadata = expected_post_process_op_metadata.release(); break; } + case ProtoHEFNmsOp::kYolov8NmsOp: { + auto expected_post_process_op_metadata = create_yolov8_op_metadata(op_proto, pad_index_to_streams_info, + input_to_output_pads, network_name); + CHECK_EXPECTED(expected_post_process_op_metadata); + post_process_op_metadata = expected_post_process_op_metadata.release(); + break; + } default: { LOGGER__ERROR("Unsupported Net-Flow NMS-Op"); return make_unexpected(HAILO_INTERNAL_FAILURE); @@ -1291,7 +1355,7 @@ Expected> Hef::Impl::create_ops_ } case ProtoHEFOp::kLogitsOp: { auto expected_logits_op_metadata = create_logits_op_metadata(op_proto, pad_index_to_streams_info, - input_to_output_pads, hef_arch, network_name); + input_to_output_pads, hef_arch, network_name, m_supported_features.core_hw_padding_config_in_dfc); CHECK_EXPECTED(expected_logits_op_metadata); auto post_process_op_metadata = expected_logits_op_metadata.release(); @@ -1336,42 +1400,6 @@ hailo_status Hef::Impl::validate_boundary_streams_were_created(const std::string return HAILO_SUCCESS; } -Expected HefConfigurator::parse_nn_stream_config(uint32_t 
width, - uint32_t hw_data_bytes, uint16_t core_buffers_per_frame, uint16_t core_bytes_per_buffer, bool hw_padding_supported, bool is_ddr, - uint16_t periph_buffers_per_frame, uint16_t periph_bytes_per_buffer) -{ - CONTROL_PROTOCOL__nn_stream_config_t stream_config = {}; - - stream_config.core_buffers_per_frame = core_buffers_per_frame; - stream_config.core_bytes_per_buffer = core_bytes_per_buffer; - - stream_config.periph_buffers_per_frame = periph_buffers_per_frame; - stream_config.periph_bytes_per_buffer = periph_bytes_per_buffer; - - // Set default to false and in case of extension enabled update value - stream_config.is_periph_calculated_in_hailort = false; - - /* For DDR buffering - core buffers is depended on the amount of buffers per PCIe interrupt. No HW padding required */ - if (is_ddr) { - stream_config.feature_padding_payload = 0; - } else { - if (hw_padding_supported) { - // We currently only support HW padding in hailort with format HAILO_FORMAT_ORDER_NHCW - which is padded by feature - // Padding should not affect the periph register values. 
- const uint32_t feature_padding_payload_32bit = static_cast(width) * static_cast(hw_data_bytes); - CHECK_AS_EXPECTED(IS_FIT_IN_UINT16(width * hw_data_bytes), HAILO_INVALID_HEF, "frame width {} is too big", - feature_padding_payload_32bit); - stream_config.feature_padding_payload = static_cast(feature_padding_payload_32bit); - } else { - stream_config.feature_padding_payload = 0; - } - /* For now, no support for buffer padding */ - stream_config.buffer_padding_payload = 0; - stream_config.buffer_padding = 0; - } - return stream_config; -} - Expected HefConfigurator::parse_nn_stream_config(const ProtoHEFEdgeLayerBase &edge_layer, bool hw_padding_supported, const ProtoHEFEdgeConnectionType &edge_connection_type) { @@ -1379,44 +1407,41 @@ Expected HefConfigurator::parse_nn_stream_ "core_bytes_per_buffer is too big"); CHECK_AS_EXPECTED(IS_FIT_IN_UINT16(edge_layer.core_buffers_per_frame()), HAILO_INVALID_HEF, "core_buffers_per_frame is too big"); - - auto is_ddr = ProtoHEFEdgeConnectionType::PROTO__EDGE_CONNECTION_TYPE__DDR == edge_connection_type; - + CHECK_AS_EXPECTED(!((ProtoHEFEdgeConnectionType::PROTO__EDGE_CONNECTION_TYPE__DDR == edge_connection_type) && + hw_padding_supported), HAILO_INVALID_HEF, "DDR layer can't have hw_padding_supported"); CHECK_AS_EXPECTED(IS_FIT_IN_UINT32(edge_layer.padded_width() * edge_layer.padded_features() * edge_layer.padded_height() * edge_layer.data_bytes()), HAILO_INVALID_HEF, "padded shape too big"); + CONTROL_PROTOCOL__nn_stream_config_t stream_config = {}; + + stream_config.core_buffers_per_frame = static_cast(edge_layer.core_buffers_per_frame()); + stream_config.core_bytes_per_buffer = static_cast(edge_layer.core_bytes_per_buffer()); + // TODO HRT-10993: Remove these parameters for the parse_nn_stream_config function call - // These values will get overrided in update_layer_info in resource_manager_builder - except in case of - // where we dont have resource manager (ethernet) - uint16_t initial_periph_bytes_per_buffer = 
static_cast(edge_layer.core_bytes_per_buffer()); - const uint16_t INITIAL_PERIPH_BUFFERS_PER_FRAME = static_cast(edge_layer.core_buffers_per_frame()); + // Initial periph register values - these values will get overrided in update_layer_info in resource_manager_builder, + // except in case of where we dont have resource manager (ethernet) + stream_config.periph_buffers_per_frame = static_cast(edge_layer.core_buffers_per_frame()); + stream_config.periph_bytes_per_buffer = static_cast(edge_layer.core_bytes_per_buffer()); // If hw padding is enabled - and shape fits in uint16t - change initial periph value to be row size - in any case // Will get updated if there is resource manager - and in ethernet will have either core register values - and if hw // padding will have hw padding values if (hw_padding_supported) { if (IS_FIT_IN_UINT16(edge_layer.width() * edge_layer.features() * edge_layer.data_bytes())) { - initial_periph_bytes_per_buffer = static_cast(edge_layer.width() * edge_layer.features() * + stream_config.periph_bytes_per_buffer = static_cast(edge_layer.width() * edge_layer.features() * edge_layer.data_bytes()); } - } - // Width and features only used in case hw_padding is supported. In that case, they represent the HW shape (without padding) - return parse_nn_stream_config(edge_layer.width(),edge_layer.data_bytes(), static_cast(edge_layer.core_buffers_per_frame()), - static_cast(edge_layer.core_bytes_per_buffer()), hw_padding_supported, is_ddr, - INITIAL_PERIPH_BUFFERS_PER_FRAME, initial_periph_bytes_per_buffer); -} - -Expected HefConfigurator::parse_nn_stream_config(const LayerInfo &edge_layer, bool hw_padding_supported) -{ - // TODO HRT-7177 - pass interface to layer info instead of re-calculated Layer info from stream_internal.hpp - // After passing stream interface, there is no need for this function. Just use CONTROL_PROTOCOL__nn_stream_config_t from layer info. 
- assert(LayerType::BOUNDARY == edge_layer.type); - const auto is_ddr = false; // This function is called only on boundary layers, so no DDR + // We currently only support HW padding in hailort with format HAILO_FORMAT_ORDER_NHCW - which is padded by feature + // Padding should not affect the periph register values. + const uint32_t feature_padding_payload_32bit = static_cast(edge_layer.width()) * + static_cast(edge_layer.data_bytes()); + CHECK_AS_EXPECTED(IS_FIT_IN_UINT16(static_cast(edge_layer.width()) * static_cast(edge_layer.data_bytes())), + HAILO_INVALID_HEF, "frame width {} is too big", feature_padding_payload_32bit); + stream_config.feature_padding_payload = static_cast(feature_padding_payload_32bit); + } - return parse_nn_stream_config(edge_layer.hw_shape.width, edge_layer.hw_data_bytes, edge_layer.nn_stream_config.core_buffers_per_frame, - edge_layer.nn_stream_config.core_bytes_per_buffer, hw_padding_supported, is_ddr, edge_layer.nn_stream_config.periph_buffers_per_frame, - edge_layer.nn_stream_config.periph_bytes_per_buffer); + return stream_config; } // TODO HRT-11452: change to use hw consts @@ -1428,61 +1453,70 @@ Expected HefConfigurator::max_periph_bytes_value(const hailo_device_ar case HAILO_ARCH_HAILO8L: return HAILO8_INBOUND_DATA_STREAM_SIZE; case HAILO_ARCH_HAILO15H: + case HAILO_ARCH_HAILO15M: case HAILO_ARCH_PLUTO: - return HAILO15_PERIPH_BYTES_PER_BUFFER_MAX_SIZE; + return HAILO1X_PERIPH_BYTES_PER_BUFFER_MAX_SIZE; default: LOGGER__ERROR("Unknown device architecture!"); return make_unexpected(HAILO_INVALID_ARGUMENT); } } -// TODO HRT-11006: remove this function when hw padding is removed from InputStreamBase / OutputStreamBase constructor -Expected HefConfigurator::max_periph_bytes_value(const hailo_stream_interface_t interface) +Expected HefConfigurator::max_periph_padding_payload_value(const hailo_device_architecture_t hw_arch) { - switch (interface) { - case HAILO_STREAM_INTERFACE_ETH: - case HAILO_STREAM_INTERFACE_MIPI: - case 
HAILO_STREAM_INTERFACE_PCIE: - return HAILO8_INBOUND_DATA_STREAM_SIZE; - case HAILO_STREAM_INTERFACE_INTEGRATED: - return HAILO15_PERIPH_BYTES_PER_BUFFER_MAX_SIZE; + switch (hw_arch) { + case HAILO_ARCH_HAILO8_A0: + case HAILO_ARCH_HAILO8: + case HAILO_ARCH_HAILO8L: + return HAILO8_PERIPH_PAYLOAD_MAX_VALUE; + case HAILO_ARCH_HAILO15H: + case HAILO_ARCH_HAILO15M: + case HAILO_ARCH_PLUTO: + return HAILO1X_PERIPH_PAYLOAD_MAX_VALUE; default: - LOGGER__ERROR("Unknown stream interface!"); + LOGGER__ERROR("Unknown device architecture!"); return make_unexpected(HAILO_INVALID_ARGUMENT); } } -bool HefConfigurator::is_hw_padding_supported(bool is_boundary, bool is_mux, hailo_format_order_t format_order, - uint16_t core_buffers_per_frame, uint32_t height, uint32_t width, uint32_t features, uint32_t hw_data_bytes, - const uint32_t max_periph_bytes_value) +bool HefConfigurator::is_core_hw_padding_supported(const LayerInfo &layer_info, const uint32_t max_periph_bytes_value, + const bool is_core_hw_padding_config_in_dfc) { - if (!is_boundary || is_mux) { + if (!(LayerType::BOUNDARY == layer_info.type) || layer_info.is_mux || is_core_hw_padding_config_in_dfc) { return false; } // TODO: HRT-4462 support more orders - switch (format_order) + switch (layer_info.format.order) { case HAILO_FORMAT_ORDER_NHCW: break; default: - LOGGER__DEBUG("HW padding is not supported for format {} ", format_order); + LOGGER__DEBUG("HW padding is not supported for format {} ", layer_info.format.order); return false; } - if (core_buffers_per_frame != height) { + /* If the network is transposed, the width and height are swapped in LayerInfo c'tor, so need to swap it again for calculations */ + auto height = layer_info.shape.height; + auto width = layer_info.shape.width; + if (layer_info.format.flags & HAILO_FORMAT_FLAGS_TRANSPOSED) { + std::swap(height, width); + } + + + if (layer_info.nn_stream_config.core_buffers_per_frame != height) { // TODO: HRT-3278 LOGGER__DEBUG("HW padding is supported only on 
layers with core_buffers_per_frame == height"); return false; } - if (((width * features) % 8) != 0) { + if (((width * layer_info.shape.features) % 8) != 0) { // TODO: HRT-963 support chunks LOGGER__DEBUG("HW padding is supported only when periph_bytes_per_buffer is a multiple of 8"); return false; } - if ((width * features * hw_data_bytes) > (max_periph_bytes_value - 1)) { + if ((width * layer_info.shape.features * layer_info.hw_data_bytes) > (max_periph_bytes_value - 1)) { // TODO: HRT-4177 LOGGER__DEBUG("HW padding is supported only on layers with shape size < stream size"); return false; @@ -1490,43 +1524,6 @@ bool HefConfigurator::is_hw_padding_supported(bool is_boundary, bool is_mux, hai return true; } -bool HefConfigurator::is_hw_padding_supported(const LayerInfo &layer_info, const uint32_t max_periph_bytes_value) -{ - /* If the network is transposed, the width and height are swapped in LayerInfo c'tor, so need to swap it again for calculations */ - auto height = layer_info.shape.height; - auto width = layer_info.shape.width; - if (layer_info.format.flags & HAILO_FORMAT_FLAGS_TRANSPOSED) { - std::swap(height, width); - } - - auto is_boundary = (LayerType::BOUNDARY == layer_info.type); - return is_hw_padding_supported(is_boundary, layer_info.is_mux, layer_info.format.order, - layer_info.nn_stream_config.core_buffers_per_frame, height, width, - layer_info.shape.features, layer_info.hw_data_bytes, max_periph_bytes_value); -} - -bool HefConfigurator::is_hw_padding_supported(const ProtoHEFEdgeLayer &edge_layer, const uint32_t max_periph_bytes_value) -{ - auto is_boundary = (ProtoHEFEdgeConnectionType::PROTO__EDGE_CONNECTION_TYPE__BOUNDARY == edge_layer.context_switch_info().edge_connection_type()); - auto is_mux = (ProtoHEFEdgeLayerType::PROTO__EDGE_LAYER_TYPE__MUX == edge_layer.edge_layer_type()); - auto edge_layer_base = edge_layer.layer_info().edge_layer_base(); - auto format_order_exp = HailoRTDefaults::get_device_format_order(edge_layer_base.format()); - if 
(!format_order_exp) { - LOGGER__DEBUG("Failed to get format order. Not enabling hw padding"); - return false; - } - - if (!IS_FIT_IN_UINT16(edge_layer_base.core_buffers_per_frame())) { - LOGGER__DEBUG("Invalid core_buffers_per_frame. Not enabling hw padding"); - return false; - } - - auto format_order = format_order_exp.release(); - return is_hw_padding_supported(is_boundary, is_mux, format_order, static_cast(edge_layer_base.core_buffers_per_frame()), - edge_layer_base.height(), edge_layer_base.width(), edge_layer_base.features(), edge_layer_base.data_bytes(), - max_periph_bytes_value); -} - Expected> Hef::Impl::get_input_stream_infos(const std::string &net_group_name, const std::string &network_name) { @@ -1673,7 +1670,7 @@ Expected> Hef::Impl::get_network_group_and_n std::string network_group_name; if (name.empty()) { // Name is not given - addressing all networks in the first network_group - network_group_name = (ProtoHEFHwArch::PROTO__HW_ARCH__HAILO8L == get_device_arch()) ? + network_group_name = is_multi_layout(get_device_arch()) ? m_groups[0]->partial_network_groups(0).network_group().network_group_metadata().network_group_name() : m_groups[0]->network_group_metadata().network_group_name(); LOGGER__INFO("No name was given. Addressing all networks of default network_group: {}", @@ -1684,7 +1681,7 @@ Expected> Hef::Impl::get_network_group_and_n const ProtoHEFNetworkGroup *network_group_ptr = nullptr; for (const auto &network_group : m_groups) { // TODO: Handle new HEFs - network_group_ptr = (ProtoHEFHwArch::PROTO__HW_ARCH__HAILO8L == get_device_arch()) ? + network_group_ptr = is_multi_layout(get_device_arch()) ? 
&network_group->partial_network_groups(0).network_group() : network_group.get(); network_group_name = network_group_ptr->network_group_metadata().network_group_name(); @@ -1721,7 +1718,7 @@ Expected> Hef::Impl::get_core_op_by_net_grou auto network_group_name = HefUtils::get_network_group_name(*network_group_ptr, m_supported_features); LOGGER__INFO("No network_group name was given. Addressing default network_group: {}", network_group_name); const auto &core_op = m_core_ops_per_group[network_group_name][0]; - if (ProtoHEFHwArch::PROTO__HW_ARCH__HAILO8L == get_device_arch()) { + if (is_multi_layout(get_device_arch())) { auto partial_core_op = core_op.partial_core_ops[0]; return std::make_shared(*(partial_core_op->core_op)); } @@ -1730,7 +1727,7 @@ Expected> Hef::Impl::get_core_op_by_net_grou CHECK_AS_EXPECTED(contains(m_core_ops_per_group, net_group_name), HAILO_NOT_FOUND, "HEF does not contain network_group with name {}", net_group_name); const auto &core_op = m_core_ops_per_group[net_group_name][0]; - if (ProtoHEFHwArch::PROTO__HW_ARCH__HAILO8L == get_device_arch()) { + if (is_multi_layout(get_device_arch())) { auto partial_core_op = core_op.partial_core_ops[0]; return std::make_shared(*(partial_core_op->core_op)); } @@ -1772,34 +1769,30 @@ static Expected get_layer_type(const ProtoHEFEdgeConnectionType &edge } } -static void parse_layer_shape(LayerInfo &layer_info, const ProtoHEFEdgeLayerBase &base_info, const bool hw_padding_supported) { +static hailo_3d_image_shape_t parse_layer_shape(const ProtoHEFEdgeLayerBase &base_info) { if (HEF__FORMAT__NMS != base_info.format()) { - layer_info.shape.height = base_info.height(); - layer_info.shape.width = base_info.width(); - layer_info.shape.features = base_info.features(); + return hailo_3d_image_shape_t{base_info.height(), base_info.width(), base_info.features()}; } else { - layer_info.shape.height = static_cast(base_info.additional_info().nms_info().number_of_classes()); - layer_info.shape.width = 
HailoRTCommon::BBOX_PARAMS; - layer_info.shape.features = static_cast(base_info.additional_info().nms_info().max_output_size() * - base_info.additional_info().nms_info().input_division_factor()); + return hailo_3d_image_shape_t{static_cast(base_info.additional_info().nms_info().number_of_classes()), + HailoRTCommon::BBOX_PARAMS, static_cast(base_info.additional_info().nms_info().max_output_size() * + base_info.additional_info().nms_info().input_division_factor())}; } - if (hw_padding_supported) { - layer_info.hw_shape.height = base_info.height(); - layer_info.hw_shape.width = base_info.width(); - layer_info.hw_shape.features = base_info.features(); - } - else { - layer_info.hw_shape.height = base_info.padded_height(); - layer_info.hw_shape.width = base_info.padded_width(); - layer_info.hw_shape.features = base_info.padded_features(); +} + +static hailo_3d_image_shape_t parse_layer_hw_shape(const ProtoHEFEdgeLayerBase &base_info, + const bool is_core_hw_padding_supported) +{ + if (is_core_hw_padding_supported) { + return hailo_3d_image_shape_t{base_info.height(), base_info.width(), base_info.features()}; + } else { + return hailo_3d_image_shape_t{base_info.padded_height(), base_info.padded_width(), base_info.padded_features()}; } - layer_info.hw_data_bytes = base_info.data_bytes(); } hailo_status HefUtils::fill_layer_info_with_base_info(const ProtoHEFEdgeLayerBase &base_info, const ProtoHEFEdgeConnectionType &edge_connection_type, const ProtoHEFNetworkGroupMetadata &network_group_proto, - bool hw_padding_supported, bool transposed, const uint8_t context_index, const uint8_t network_index, - LayerInfo &layer_info, const SupportedFeatures &supported_features, const ProtoHEFHwArch &hef_arch) + bool transposed, const uint8_t context_index, const uint8_t network_index, LayerInfo &layer_info, + const SupportedFeatures &supported_features, const ProtoHEFHwArch &hef_arch, const bool is_part_of_mux_layer) { auto format_order_exp = 
HailoRTDefaults::get_device_format_order(base_info.format()); CHECK_EXPECTED_AS_STATUS(format_order_exp); @@ -1810,11 +1803,13 @@ hailo_status HefUtils::fill_layer_info_with_base_info(const ProtoHEFEdgeLayerBas CHECK_EXPECTED_AS_STATUS(layer_type); layer_info.type = layer_type.value(); - parse_layer_shape(layer_info, base_info, hw_padding_supported); + // Parse host shape - parse hw shape after determining if core hw padding is supported + layer_info.shape = parse_layer_shape(base_info); + layer_info.hw_data_bytes = base_info.data_bytes(); // TODO: remove duplications with stream info parse layer_info.format.order = format_oder; - layer_info.format.flags = HAILO_FORMAT_FLAGS_QUANTIZED; + layer_info.format.flags = HAILO_FORMAT_FLAGS_NONE; // The check network_group_proto.transposed_net() is for supporting backward compatability for old hefs if ((network_group_proto.transposed_net() || transposed) && (layer_info.format.order != HAILO_FORMAT_ORDER_NC)) { @@ -1831,13 +1826,30 @@ hailo_status HefUtils::fill_layer_info_with_base_info(const ProtoHEFEdgeLayerBas CHECK_EXPECTED_AS_STATUS(type); layer_info.format.type = type.value(); - auto nn_stream_config = HefConfigurator::parse_nn_stream_config(base_info, hw_padding_supported, + auto max_periph_bytes_from_hef = HefConfigurator::max_periph_bytes_value(DeviceBase::hef_arch_to_device_arch(hef_arch)); + CHECK_EXPECTED_AS_STATUS(max_periph_bytes_from_hef); + // TODO HRT-12099 - return invalid hef error when remove support for hefs with no max_shmifo size + const auto max_periph_bytes = (0 == base_info.max_shmifo_size()) ? 
max_periph_bytes_from_hef.value() : + std::min(max_periph_bytes_from_hef.value(), base_info.max_shmifo_size()); + + // TODO HRT-12051: remove when is_core_hw_padding_supported function is removed + // Need to set layer_info.nn_stream_config.core_buffers_per_frame for condition in is_core_hw_padding_supported + layer_info.nn_stream_config.core_buffers_per_frame = static_cast(base_info.core_buffers_per_frame()); + // TODO HRT-12051: is_part_of_mux_layer is only used for mux layer predecessors to make sure they dont have + // core HW padding enabled - remove when core hw padding is removed + const bool core_hw_padding_supported = is_part_of_mux_layer ? false : + HefConfigurator::is_core_hw_padding_supported(layer_info, max_periph_bytes, + supported_features.core_hw_padding_config_in_dfc); + auto nn_stream_config = HefConfigurator::parse_nn_stream_config(base_info, core_hw_padding_supported, edge_connection_type); CHECK_EXPECTED_AS_STATUS(nn_stream_config, "Failed parse nn stream config"); layer_info.nn_stream_config = nn_stream_config.release(); layer_info.network_index = network_index; layer_info.context_index = context_index; + // TODO HRT-12051 - reunite with parse_layer_shape when is_core_hw_padding_supported function is removed + layer_info.hw_shape = parse_layer_hw_shape(base_info, core_hw_padding_supported); + CHECK(IS_FIT_IN_UINT8(base_info.sys_index()), HAILO_INVALID_HEF, "Failed to parse HEF. 
Invalid sys_index: {}.", base_info.sys_index()); layer_info.stream_index = static_cast(base_info.sys_index()); @@ -1858,15 +1870,11 @@ hailo_status HefUtils::fill_layer_info_with_base_info(const ProtoHEFEdgeLayerBas } hailo_status HefUtils::fill_layer_info(const ProtoHEFEdgeLayerInfo &info, - const ProtoHEFEdgeConnectionType &edge_connection_type, - const ProtoHEFCoreOpMock &core_op, hailo_stream_direction_t direction, - bool hw_padding_supported, const uint8_t context_index, const std::string &partial_network_name, - uint8_t network_index, LayerInfo &layer_info, const SupportedFeatures &supported_features, const ProtoHEFHwArch &hef_arch) + const ProtoHEFEdgeConnectionType &edge_connection_type, const ProtoHEFCoreOpMock &core_op, + hailo_stream_direction_t direction, const uint8_t context_index, const std::string &partial_network_name, + uint8_t network_index, LayerInfo &layer_info, const SupportedFeatures &supported_features, const ProtoHEFHwArch &hef_arch, + const bool is_part_of_mux_layer) { - auto status = fill_layer_info_with_base_info(info.edge_layer_base(), edge_connection_type, core_op.network_group_metadata, - hw_padding_supported, info.transposed(), context_index, network_index, layer_info, supported_features, hef_arch); - CHECK_SUCCESS(status); - if (HAILO_MAX_STREAM_NAME_SIZE < (info.name().length() + 1)) { LOGGER__ERROR("The edge layer '{}' has a too long name (max is HAILO_MAX_STREAM_NAME_SIZE)", info.name()); return HAILO_INTERNAL_FAILURE; @@ -1885,8 +1893,12 @@ hailo_status HefUtils::fill_layer_info(const ProtoHEFEdgeLayerInfo &info, layer_info.quant_info.qp_scale = info.numeric_info().qp_scale(); layer_info.quant_info.qp_zp = info.numeric_info().qp_zp(); + auto status = fill_layer_info_with_base_info(info.edge_layer_base(), edge_connection_type, core_op.network_group_metadata, + info.transposed(), context_index, network_index, layer_info, supported_features, hef_arch, is_part_of_mux_layer); + CHECK_SUCCESS(status); + int number_of_qps = 
(HailoRTCommon::is_nms(layer_info.format.order)) ? NMS_NUMBER_OF_QPS : layer_info.shape.features; - if (supported_features.output_scale_by_feature) { + if ((supported_features.output_scale_by_feature) && (direction == HAILO_D2H_STREAM)) { CHECK((info.numeric_info().qp_zps().size() == number_of_qps) && (info.numeric_info().qp_scales().size() == number_of_qps), HAILO_INVALID_HEF, "Invalid quantization infos vector in HEF!"); // We set those values to 0 to idicate that we work with scale by feature @@ -1955,7 +1967,7 @@ hailo_status HefUtils::fill_fused_nms_info(const ProtoHEFEdgeLayerFused &info, L auto format_order_exp = HailoRTDefaults::get_device_format_order(base_info.format()); CHECK_EXPECTED_AS_STATUS(format_order_exp); layer_info.format.order = format_order_exp.release(); - layer_info.format.flags = HAILO_FORMAT_FLAGS_QUANTIZED; + layer_info.format.flags = HAILO_FORMAT_FLAGS_NONE; layer_info.shape.height = static_cast(info.nms_info().number_of_classes()); layer_info.shape.width = HailoRTCommon::BBOX_PARAMS; @@ -1992,16 +2004,10 @@ hailo_status HefUtils::fill_fused_nms_info(const ProtoHEFEdgeLayerFused &info, L } hailo_status HefUtils::fill_mux_info(const ProtoHEFEdgeLayerMux &info, - const ProtoHEFEdgeConnectionType &edge_connection_type, - const ProtoHEFCoreOpMock &core_op, hailo_stream_direction_t direction, - bool hw_padding_supported, const uint8_t context_index, const std::string &partial_network_name, + const ProtoHEFEdgeConnectionType &edge_connection_type, const ProtoHEFCoreOpMock &core_op, + hailo_stream_direction_t direction, const uint8_t context_index, const std::string &partial_network_name, uint8_t network_index, LayerInfo &layer_info, const SupportedFeatures &supported_features, const ProtoHEFHwArch &hef_arch) { - const bool transposed = false; - auto status = fill_layer_info_with_base_info(info.edge_layer_base(), edge_connection_type, core_op.network_group_metadata, - hw_padding_supported, transposed, context_index, network_index, layer_info, 
supported_features, hef_arch); - CHECK_SUCCESS(status); - if (HAILO_MAX_STREAM_NAME_SIZE < (info.name().length() + 1)) { LOGGER__ERROR("The edge layer '{}' has a too long name (max is HAILO_MAX_STREAM_NAME_SIZE)", info.name()); return HAILO_INTERNAL_FAILURE; @@ -2017,6 +2023,12 @@ hailo_status HefUtils::fill_mux_info(const ProtoHEFEdgeLayerMux &info, layer_info.predecessor.reserve(info.mux_data().number_of_predecessors()); layer_info.height_gcd = info.mux_data().height_gcd(); layer_info.height_ratios.reserve(info.mux_data().height_ratios_list_len()); + + const bool NOT_TRANSPOSED = false; + auto status = fill_layer_info_with_base_info(info.edge_layer_base(), edge_connection_type, core_op.network_group_metadata, + NOT_TRANSPOSED, context_index, network_index, layer_info, supported_features, hef_arch, true); + CHECK_SUCCESS(status); + for (const auto &height_ratio : info.mux_data().height_ratios_list()) { layer_info.height_ratios.emplace_back(height_ratio); } @@ -2029,18 +2041,16 @@ hailo_status HefUtils::fill_mux_info(const ProtoHEFEdgeLayerMux &info, LayerInfo temp_layer = {}; switch (info.predecessors(i).edge_case()) { case ProtoHefEdge::kLayerInfo: - status = fill_layer_info(info.predecessors(i).layer_info(), edge_connection_type, core_op, - direction, hw_padding_supported, context_index, partial_network_name, network_index, temp_layer, - supported_features, hef_arch); + status = fill_layer_info(info.predecessors(i).layer_info(), edge_connection_type, core_op, direction, + context_index, partial_network_name, network_index, temp_layer, supported_features, hef_arch, true); if (HAILO_SUCCESS != status) { return status; } layer_info.predecessor.push_back(temp_layer); break; case ProtoHefEdge::kLayerMux: - status = fill_mux_info(info.predecessors(i).layer_mux(), edge_connection_type, core_op, - direction, hw_padding_supported, context_index, partial_network_name, network_index, temp_layer, - supported_features, hef_arch); + status = 
fill_mux_info(info.predecessors(i).layer_mux(), edge_connection_type, core_op, direction, + context_index, partial_network_name, network_index, temp_layer, supported_features, hef_arch); if (HAILO_SUCCESS != status) { return status; } @@ -2072,9 +2082,8 @@ Expected convert_planes_format_to_hailo_format_order(const } hailo_status HefUtils::fill_planes_info(const ProtoHEFEdgeLayerPlanes &info, - const ProtoHEFEdgeConnectionType &edge_connection_type, - const ProtoHEFCoreOpMock &core_op, hailo_stream_direction_t direction, - bool hw_padding_supported, const uint8_t context_index, const std::string &partial_network_name, + const ProtoHEFEdgeConnectionType &edge_connection_type, const ProtoHEFCoreOpMock &core_op, + hailo_stream_direction_t direction, const uint8_t context_index, const std::string &partial_network_name, uint8_t network_index, LayerInfo &layer_info, const SupportedFeatures &supported_features, const ProtoHEFHwArch &hef_arch) { auto layer_type = get_layer_type(edge_connection_type); @@ -2110,9 +2119,8 @@ hailo_status HefUtils::fill_planes_info(const ProtoHEFEdgeLayerPlanes &info, for (uint8_t i = 0; i < info.planes_size(); i++) { LayerInfo temp_layer = {}; if (info.planes(i).edge_case() == ProtoHefEdge::kLayerInfo) { - auto status = fill_layer_info(info.planes(i).layer_info(), edge_connection_type, core_op, - direction, hw_padding_supported, context_index, partial_network_name, network_index, temp_layer, - supported_features, hef_arch); + auto status = fill_layer_info(info.planes(i).layer_info(), edge_connection_type, core_op, direction, + context_index, partial_network_name, network_index, temp_layer, supported_features, hef_arch, false); CHECK_SUCCESS(status); temp_layer.plane_index = i; layer_info.planes.push_back(temp_layer); @@ -2157,9 +2165,9 @@ hailo_status HefUtils::fill_inter_context_layers_info( const uint8_t context_index, const ProtoHEFEdgeLayer &layer, const SupportedFeatures &supported_features, - ContextMetadata &context_metadata, const 
ProtoHEFHwArch &hef_arch) + ContextMetadata &context_metadata) { - auto layer_info = get_inter_context_layer_info(core_op, context_index, layer, supported_features, hef_arch); + auto layer_info = get_inter_context_layer_info(core_op, context_index, layer, supported_features); CHECK_EXPECTED_AS_STATUS(layer_info); context_metadata.add_inter_context_layer(layer_info.release()); @@ -2171,9 +2179,9 @@ hailo_status HefUtils::fill_ddr_layers_info( const uint8_t context_index, const ProtoHEFEdgeLayer &layer, const SupportedFeatures &supported_features, - ContextMetadata &context_metadata, const ProtoHEFHwArch &hef_arch) + ContextMetadata &context_metadata) { - auto layer_info = get_ddr_layer_info(core_op, context_index, layer, supported_features, hef_arch); + auto layer_info = get_ddr_layer_info(core_op, context_index, layer, supported_features); CHECK_EXPECTED_AS_STATUS(layer_info); context_metadata.add_ddr_layer(layer_info.release()); @@ -2599,12 +2607,12 @@ Expected HefUtils::parse_single_dynamic_context(const ProtoHEFC } else if (ProtoHEFEdgeConnectionType::PROTO__EDGE_CONNECTION_TYPE__INTERMEDIATE == edge_layer.context_switch_info().edge_connection_type()) { auto status = fill_inter_context_layers_info(core_op, context_index, edge_layer, - supported_features, context_metadata, hef_arch); + supported_features, context_metadata); CHECK_SUCCESS_AS_EXPECTED(status); } else if (ProtoHEFEdgeConnectionType::PROTO__EDGE_CONNECTION_TYPE__DDR == edge_layer.context_switch_info().edge_connection_type()) { auto status = fill_ddr_layers_info(core_op, context_index, edge_layer, - supported_features, context_metadata, hef_arch); + supported_features, context_metadata); CHECK_SUCCESS_AS_EXPECTED(status); } } @@ -2670,6 +2678,7 @@ static Expected get_nms_burst_mode(const ProtoHEFNmsInfo return make_unexpected(HAILO_INVALID_HEF); } case PROTO__HW_ARCH__HAILO15H: + case PROTO__HW_ARCH__HAILO15M: case PROTO__HW_ARCH__GINGER: case PROTO__HW_ARCH__LAVENDER: case PROTO__HW_ARCH__PLUTO: @@ 
-2704,6 +2713,7 @@ static Expected get_nms_bbox_mode(const ProtoHEFNmsInfo case PROTO__HW_ARCH__HAILO8L: return HAILO_BURST_TYPE_H8_BBOX; case PROTO__HW_ARCH__HAILO15H: + case PROTO__HW_ARCH__HAILO15M: case PROTO__HW_ARCH__GINGER: case PROTO__HW_ARCH__LAVENDER: case PROTO__HW_ARCH__PLUTO: @@ -2778,28 +2788,21 @@ Expected HefUtils::get_boundary_layer_info(const ProtoHEFCoreOpMock & auto network_index = static_cast((support_multi_networks) ? layer.network_index() : 0); auto partial_network_name = HefUtils::get_partial_network_name_by_index(core_op, network_index, supported_features); CHECK_EXPECTED(partial_network_name); - auto max_periph_bytes_from_hef = HefConfigurator::max_periph_bytes_value(DeviceBase::hef_arch_to_device_arch(hef_arch)); - CHECK_EXPECTED(max_periph_bytes_from_hef); - const auto max_periph_bytes = (0 == layer.layer_info().edge_layer_base().max_shmifo_size()) ? max_periph_bytes_from_hef.value(): - MIN(max_periph_bytes_from_hef.value(), layer.layer_info().edge_layer_base().max_shmifo_size()); - const bool hw_padding_supported = HefConfigurator::is_hw_padding_supported(layer, max_periph_bytes); + if (ProtoHEFEdgeLayerType::PROTO__EDGE_LAYER_TYPE__INFO == layer.edge_layer_type()) { // TODO: return LayerInfo - auto status = fill_layer_info(layer.layer_info(), layer.context_switch_info().edge_connection_type(), - core_op, direction, hw_padding_supported, context_index, partial_network_name.value(), network_index, result, - supported_features, hef_arch); + auto status = fill_layer_info(layer.layer_info(), layer.context_switch_info().edge_connection_type(), core_op, + direction, context_index, partial_network_name.value(), network_index, result, supported_features, hef_arch, false); CHECK_SUCCESS_AS_EXPECTED(status); } else if (ProtoHEFEdgeLayerType::PROTO__EDGE_LAYER_TYPE__MUX == layer.edge_layer_type()) { // TODO: return LayerInfo - auto status = fill_mux_info(layer.layer_mux(), layer.context_switch_info().edge_connection_type(), - core_op, direction, 
hw_padding_supported, context_index, partial_network_name.value(), network_index, result, - supported_features, hef_arch); + auto status = fill_mux_info(layer.layer_mux(), layer.context_switch_info().edge_connection_type(), core_op, + direction, context_index, partial_network_name.value(), network_index, result, supported_features, hef_arch); CHECK_SUCCESS_AS_EXPECTED(status); } else if (ProtoHEFEdgeLayerType::PROTO__EDGE_LAYER_TYPE__PLANES == layer.edge_layer_type()) { // TODO: return LayerInfo - auto status = fill_planes_info(layer.layer_planes(), layer.context_switch_info().edge_connection_type(), - core_op, direction, hw_padding_supported, context_index, partial_network_name.value(), network_index, result, - supported_features, hef_arch); + auto status = fill_planes_info(layer.layer_planes(), layer.context_switch_info().edge_connection_type(), core_op, + direction, context_index, partial_network_name.value(), network_index, result, supported_features, hef_arch); CHECK_SUCCESS_AS_EXPECTED(status); } else { LOGGER__ERROR("Invalid layer type"); @@ -2834,8 +2837,7 @@ static Expected parse_connected_context_info( } Expected HefUtils::get_inter_context_layer_info(const ProtoHEFCoreOpMock &core_op, - const uint8_t context_index, const ProtoHEFEdgeLayer &layer, const SupportedFeatures &supported_features, - const ProtoHEFHwArch &hef_arch) + const uint8_t context_index, const ProtoHEFEdgeLayer &layer, const SupportedFeatures &supported_features) { LayerInfo result = {}; CHECK_AS_EXPECTED(PROTO__EDGE_LAYER_TYPE__INFO == layer.edge_layer_type(), HAILO_INVALID_HEF, "Inter-context layer can't be mux."); @@ -2847,15 +2849,12 @@ Expected HefUtils::get_inter_context_layer_info(const ProtoHEFCoreOpM CHECK_EXPECTED(partial_network_name); result.network_name = HefUtils::get_network_name(core_op, partial_network_name.release()); result.context_index = context_index; - auto max_periph_bytes_from_hef = 
HefConfigurator::max_periph_bytes_value(DeviceBase::hef_arch_to_device_arch(hef_arch)); - CHECK_EXPECTED(max_periph_bytes_from_hef); - const auto max_periph_bytes = (0 == layer.layer_info().edge_layer_base().max_shmifo_size()) ? max_periph_bytes_from_hef.value(): - MIN(max_periph_bytes_from_hef.value(), layer.layer_info().edge_layer_base().max_shmifo_size()); - const bool hw_padding_supported = HefConfigurator::is_hw_padding_supported(layer, max_periph_bytes); result.name = layer.layer_info().name(); + // Core hw padding is only supported on boundary layers + const bool CORE_HW_PADDING_NOT_SUPPORTED = false; auto nn_stream_config_exp = HefConfigurator::parse_nn_stream_config(layer.layer_info().edge_layer_base(), - hw_padding_supported, layer.context_switch_info().edge_connection_type()); + CORE_HW_PADDING_NOT_SUPPORTED, layer.context_switch_info().edge_connection_type()); CHECK_EXPECTED(nn_stream_config_exp); result.nn_stream_config = nn_stream_config_exp.release(); CHECK_AS_EXPECTED(IS_FIT_IN_UINT8(layer.layer_info().edge_layer_base().sys_index()), HAILO_INVALID_HEF, @@ -2867,7 +2866,18 @@ Expected HefUtils::get_inter_context_layer_info(const ProtoHEFCoreOpM result.max_shmifo_size = layer.layer_info().edge_layer_base().max_shmifo_size(); - parse_layer_shape(result, layer.layer_info().edge_layer_base(), hw_padding_supported); + result.shape = parse_layer_shape(layer.layer_info().edge_layer_base()); + result.hw_shape = parse_layer_hw_shape(layer.layer_info().edge_layer_base(), CORE_HW_PADDING_NOT_SUPPORTED); + result.hw_data_bytes = layer.layer_info().edge_layer_base().data_bytes(); + + auto format_order_exp = HailoRTDefaults::get_device_format_order(layer.layer_info().edge_layer_base().format()); + CHECK_EXPECTED(format_order_exp); + auto format_oder = format_order_exp.release(); + result.format.order = format_oder; + result.format.flags = HAILO_FORMAT_FLAGS_NONE; + auto type = HailoRTCommon::get_format_type(result.hw_data_bytes); + CHECK_EXPECTED(type); + 
result.format.type = type.value(); result.direction = (ProtoHEFEdgeLayerDirection::PROTO__EDGE_LAYER_DIRECTION__DEVICE_TO_HOST == layer.direction()) ? HAILO_D2H_STREAM : HAILO_H2D_STREAM; @@ -2883,8 +2893,7 @@ Expected HefUtils::get_inter_context_layer_info(const ProtoHEFCoreOpM } Expected HefUtils::get_ddr_layer_info(const ProtoHEFCoreOpMock &core_op, - const uint8_t context_index, const ProtoHEFEdgeLayer &layer, const SupportedFeatures &supported_features, - const ProtoHEFHwArch &hef_arch) + const uint8_t context_index, const ProtoHEFEdgeLayer &layer, const SupportedFeatures &supported_features) { LayerInfo result = {}; CHECK_AS_EXPECTED(PROTO__EDGE_LAYER_TYPE__INFO == layer.edge_layer_type(), HAILO_INVALID_HEF, "DDR layer can't be mux."); @@ -2897,14 +2906,12 @@ Expected HefUtils::get_ddr_layer_info(const ProtoHEFCoreOpMock &core_ CHECK_EXPECTED(partial_network_name); result.network_name = HefUtils::get_network_name(core_op, partial_network_name.release()); result.context_index = context_index; - auto max_periph_bytes_from_hef = HefConfigurator::max_periph_bytes_value(DeviceBase::hef_arch_to_device_arch(hef_arch)); - CHECK_EXPECTED(max_periph_bytes_from_hef); - const auto max_periph_bytes = (0 == layer.layer_info().edge_layer_base().max_shmifo_size()) ? 
max_periph_bytes_from_hef.value(): - MIN(max_periph_bytes_from_hef.value(), layer.layer_info().edge_layer_base().max_shmifo_size()); - const bool hw_padding_supported = HefConfigurator::is_hw_padding_supported(layer, max_periph_bytes); result.name = layer.layer_info().name(); + + // Core hw padding is only supported on boundary layers + const bool CORE_HW_PADDING_NOT_SUPPORTED = false; auto nn_stream_config_exp = HefConfigurator::parse_nn_stream_config(layer.layer_info().edge_layer_base(), - hw_padding_supported, layer.context_switch_info().edge_connection_type()); + CORE_HW_PADDING_NOT_SUPPORTED, layer.context_switch_info().edge_connection_type()); CHECK_EXPECTED(nn_stream_config_exp); result.nn_stream_config = nn_stream_config_exp.release(); CHECK_AS_EXPECTED(IS_FIT_IN_UINT8(layer.layer_info().edge_layer_base().sys_index()), HAILO_INVALID_HEF, @@ -2926,7 +2933,18 @@ Expected HefUtils::get_ddr_layer_info(const ProtoHEFCoreOpMock &core_ result.direction = (ProtoHEFEdgeLayerDirection::PROTO__EDGE_LAYER_DIRECTION__DEVICE_TO_HOST == layer.direction()) ? 
HAILO_D2H_STREAM : HAILO_H2D_STREAM; - parse_layer_shape(result, layer.layer_info().edge_layer_base(), hw_padding_supported); + result.shape = parse_layer_shape(layer.layer_info().edge_layer_base()); + result.hw_shape = parse_layer_hw_shape(layer.layer_info().edge_layer_base(), CORE_HW_PADDING_NOT_SUPPORTED); + result.hw_data_bytes = layer.layer_info().edge_layer_base().data_bytes(); + + auto format_order_exp = HailoRTDefaults::get_device_format_order(layer.layer_info().edge_layer_base().format()); + CHECK_EXPECTED(format_order_exp); + auto format_oder = format_order_exp.release(); + result.format.order = format_oder; + result.format.flags = HAILO_FORMAT_FLAGS_NONE; + auto type = HailoRTCommon::get_format_type(result.hw_data_bytes); + CHECK_EXPECTED(type); + result.format.type = type.value(); CHECK_AS_EXPECTED(IS_FIT_IN_UINT16(layer.layer_info().edge_layer_base().core_buffers_per_frame()), HAILO_INVALID_HEF, "Failed to parse HEF. Invalid core_buffers_per_frame: {}.", layer.layer_info().edge_layer_base().core_buffers_per_frame()); @@ -2974,11 +2992,13 @@ std::string HefUtils::get_network_name(const ProtoHEFCoreOpMock &core_op, const Expected> Hef::Impl::get_core_op_per_arch(const ProtoHEFCoreOpMock &core_op, ProtoHEFHwArch hef_arch, hailo_device_architecture_t device_arch, uint32_t partial_clusters_layout_bitmap) { - if (ProtoHEFHwArch::PROTO__HW_ARCH__HAILO8L == hef_arch) { - // Hailo8 can work with Hailo8L configurations. in that case we choose one of the configurations + if (is_multi_layout(hef_arch)) { + // Full chip arch (ex: Hailo8) can work with partitial chip arch (ex: Hailo8L) configurations. + // in that case we choose one of the configurations. 
for (auto &partial_core_op : core_op.partial_core_ops) { if (partial_clusters_layout_bitmap == partial_core_op->layout.partial_clusters_layout_bitmap() - || (HAILO_ARCH_HAILO8 == device_arch)) { + || (HAILO_ARCH_HAILO8 == device_arch && ProtoHEFHwArch::PROTO__HW_ARCH__HAILO8L == hef_arch) + || (HAILO_ARCH_HAILO15H == device_arch && ProtoHEFHwArch::PROTO__HW_ARCH__HAILO15M == hef_arch)) { return std::make_shared(*(partial_core_op->core_op)); } } @@ -3296,7 +3316,7 @@ hailo_status Hef::Impl::validate_core_op_unique_layer_names(const ProtoHEFCoreOp return HAILO_SUCCESS; } -std::vector Hef::get_network_groups_names() +std::vector Hef::get_network_groups_names() const { return pimpl->get_network_groups_names(); } @@ -3349,7 +3369,7 @@ std::vector Hef::Impl::get_network_groups_names() results.reserve(m_groups.size()); for (const auto &net_group : m_groups) { - auto &network_group_name = (ProtoHEFHwArch::PROTO__HW_ARCH__HAILO8L == get_device_arch()) ? + auto &network_group_name = is_multi_layout(get_device_arch()) ? 
net_group->partial_network_groups(0).network_group().network_group_metadata().network_group_name() : net_group->network_group_metadata().network_group_name(); results.push_back(network_group_name); @@ -3357,7 +3377,7 @@ std::vector Hef::Impl::get_network_groups_names() return results; } -Expected> Hef::get_network_groups_infos() +Expected> Hef::get_network_groups_infos() const { return pimpl->get_network_groups_infos(); } @@ -3443,7 +3463,7 @@ Expected> Hef::Impl::get_post_processes_infos_descripti return infos_strings; } -Expected Hef::get_description(bool stream_infos, bool vstream_infos) +Expected Hef::get_description(bool stream_infos, bool vstream_infos) const { auto arch = get_hef_device_arch(); CHECK_EXPECTED(arch); @@ -3513,13 +3533,13 @@ Expected> Hef::Impl::get_network_groups_ for (const auto &group_name_to_core_op : m_core_ops_per_group) { const auto &core_op = group_name_to_core_op.second[0]; hailo_network_group_info_t info = {}; - auto &network_group_name = (ProtoHEFHwArch::PROTO__HW_ARCH__HAILO8L == get_device_arch()) ? + auto &network_group_name = is_multi_layout(get_device_arch()) ? core_op.partial_core_ops[0]->core_op->network_group_metadata.network_group_name() : core_op.network_group_metadata.network_group_name(); CHECK_AS_EXPECTED(HAILO_MAX_NETWORK_GROUP_NAME_SIZE >= (network_group_name.length() + 1), HAILO_INTERNAL_FAILURE, "The network group '{}' has a too long name (max is HAILO_MAX_NETWORK_GROUP_NAME_SIZE)", network_group_name); strncpy(info.name, network_group_name.c_str(), network_group_name.length() + 1); - const auto number_contexts = (ProtoHEFHwArch::PROTO__HW_ARCH__HAILO8L == get_device_arch()) ? + const auto number_contexts = is_multi_layout(get_device_arch()) ? 
core_op.partial_core_ops[0]->core_op->contexts.size() : core_op.contexts.size(); info.is_multi_context = (1 < number_contexts); results.push_back(info); @@ -3528,46 +3548,46 @@ Expected> Hef::Impl::get_network_groups_ } Expected> Hef::make_input_vstream_params( - const std::string &name, bool quantized, hailo_format_type_t format_type, uint32_t timeout_ms, + const std::string &name, bool /*unused*/, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size) { auto network_pair = pimpl->get_network_group_and_network_name(name); CHECK_EXPECTED(network_pair); - return pimpl->make_input_vstream_params(network_pair.value().first, network_pair.value().second, quantized, format_type, + return pimpl->make_input_vstream_params(network_pair.value().first, network_pair.value().second, format_type, timeout_ms, queue_size); } Expected> Hef::Impl::make_input_vstream_params( - const std::string &net_group_name, const std::string &network_name, bool quantized, + const std::string &net_group_name, const std::string &network_name, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size) { std::map input_vstreams_params; auto status = fill_missing_input_vstream_params_with_default(net_group_name, - network_name, input_vstreams_params, quantized, format_type, timeout_ms, queue_size); + network_name, input_vstreams_params, format_type, timeout_ms, queue_size); CHECK_SUCCESS_AS_EXPECTED(status); return input_vstreams_params; } Expected> Hef::make_output_vstream_params( - const std::string &name, bool quantized, hailo_format_type_t format_type, uint32_t timeout_ms, + const std::string &name, bool /*unused*/, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size) { auto network_pair = pimpl->get_network_group_and_network_name(name); CHECK_EXPECTED(network_pair); - return pimpl->make_output_vstream_params(network_pair.value().first, network_pair.value().second, quantized, format_type, + return 
pimpl->make_output_vstream_params(network_pair.value().first, network_pair.value().second, format_type, timeout_ms, queue_size); } Expected> Hef::Impl::make_output_vstream_params( - const std::string &net_group_name, const std::string &network_name, bool quantized, + const std::string &net_group_name, const std::string &network_name, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size) { std::map output_vstreams_params; auto status = fill_missing_output_vstream_params_with_default(net_group_name, - network_name, output_vstreams_params, quantized, format_type, timeout_ms, queue_size); + network_name, output_vstreams_params, format_type, timeout_ms, queue_size); CHECK_SUCCESS_AS_EXPECTED(status); return output_vstreams_params; @@ -3575,36 +3595,34 @@ Expected> Hef::Impl::make_output_v hailo_status Hef::Impl::fill_missing_input_vstream_params_with_default(const std::string &net_group_name, const std::string &network_name, std::map &input_vstreams_params, - bool quantized, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size) + hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size) { CHECK(contains(m_network_group_metadata, net_group_name), HAILO_NOT_FOUND); auto input_vstream_infos = m_network_group_metadata.at(net_group_name).get_input_vstream_infos(network_name); CHECK_EXPECTED_AS_STATUS(input_vstream_infos); return fill_missing_vstream_params_with_default(input_vstreams_params, input_vstream_infos.value(), - quantized, format_type, timeout_ms, queue_size); + format_type, timeout_ms, queue_size); } hailo_status Hef::Impl::fill_missing_output_vstream_params_with_default(const std::string &net_group_name, const std::string &network_name, std::map &output_vstream_params, - bool quantized, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size) + hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size) { CHECK(contains(m_network_group_metadata, net_group_name), 
HAILO_NOT_FOUND); auto output_vstream_infos = m_network_group_metadata.at(net_group_name).get_output_vstream_infos(network_name); CHECK_EXPECTED_AS_STATUS(output_vstream_infos); return fill_missing_vstream_params_with_default(output_vstream_params, output_vstream_infos.value(), - quantized, format_type, timeout_ms, queue_size); + format_type, timeout_ms, queue_size); } hailo_status Hef::Impl::fill_missing_vstream_params_with_default(std::map &vstream_params, - std::vector &vstream_infos, bool quantized, hailo_format_type_t format_type, uint32_t timeout_ms, + std::vector &vstream_infos, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size) { - hailo_format_flags_t flags = static_cast(HAILO_FORMAT_FLAGS_NONE); - if (quantized) { - flags = static_cast(flags | HAILO_FORMAT_FLAGS_QUANTIZED); - } + hailo_format_flags_t flags = HAILO_FORMAT_FLAGS_NONE; + for (const auto &vstream_info : vstream_infos) { std::string vstream_name(vstream_info.name); if (contains(vstream_params, vstream_name)) { diff --git a/hailort/libhailort/src/hef/hef_internal.hpp b/hailort/libhailort/src/hef/hef_internal.hpp index deafb2a..b1c4002 100644 --- a/hailort/libhailort/src/hef/hef_internal.hpp +++ b/hailort/libhailort/src/hef/hef_internal.hpp @@ -17,6 +17,7 @@ #else #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wconversion" +#pragma GCC diagnostic ignored "-Wunused-parameter" #endif #include "hef.pb.h" #if defined(_MSC_VER) @@ -36,7 +37,7 @@ #include "hef/layer_info.hpp" #include "hef/context_switch_actions.hpp" #include "net_flow/ops/op.hpp" -#include "net_flow/pipeline/pipeline.hpp" +#include "net_flow/pipeline/pipeline_internal.hpp" #include "device_common/control_protocol.hpp" #include "control_protocol.h" @@ -154,19 +155,21 @@ static const std::vector SUPPORTED_EXTENSIONS = { OFFLOAD_ARGMAX, KO_RUN_ASAP, HAILO_NET_FLOW, - HAILO_NET_FLOW_YOLOV5_NMS, // Extention added in platform 4.12 release - HAILO_NET_FLOW_SSD_NMS, // Extention added in platform 4.14 
release - WRITE_DATA_BY_TYPE, // Extention added in platform 4.14 release - NMS_OUTPUT_BURST, // Extention added in platform 4.14 release - DUAL_DIRECTION_STREAM_INDEX, // Extention added in platform 4.14 release - HAILO_NET_FLOW_ARGMAX, // Extention added in platform 4.14 release - HAILO_NET_FLOW_SOFTMAX, // Extention added in platform 4.14 release - ALIGNED_FORMAT_TYPE, // Extention added in platform 4.14 release - HAILO_NET_FLOW_YOLOX_NMS, // Extention added in platform 4.14 release + HAILO_NET_FLOW_YOLOV5_NMS, // Extension added in platform 4.12 release + HAILO_NET_FLOW_SSD_NMS, // Extension added in platform 4.14 release + WRITE_DATA_BY_TYPE, // Extension added in platform 4.14 release + NMS_OUTPUT_BURST, // Extension added in platform 4.14 release + DUAL_DIRECTION_STREAM_INDEX, // Extension added in platform 4.14 release + HAILO_NET_FLOW_ARGMAX, // Extension added in platform 4.14 release + HAILO_NET_FLOW_SOFTMAX, // Extension added in platform 4.14 release + ALIGNED_FORMAT_TYPE, // Extension added in platform 4.14 release + HAILO_NET_FLOW_YOLOX_NMS, // Extension added in platform 4.14 release OUTPUT_SCALE_PER_FEATURE, // Extension added in platform 4.14 release PERIPH_CALCULATION_IN_HAILORT, // Extension added in platform 4.14 release HAILO_NET_FLOW_YOLOV5_SEG_NMS, // Extension added in platform 4.15 release - HAILO_NET_FLOW_IOU_NMS // Extension added in platform 4.15 release + HAILO_NET_FLOW_IOU_NMS, // Extension added in platform 4.15 release + HW_PADDING, // Extension added in platform 4.16 release + HAILO_NET_FLOW_YOLOV8_NMS // Extension added in platform 4.16 release }; static inline bool is_h2d_boundary_info_layer(const ProtoHEFEdgeLayer& layer) @@ -298,19 +301,19 @@ public: const hailo_mipi_input_stream_params_t &mipi_params); Expected> make_input_vstream_params( - const std::string &net_group_name, const std::string &network_name, bool quantized, + const std::string &net_group_name, const std::string &network_name, hailo_format_type_t format_type, 
uint32_t timeout_ms, uint32_t queue_size); hailo_status fill_missing_input_vstream_params_with_default(const std::string &net_group_name, const std::string &network_name, std::map &input_vstreams_params, - bool quantized, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size); + hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size); Expected> make_output_vstream_params( - const std::string &net_group_name, const std::string &network_name, bool quantized, + const std::string &net_group_name, const std::string &network_name, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size); hailo_status fill_missing_output_vstream_params_with_default(const std::string &net_group_name, const std::string &network_name, std::map &output_vstream_params, - bool quantized, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size); + hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size); static hailo_status fill_missing_vstream_params_with_default(std::map &vstream_params, - std::vector &name_to_format_info, bool quantized, hailo_format_type_t format_type, uint32_t timeout_ms, + std::vector &name_to_format_info, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size); // Also adds information to CoreOpMetadata // TODO: When supporting multiple core ops in same netflow - Change metadata param to a map of core_ops_metadata. 
@@ -431,22 +434,13 @@ public: static Expected parse_nn_stream_config(const ProtoHEFEdgeLayerBase &edge_layer, bool hw_padding_supported, const ProtoHEFEdgeConnectionType &edge_connection_type); - static Expected parse_nn_stream_config(const LayerInfo &edge_layer, - bool hw_padding_supported); static Expected max_periph_bytes_value(const hailo_device_architecture_t hw_arch); - static Expected max_periph_bytes_value(const hailo_stream_interface_t interface); + static Expected max_periph_padding_payload_value(const hailo_device_architecture_t hw_arch); - static bool is_hw_padding_supported(const ProtoHEFEdgeLayer &edge_layer, const uint32_t max_periph_bytes_value); - static bool is_hw_padding_supported(const LayerInfo &layer_info, const uint32_t max_periph_bytes_value); -private: - static Expected parse_nn_stream_config(uint32_t width, uint32_t hw_data_bytes, - uint16_t core_buffers_per_frame, uint16_t core_bytes_per_buffer, bool hw_padding_supported, bool is_ddr, - uint16_t periph_buffers_per_frame, uint16_t periph_bytes_per_buffer); - static bool is_hw_padding_supported(bool is_boundary, bool is_mux, hailo_format_order_t format_order, - uint16_t core_buffers_per_frame, uint32_t height, uint32_t width, uint32_t features, uint32_t hw_data_bytes, - const uint32_t max_periph_bytes_value); + static bool is_core_hw_padding_supported(const LayerInfo &layer_info, const uint32_t max_periph_bytes_value, + const bool is_core_hw_padding_config_in_dfc); }; class HefUtils final @@ -463,22 +457,22 @@ public: const ProtoHEFHwArch &hef_arch); static Expected get_inter_context_layer_info( const ProtoHEFCoreOpMock &core_op, const uint8_t context_index, - const ProtoHEFEdgeLayer &layer, const SupportedFeatures &supported_features, const ProtoHEFHwArch &hef_arch); + const ProtoHEFEdgeLayer &layer, const SupportedFeatures &supported_features); static hailo_status fill_inter_context_layers_info( const ProtoHEFCoreOpMock &core_op, const uint8_t context_index, const ProtoHEFEdgeLayer 
&layer, const SupportedFeatures &supported_features, - ContextMetadata &context_metadata, const ProtoHEFHwArch &hef_arch); + ContextMetadata &context_metadata); static Expected get_ddr_layer_info( const ProtoHEFCoreOpMock &core_op, const uint8_t context_index, - const ProtoHEFEdgeLayer &layer, const SupportedFeatures &supported_features, const ProtoHEFHwArch &hef_arch); + const ProtoHEFEdgeLayer &layer, const SupportedFeatures &supported_features); static hailo_status fill_ddr_layers_info( const ProtoHEFCoreOpMock &core_op, const uint8_t context_index, const ProtoHEFEdgeLayer &layer, const SupportedFeatures &supported_features, - ContextMetadata &context_metadata, const ProtoHEFHwArch &hef_arch); + ContextMetadata &context_metadata); static hailo_status check_ddr_pairs_match( const std::vector &context_ddr_input_layers, const std::vector &context_ddr_output_layers, @@ -503,30 +497,28 @@ public: static std::string get_network_name(const std::string &net_group_name, const std::string &partial_network_name); private: + // TODO HRT-12051: Remove is_part_of_mux_layer parameter when core_hw_padding is removed static hailo_status fill_layer_info_with_base_info(const ProtoHEFEdgeLayerBase &base_info, - const ProtoHEFEdgeConnectionType &edge_connection_type, - const ProtoHEFNetworkGroupMetadata &network_group_proto, bool hw_padding_supported, bool transposed, - const uint8_t context_index, const uint8_t network_index, LayerInfo &layer_info, - const SupportedFeatures &supported_features, const ProtoHEFHwArch &hef_arch); + const ProtoHEFEdgeConnectionType &edge_connection_type, const ProtoHEFNetworkGroupMetadata &network_group_proto, + bool transposed, const uint8_t context_index, const uint8_t network_index, LayerInfo &layer_info, + const SupportedFeatures &supported_features, const ProtoHEFHwArch &hef_arch, const bool is_part_of_mux_layer); + // TODO HRT-12051: Remove is_part_of_mux_layer parameter when core_hw_padding is removed static hailo_status fill_layer_info(const 
ProtoHEFEdgeLayerInfo &info, - const ProtoHEFEdgeConnectionType &edge_connection_type, - const ProtoHEFCoreOpMock &core_op, hailo_stream_direction_t direction, - bool hw_padding_supported, const uint8_t context_index, const std::string &partial_network_name, + const ProtoHEFEdgeConnectionType &edge_connection_type, const ProtoHEFCoreOpMock &core_op, + hailo_stream_direction_t direction, const uint8_t context_index, const std::string &partial_network_name, uint8_t network_index, LayerInfo &layer_info, const SupportedFeatures &supported_features, - const ProtoHEFHwArch &hef_arch); + const ProtoHEFHwArch &hef_arch, const bool is_part_of_mux_layer); static hailo_status fill_fused_nms_info(const ProtoHEFEdgeLayerFused &info, LayerInfo &layer_info, hailo_quant_info_t &defuse_quant_info, const std::string &network_name, const bool burst_mode_enabled, const ProtoHEFHwArch &hef_arch); static hailo_status fill_mux_info(const ProtoHEFEdgeLayerMux &info, - const ProtoHEFEdgeConnectionType &edge_connection_type, - const ProtoHEFCoreOpMock &core_op, hailo_stream_direction_t direction, - bool hw_padding_supported, const uint8_t context_index, const std::string &partial_network_name, + const ProtoHEFEdgeConnectionType &edge_connection_type, const ProtoHEFCoreOpMock &core_op, + hailo_stream_direction_t direction, const uint8_t context_index, const std::string &partial_network_name, uint8_t network_index, LayerInfo &layer_info, const SupportedFeatures &supported_features, const ProtoHEFHwArch &hef_arch); static hailo_status fill_planes_info(const ProtoHEFEdgeLayerPlanes &info, - const ProtoHEFEdgeConnectionType &edge_connection_type, - const ProtoHEFCoreOpMock &core_op, hailo_stream_direction_t direction, - bool hw_padding_supported, const uint8_t context_index, const std::string &partial_network_name, + const ProtoHEFEdgeConnectionType &edge_connection_type, const ProtoHEFCoreOpMock &core_op, + hailo_stream_direction_t direction, const uint8_t context_index, const std::string 
&partial_network_name, uint8_t network_index, LayerInfo &layer_info, const SupportedFeatures &supported_features, const ProtoHEFHwArch &hef_arch); }; diff --git a/hailort/libhailort/src/hef/layer_info.hpp b/hailort/libhailort/src/hef/layer_info.hpp index 90c3214..385e86e 100644 --- a/hailort/libhailort/src/hef/layer_info.hpp +++ b/hailort/libhailort/src/hef/layer_info.hpp @@ -134,8 +134,7 @@ public: stream_info.hw_shape.height = layer.hw_shape.height; stream_info.hw_shape.width = layer.hw_shape.width; stream_info.hw_shape.features = layer.hw_shape.features; - stream_info.hw_frame_size = - stream_info.hw_shape.height * stream_info.hw_shape.width * stream_info.hw_shape.features * stream_info.hw_data_bytes; + stream_info.hw_frame_size = HailoRTCommon::get_periph_frame_size(stream_info.hw_shape, stream_info.format); } stream_info.direction = layer.direction; stream_info.index = layer.stream_index; @@ -273,7 +272,7 @@ public: if (HAILO_FORMAT_ORDER_HAILO_NMS == layer_info.format.order) { return get_nms_layer_transfer_size(layer_info); } - return (layer_info.hw_shape.width * layer_info.hw_shape.features * layer_info.hw_shape.height * layer_info.hw_data_bytes); + return HailoRTCommon::get_periph_frame_size(layer_info.hw_shape, layer_info.format); } private: diff --git a/hailort/libhailort/src/hw_consts.hpp b/hailort/libhailort/src/hw_consts.hpp index 114d9cf..c576eed 100644 --- a/hailort/libhailort/src/hw_consts.hpp +++ b/hailort/libhailort/src/hw_consts.hpp @@ -13,19 +13,14 @@ /** stable constants **************************************************/ /** Package constants *********************************************************/ +// TODO HRT-11452 - use hw consts here instead of these defines #define HAILO8_INBOUND_DATA_STREAM_SIZE (0x00010000L) -// Max periph bytes per buffer for hailo15 because (we use its value shifted right by 3 - according to the spec) to +#define HAILO8_PERIPH_PAYLOAD_MAX_VALUE (0x0000FFFFL) +// Max periph bytes per buffer for hailo1x because (we 
use its value shifted right by 3 - according to the spec) to // configure shmifo credit size - which in hailo15 only has a width of 10 bits -#define HAILO15_PERIPH_BYTES_PER_BUFFER_MAX_SIZE (0x00002000L) +#define HAILO1X_PERIPH_BYTES_PER_BUFFER_MAX_SIZE (0x00002000L) +#define HAILO1X_PERIPH_PAYLOAD_MAX_VALUE (0x007FFFFFL) -/** PCIe constants and macors ************************************************/ -#define PCIE_CONFIG_BASE_ADDRESS (0x00200000L) // ::HW_BASE_ADDRESSES__PCIE_CONFIG(0, 0, 0) -#define PCIE_BRIDGE_CONFIG__ATR_PARAM_ATR0_PCIE_WIN1__ATR_IMPL__SET(dst) (dst) = ((dst) & ~0x00000001L) | ((uint32_t)(1) << 0) // ::PCIE_BRIDGE_CONFIG__ATR_PARAM_ATR0_PCIE_WIN1__ATR_IMPL__SET -#define PCIE_BRIDGE_CONFIG__ATR_PARAM_ATR0_PCIE_WIN1__ATR_SIZE__MODIFY(dst, src) (dst) = ((dst) & ~0x0000007EL) | (((uint32_t)(src) << 1) & 0x0000007EL) // ::PCIE_BRIDGE_CONFIG__ATR_PARAM_ATR0_PCIE_WIN1__ATR_SIZE__MODIFY -#define PCIE_BRIDGE_CONFIG__ATR_PARAM_ATR0_PCIE_WIN1__SOURCE_ADDR__MODIFY(dst, src) (dst) = ((dst) & ~0xFFFFF000L) | (((uint32_t)(src) << 12) & 0xFFFFF000L) // ::PCIE_BRIDGE_CONFIG__ATR_PARAM_ATR0_PCIE_WIN1__SOURCE_ADDR__MODIFY -#define PCIE_BRIDGE_CONFIG__ATR_PARAM_ATR1_PCIE_WIN1__ATR_IMPL__SET(dst) (dst) = ((dst) & ~0x00000001L) | ((uint32_t)(1) << 0) // ::PCIE_BRIDGE_CONFIG__ATR_PARAM_ATR1_PCIE_WIN1__ATR_IMPL__SET -#define PCIE_BRIDGE_CONFIG__ATR_PARAM_ATR1_PCIE_WIN1__ATR_SIZE__MODIFY(dst, src) (dst) = ((dst) & ~0x0000007EL) | (((uint32_t)(src) << 1) & 0x0000007EL) // ::PCIE_BRIDGE_CONFIG__ATR_PARAM_ATR1_PCIE_WIN1__ATR_SIZE__MODIFY -#define PCIE_BRIDGE_CONFIG__ATR_PARAM_ATR1_PCIE_WIN1__SOURCE_ADDR__MODIFY(dst, src) (dst) = ((dst) & ~0xFFFFF000L) | (((uint32_t)(src) << 12) & 0xFFFFF000L) // ::PCIE_BRIDGE_CONFIG__ATR_PARAM_ATR1_PCIE_WIN1__SOURCE_ADDR__MODIFY /** Vdma Channel registers ***************************************************/ #define VDMA_CHANNEL_CONTROL_OFFSET (0x00) diff --git a/hailort/libhailort/src/mipi/mipi_stream.cpp 
b/hailort/libhailort/src/mipi/mipi_stream.cpp index 6332f48..82bd8a1 100644 --- a/hailort/libhailort/src/mipi/mipi_stream.cpp +++ b/hailort/libhailort/src/mipi/mipi_stream.cpp @@ -25,7 +25,7 @@ namespace hailort MipiInputStream::MipiInputStream(Device &device, const CONTROL_PROTOCOL__mipi_input_config_params_t &mipi_params, EventPtr &&core_op_activated_event, const LayerInfo &layer_info, hailo_status &status) : - InputStreamBase(layer_info, HAILO_STREAM_INTERFACE_MIPI, std::move(core_op_activated_event), status), + InputStreamBase(layer_info, std::move(core_op_activated_event), status), m_device(device), m_is_stream_activated(false), m_mipi_input_params(mipi_params) @@ -61,12 +61,12 @@ std::chrono::milliseconds MipiInputStream::get_timeout() const return std::chrono::milliseconds(0); } -hailo_status MipiInputStream::abort() +hailo_status MipiInputStream::abort_impl() { return HAILO_INVALID_OPERATION; } -hailo_status MipiInputStream::clear_abort() +hailo_status MipiInputStream::clear_abort_impl() { return HAILO_INVALID_OPERATION; } @@ -108,7 +108,10 @@ hailo_status MipiInputStream::activate_stream() hailo_status status = HAILO_UNINITIALIZED; CONTROL_PROTOCOL__config_stream_params_t params = {}; - params.nn_stream_config = m_nn_stream_config; + // Core HW padding is not supported on MIPI + m_layer_info.nn_stream_config.feature_padding_payload = 0; + params.nn_stream_config = m_layer_info.nn_stream_config; + params.communication_type = CONTROL_PROTOCOL__COMMUNICATION_TYPE_MIPI; params.is_input = true; params.stream_index = m_stream_info.index; diff --git a/hailort/libhailort/src/mipi/mipi_stream.hpp b/hailort/libhailort/src/mipi/mipi_stream.hpp index d9223d6..5a8f948 100644 --- a/hailort/libhailort/src/mipi/mipi_stream.hpp +++ b/hailort/libhailort/src/mipi/mipi_stream.hpp @@ -55,8 +55,8 @@ public: virtual hailo_status deactivate_stream() override; virtual hailo_stream_interface_t get_interface() const override { return HAILO_STREAM_INTERFACE_MIPI; } virtual 
std::chrono::milliseconds get_timeout() const override; - virtual hailo_status abort() override; - virtual hailo_status clear_abort() override; + virtual hailo_status abort_impl() override; + virtual hailo_status clear_abort_impl() override; }; } /* namespace hailort */ diff --git a/hailort/libhailort/src/net_flow/CMakeLists.txt b/hailort/libhailort/src/net_flow/CMakeLists.txt index 55aa0c0..c49a12b 100644 --- a/hailort/libhailort/src/net_flow/CMakeLists.txt +++ b/hailort/libhailort/src/net_flow/CMakeLists.txt @@ -8,9 +8,12 @@ set(SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/ops/argmax_post_process.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ops/softmax_post_process.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ops/yolov5_seg_post_process.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/ops/yolov8_post_process.cpp ${CMAKE_CURRENT_SOURCE_DIR}/pipeline/pipeline.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/pipeline/pipeline_builder.cpp ${CMAKE_CURRENT_SOURCE_DIR}/pipeline/inference_pipeline.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/pipeline/pipeline_internal.cpp ${CMAKE_CURRENT_SOURCE_DIR}/pipeline/vstream.cpp ${CMAKE_CURRENT_SOURCE_DIR}/pipeline/async_infer_runner.cpp ${CMAKE_CURRENT_SOURCE_DIR}/pipeline/infer_model.cpp diff --git a/hailort/libhailort/src/net_flow/ops/nms_post_process.cpp b/hailort/libhailort/src/net_flow/ops/nms_post_process.cpp index efe8608..38bd271 100644 --- a/hailort/libhailort/src/net_flow/ops/nms_post_process.cpp +++ b/hailort/libhailort/src/net_flow/ops/nms_post_process.cpp @@ -49,8 +49,6 @@ hailo_status NmsOpMetadata::validate_format_info() output_metadata.first); CHECK(!(HAILO_FORMAT_FLAGS_HOST_ARGMAX & output_metadata.second.format.flags), HAILO_INVALID_ARGUMENT, "Output {} is marked as argmax, which is not supported for this model.", output_metadata.first); - CHECK(!(HAILO_FORMAT_FLAGS_QUANTIZED & output_metadata.second.format.flags), HAILO_INVALID_ARGUMENT, "Output {} is marked as quantized, which is not supported for this model.", - output_metadata.first); } if (m_type == OperationType::IOU) { 
assert(1 == m_inputs_metadata.size()); @@ -69,9 +67,6 @@ hailo_status NmsOpMetadata::validate_format_info() HailoRTCommon::get_format_type_str(input_metadata.second.format.type)); CHECK(input_metadata.second.format.type == first_input_type, HAILO_INVALID_ARGUMENT,"All inputs format type should be the same"); - - CHECK(HAILO_FORMAT_FLAGS_QUANTIZED == input_metadata.second.format.flags, HAILO_INVALID_ARGUMENT, "The given input format flag is not supported," - "should be HAILO_FORMAT_FLAGS_QUANTIZED"); } } @@ -180,7 +175,7 @@ void NmsPostProcessOp::fill_nms_format_buffer(MemoryView &buffer, const std::vec + (num_of_detections_before[detection.m_class_id] * sizeof(hailo_bbox_float32_t)); assert((buffer_offset + sizeof(hailo_bbox_float32_t)) <= buffer.size()); - memcpy((hailo_bbox_float32_t*)(buffer.data() + buffer_offset), &detection.m_bbox, sizeof(hailo_bbox_float32_t)); + *(hailo_bbox_float32_t*)(buffer.data() + buffer_offset) = *(hailo_bbox_float32_t*)&(detection.m_bbox); num_of_detections_before[detection.m_class_id]++; classes_detections_count[detection.m_class_id]--; } @@ -191,11 +186,10 @@ void NmsPostProcessOp::fill_nms_format_buffer(MemoryView &buffer, const std::vec } } -hailo_status NmsPostProcessOp::hailo_nms_format(std::vector &&detections, - MemoryView dst_view, std::vector &classes_detections_count) +hailo_status NmsPostProcessOp::hailo_nms_format(MemoryView dst_view) { - remove_overlapping_boxes(detections, classes_detections_count, m_nms_metadata->nms_config().nms_iou_th); - fill_nms_format_buffer(dst_view, detections, classes_detections_count, m_nms_metadata->nms_config()); + remove_overlapping_boxes(m_detections, m_classes_detections_count, m_nms_metadata->nms_config().nms_iou_th); + fill_nms_format_buffer(dst_view, m_detections, m_classes_detections_count, m_nms_metadata->nms_config()); return HAILO_SUCCESS; } diff --git a/hailort/libhailort/src/net_flow/ops/nms_post_process.hpp b/hailort/libhailort/src/net_flow/ops/nms_post_process.hpp index 
f746981..ae2623b 100644 --- a/hailort/libhailort/src/net_flow/ops/nms_post_process.hpp +++ b/hailort/libhailort/src/net_flow/ops/nms_post_process.hpp @@ -32,6 +32,7 @@ namespace net_flow #define INVALID_BBOX_DIM (std::numeric_limits::max()) #define INVALID_NMS_DETECTION (std::numeric_limits::max()) #define INVALID_NMS_SCORE (std::numeric_limits::max()) +#define INVALID_NMS_CONFIG (-1) inline bool operator==(const hailo_bbox_float32_t &first, const hailo_bbox_float32_t &second) { return first.y_min == second.y_min && first.x_min == second.x_min && first.y_max == second.y_max && first.x_max == second.x_max && first.score == second.score; @@ -44,13 +45,17 @@ inline bool operator==(const hailo_bbox_t &first, const hailo_bbox_t &second) { struct DetectionBbox { DetectionBbox(float32_t x_min, float32_t y_min, float32_t width, float32_t height, float32_t score, uint32_t class_id) - : m_class_id(class_id), m_bbox{y_min, x_min, (y_min + height), (x_min + width), score} {} + : m_class_id(class_id), m_bbox{y_min, x_min, (y_min + height), (x_min + width), score}, m_bbox_with_mask{} {} DetectionBbox(const hailo_bbox_float32_t &bbox, uint32_t class_id) - : m_class_id(class_id), m_bbox(bbox) {} + : m_class_id(class_id), m_bbox(bbox), m_bbox_with_mask{} {} - DetectionBbox(const hailo_bbox_float32_t &bbox, uint32_t class_id, std::vector &&mask) - : m_class_id(class_id), m_bbox(bbox), m_mask(std::move(mask)) {} + DetectionBbox(const hailo_bbox_float32_t &bbox, uint16_t class_id, std::vector &&mask, + float32_t image_height, float32_t image_width) + : m_class_id(class_id), m_coefficients(std::move(mask)), m_bbox(bbox), + m_bbox_with_mask{{bbox.y_min, bbox.x_min, bbox.y_max, bbox.x_max}, bbox.score, class_id, + get_mask_size_in_bytes(image_height, image_width), nullptr} + {} DetectionBbox() : DetectionBbox(hailo_bbox_float32_t{ INVALID_BBOX_DIM, @@ -60,19 +65,30 @@ struct DetectionBbox INVALID_BBOX_DIM }, INVALID_NMS_DETECTION) {} - inline uint32_t get_bbox_rounded_height(float32_t 
image_height) const + inline uint32_t get_bbox_height(float32_t image_height) const { - return static_cast(std::round((m_bbox.y_max - m_bbox.y_min) * image_height)); + return static_cast(std::ceil((m_bbox.y_max - m_bbox.y_min) * image_height)); } - inline uint32_t get_bbox_rounded_width(float32_t image_width) const + inline uint32_t get_bbox_width(float32_t image_width) const { - return static_cast(std::round((m_bbox.x_max - m_bbox.x_min) * image_width)); + return static_cast(std::ceil((m_bbox.x_max - m_bbox.x_min) * image_width)); + } + + inline size_t get_mask_size_in_bytes(float32_t image_height, float32_t image_width) const + { + auto box_height = get_bbox_height(image_height); + auto box_width = get_bbox_width(image_width); + auto mask_size = box_width * box_height; + + return mask_size; } uint32_t m_class_id; + std::vector m_coefficients; // Used in segmentation networks + // TODO: HRT-12093 - Unite usage and remove `hailo_bbox_float32_t`. hailo_bbox_float32_t m_bbox; - std::vector m_mask; // Used in segmentation networks, otherwise there is no mask. 
+ hailo_detection_with_byte_mask_t m_bbox_with_mask; }; inline bool operator==(const DetectionBbox &first, const DetectionBbox &second) { @@ -246,8 +262,19 @@ public: protected: NmsPostProcessOp(std::shared_ptr metadata) : Op(static_cast(metadata)) + , m_classes_detections_count(metadata->nms_config().number_of_classes, 0) , m_nms_metadata(metadata) - {} + { + m_detections.reserve(metadata->nms_config().max_proposals_per_class * metadata->nms_config().number_of_classes); + } + + void clear_before_frame() + { + m_detections.clear(); + m_detections.reserve(m_nms_metadata->nms_config().max_proposals_per_class * m_nms_metadata->nms_config().number_of_classes); + + m_classes_detections_count.assign(m_nms_metadata->nms_config().number_of_classes, 0); + } template std::pair get_max_class(const SrcType *data, uint32_t entry_idx, uint32_t classes_start_index, @@ -281,9 +308,10 @@ protected: return max_id_score_pair; } - hailo_status hailo_nms_format(std::vector &&detections, - MemoryView dst_view, std::vector &classes_detections_count); + hailo_status hailo_nms_format(MemoryView dst_view); + std::vector m_detections; + std::vector m_classes_detections_count; private: std::shared_ptr m_nms_metadata; diff --git a/hailort/libhailort/src/net_flow/ops/op.hpp b/hailort/libhailort/src/net_flow/ops/op.hpp index 894c17b..2d94e7a 100644 --- a/hailort/libhailort/src/net_flow/ops/op.hpp +++ b/hailort/libhailort/src/net_flow/ops/op.hpp @@ -15,6 +15,7 @@ #include "hailo/hailort.h" #include "hailo/buffer.hpp" +#include "hailo/network_group.hpp" #include "net_flow/ops/op_metadata.hpp" #include "common/utils.hpp" diff --git a/hailort/libhailort/src/net_flow/ops/op_metadata.hpp b/hailort/libhailort/src/net_flow/ops/op_metadata.hpp index 803d883..d07a444 100644 --- a/hailort/libhailort/src/net_flow/ops/op_metadata.hpp +++ b/hailort/libhailort/src/net_flow/ops/op_metadata.hpp @@ -30,6 +30,7 @@ struct BufferMetaData enum class OperationType { YOLOX, YOLOV5, + YOLOV8, YOLOV5SEG, SSD, SOFTMAX, @@ 
-78,6 +79,8 @@ public: return "YOLOV5"; case OperationType::YOLOV5SEG: return "YOLOV5SEG"; + case OperationType::YOLOV8: + return "YOLOV8"; case OperationType::SSD: return "SSD"; case OperationType::SOFTMAX: @@ -109,8 +112,6 @@ protected: virtual hailo_status validate_params() = 0; }; -using PostProcessOpMetadataPtr = std::shared_ptr; - } } diff --git a/hailort/libhailort/src/net_flow/ops/softmax_post_process.cpp b/hailort/libhailort/src/net_flow/ops/softmax_post_process.cpp index eb6132f..9e40fef 100644 --- a/hailort/libhailort/src/net_flow/ops/softmax_post_process.cpp +++ b/hailort/libhailort/src/net_flow/ops/softmax_post_process.cpp @@ -34,6 +34,26 @@ hailo_status SoftmaxPostProcessOp::execute_not_supported(const BufferMetaData &i return HAILO_INVALID_ARGUMENT; } +hailo_status SoftmaxPostProcessOp::softmax(float32_t *src, float32_t *dst, size_t num_of_elements) +{ + // In order to avoid overflows, we will perform the following: + // We find the maximal value and then we substract it from all of the values. 
+ // This will preserve the original softmax values + prevent overflows + float32_t max_val = *std::max_element(src, src + num_of_elements); + float32_t sum_exp = 0; // denominator + for (uint32_t c = 0; c < num_of_elements; c++) { + auto ¤t_value = *(src + c); + current_value -= max_val; // This step preserves the original softmax values + prevent overflows + current_value = std::exp(static_cast(current_value)); // Set src[c] to e^(src[c]) so that we only calculate it once + sum_exp += current_value; + } + for (uint32_t c = 0; c < num_of_elements; c++) { + const auto ¤t_value = *(src + c); + dst[c] = static_cast(current_value / sum_exp); + } + return HAILO_SUCCESS; +} + SoftmaxFunction SoftmaxPostProcessOp::m_softmax_function_array[SOFTMAX_NUM_OF_POSSIBLE_FORMAT_ORDERS][SOFTMAX_NUM_OF_POSSIBLE_FORMAT_TYPES][SOFTMAX_NUM_OF_POSSIBLE_FORMAT_TYPES] { // Currently supported on: @@ -176,10 +196,6 @@ hailo_status SoftmaxOpMetadata::validate_format_info() auto &input_metadata = m_inputs_metadata.begin()->second; auto &output_metadata = m_outputs_metadata.begin()->second; - CHECK( - ((input_metadata.format.flags & HAILO_FORMAT_FLAGS_QUANTIZED) == 0) && ((output_metadata.format.flags & HAILO_FORMAT_FLAGS_QUANTIZED) == 0), - HAILO_INVALID_OPERATION, "Softmax op is supported only on dequantized data"); - CHECK( ((input_metadata.format.order == HAILO_FORMAT_ORDER_NHWC) && (output_metadata.format.order == HAILO_FORMAT_ORDER_NHWC)) || ((input_metadata.format.order == HAILO_FORMAT_ORDER_NC) && (output_metadata.format.order == HAILO_FORMAT_ORDER_NC)), @@ -197,8 +213,6 @@ hailo_status SoftmaxOpMetadata::validate_format_info() HailoRTCommon::get_format_type_str(HAILO_FORMAT_TYPE_FLOAT32)); CHECK(!(HAILO_FORMAT_FLAGS_HOST_ARGMAX & output_metadata.format.flags), HAILO_INVALID_ARGUMENT, "Output {} is marked as argmax, which is not supported for this model.", m_outputs_metadata.begin()->first); - CHECK(!(HAILO_FORMAT_FLAGS_QUANTIZED & output_metadata.format.flags), 
HAILO_INVALID_ARGUMENT, "Output {} is marked as quantized, which is not supported for this model.", - m_outputs_metadata.begin()->first); return HAILO_SUCCESS; } diff --git a/hailort/libhailort/src/net_flow/ops/softmax_post_process.hpp b/hailort/libhailort/src/net_flow/ops/softmax_post_process.hpp index 48b26ff..8e4e341 100644 --- a/hailort/libhailort/src/net_flow/ops/softmax_post_process.hpp +++ b/hailort/libhailort/src/net_flow/ops/softmax_post_process.hpp @@ -83,26 +83,7 @@ private: for (uint32_t w = 0; w < input_metadata.shape.width; w++) { // W axis - coloums dst_type *src_col = src_row + (w * src_width_size); src_type *dst_col = dst_row + (w * dst_width_size); - // In order to avoid overflows, we will perform the following: - // For each HW, we will find the maximal c value and then we will substract this value from - // all of the values in this HW. This will preserve the original softmax values + prevent overflows - src_type max_val = std::numeric_limits::min(); - for (uint32_t c = 0; c < input_metadata.shape.features; c++) { - auto ¤t_value = *(src_col + c); - if (current_value > max_val) - max_val = current_value; - } - dst_type sum_exp = 0; // denominator - for (uint32_t c = 0; c < input_metadata.shape.features; c++) { // C axis - features - auto ¤t_value = *(src_col + c); - current_value -= max_val; // This step preserves the original softmax values + prevent overflows - current_value = std::exp(static_cast(current_value)); // Set src_ptr[c] to e^(src_ptr[c]) so that we only calculate it once - sum_exp += current_value; - } - for (uint32_t c = 0; c < input_metadata.shape.features; c++) { - const auto ¤t_value = *(src_col + c); - dst_col[c] = static_cast(current_value / sum_exp); - } + softmax(src_col, dst_col, input_metadata.shape.features); } } return HAILO_SUCCESS; @@ -115,25 +96,7 @@ private: (void) output_metadata; auto src_ptr = (src_type*)inputs.begin()->second.data(); auto dst_ptr = (dst_type*)outputs.begin()->second.data(); - // In order to 
avoid overflows, we will perform the following: - // For each HW, we will find the maximal c value and then we will substract this value from - // all of the values in this HW. This will preserve the original softmax values + prevent overflows - src_type max_val = std::numeric_limits::min(); - for (uint32_t c = 0; c < input_metadata.shape.features; c++) { - auto ¤t_value = *(src_ptr + c); - if (current_value > max_val) - max_val = current_value; - } - dst_type sum_exp = 0; - for (uint32_t c = 0; c < input_metadata.shape.features; c++) { - auto ¤t_value = *(src_ptr + c); - current_value -= max_val; // This step preserves the original softmax values + prevent overflows - current_value = std::exp(static_cast(current_value)); // Set src_ptr[c] to e^(src_ptr[c]) - sum_exp += current_value; - } - for (uint32_t c = 0; c < input_metadata.shape.features; c++) { - dst_ptr[c] = static_cast(src_ptr[c] / sum_exp); - } + softmax(src_ptr, dst_ptr, input_metadata.shape.features); return HAILO_SUCCESS; } @@ -151,6 +114,8 @@ private: // 3rd dim represent the output data type (only float_32 is supported) static SoftmaxFunction m_softmax_function_array[SOFTMAX_NUM_OF_POSSIBLE_FORMAT_ORDERS][SOFTMAX_NUM_OF_POSSIBLE_FORMAT_TYPES][SOFTMAX_NUM_OF_POSSIBLE_FORMAT_TYPES]; + static hailo_status softmax(float32_t *src, float32_t *dst, size_t num_of_elements); + }; } /* namespace net_flow */ diff --git a/hailort/libhailort/src/net_flow/ops/ssd_post_process.cpp b/hailort/libhailort/src/net_flow/ops/ssd_post_process.cpp index e56d3b7..1d1f130 100644 --- a/hailort/libhailort/src/net_flow/ops/ssd_post_process.cpp +++ b/hailort/libhailort/src/net_flow/ops/ssd_post_process.cpp @@ -114,25 +114,21 @@ hailo_status SSDPostProcessOp::execute(const std::map & "Anchors vector count must be equal to data vector count. 
Anchors size is {}, data size is {}", m_metadata->ssd_config().anchors.size(), inputs.size()); - std::vector detections; - std::vector classes_detections_count(m_metadata->nms_config().number_of_classes, 0); - detections.reserve(m_metadata->nms_config().max_proposals_per_class * m_metadata->nms_config().number_of_classes); + clear_before_frame(); for (const auto ®_to_cls : m_metadata->ssd_config().reg_to_cls_inputs) { assert(contains(inputs, reg_to_cls.first)); assert(contains(inputs, reg_to_cls.second)); auto status = extract_detections(reg_to_cls.first, reg_to_cls.second, - inputs.at(reg_to_cls.first), inputs.at(reg_to_cls.second), - detections, classes_detections_count); + inputs.at(reg_to_cls.first), inputs.at(reg_to_cls.second)); CHECK_SUCCESS(status); } // TODO: Add support for TF_FORMAT_ORDER - return hailo_nms_format(std::move(detections), outputs.begin()->second, classes_detections_count); + return hailo_nms_format(outputs.begin()->second); } hailo_status SSDPostProcessOp::extract_detections(const std::string ®_input_name, const std::string &cls_input_name, - const MemoryView ®_buffer, const MemoryView &cls_buffer, - std::vector &detections, std::vector &classes_detections_count) + const MemoryView ®_buffer, const MemoryView &cls_buffer) { const auto &inputs_metadata = m_metadata->inputs_metadata(); const auto &ssd_config = m_metadata->ssd_config(); @@ -202,8 +198,7 @@ hailo_status SSDPostProcessOp::extract_detections(const std::string ®_input_n reg_idx + Y_OFFSET, reg_idx + W_OFFSET, reg_idx + H_OFFSET, - cls_idx, wa, ha, xcenter_a, ycenter_a, - detections, classes_detections_count); + cls_idx, wa, ha, xcenter_a, ycenter_a); CHECK_SUCCESS(status); } else if (inputs_metadata.at(reg_input_name).format.type == HAILO_FORMAT_TYPE_UINT16) { auto status = extract_bbox_detections( @@ -213,8 +208,7 @@ hailo_status SSDPostProcessOp::extract_detections(const std::string ®_input_n reg_idx + Y_OFFSET, reg_idx + W_OFFSET, reg_idx + H_OFFSET, - cls_idx, wa, ha, 
xcenter_a, ycenter_a, - detections, classes_detections_count); + cls_idx, wa, ha, xcenter_a, ycenter_a); CHECK_SUCCESS(status); } else if (inputs_metadata.at(reg_input_name).format.type == HAILO_FORMAT_TYPE_FLOAT32) { // For testing - TODO: HRT-9341 - Remove after generator tests are in, and return error. @@ -225,8 +219,7 @@ hailo_status SSDPostProcessOp::extract_detections(const std::string ®_input_n reg_idx + Y_OFFSET, reg_idx + W_OFFSET, reg_idx + H_OFFSET, - cls_idx, wa, ha, xcenter_a, ycenter_a, - detections, classes_detections_count); + cls_idx, wa, ha, xcenter_a, ycenter_a); CHECK_SUCCESS(status); } else { CHECK_SUCCESS(HAILO_INVALID_ARGUMENT, "SSD post-process received invalid reg input type: {}", diff --git a/hailort/libhailort/src/net_flow/ops/ssd_post_process.hpp b/hailort/libhailort/src/net_flow/ops/ssd_post_process.hpp index 114e14d..ba1331e 100644 --- a/hailort/libhailort/src/net_flow/ops/ssd_post_process.hpp +++ b/hailort/libhailort/src/net_flow/ops/ssd_post_process.hpp @@ -97,8 +97,7 @@ private: std::shared_ptr m_metadata; template - void extract_bbox_classes(const hailo_bbox_float32_t &dims_bbox, SrcType *cls_data, const BufferMetaData &cls_metadata, uint32_t cls_index, - std::vector &detections, std::vector &classes_detections_count) + void extract_bbox_classes(const hailo_bbox_float32_t &dims_bbox, SrcType *cls_data, const BufferMetaData &cls_metadata, uint32_t cls_index) { const auto &nms_config = m_metadata->nms_config(); if (nms_config.cross_classes) { @@ -108,8 +107,8 @@ private: auto bbox = dims_bbox; bbox.score = max_id_score_pair.second; if (max_id_score_pair.second >= nms_config.nms_score_th) { - detections.emplace_back(DetectionBbox(bbox, max_id_score_pair.first)); - classes_detections_count[max_id_score_pair.first]++; + m_detections.emplace_back(DetectionBbox(bbox, max_id_score_pair.first)); + m_classes_detections_count[max_id_score_pair.first]++; } } else { for (uint32_t class_index = 0; class_index < nms_config.number_of_classes; 
class_index++) { @@ -135,8 +134,8 @@ private: } auto bbox = dims_bbox; bbox.score = class_score; - detections.emplace_back(bbox, class_id); - classes_detections_count[class_id]++; + m_detections.emplace_back(bbox, class_id); + m_classes_detections_count[class_id]++; } } } @@ -145,8 +144,7 @@ private: hailo_status extract_bbox_detections(const std::string ®_input_name, const std::string &cls_input_name, const MemoryView ®_buffer, const MemoryView &cls_buffer, uint64_t x_index, uint64_t y_index, uint64_t w_index, uint64_t h_index, - uint32_t cls_index, float32_t wa, float32_t ha, float32_t xcenter_a, float32_t ycenter_a, - std::vector &detections, std::vector &classes_detections_count) + uint32_t cls_index, float32_t wa, float32_t ha, float32_t xcenter_a, float32_t ycenter_a) { const auto &inputs_metadata = m_metadata->inputs_metadata(); const auto &ssd_config = m_metadata->ssd_config(); @@ -185,13 +183,13 @@ private: const auto &cls_metadata = inputs_metadata.at(cls_input_name); if (cls_metadata.format.type == HAILO_FORMAT_TYPE_UINT8) { extract_bbox_classes(dims_bbox, (uint8_t*)cls_data, cls_metadata, - cls_index, detections, classes_detections_count); + cls_index); } else if (cls_metadata.format.type == HAILO_FORMAT_TYPE_UINT16) { extract_bbox_classes(dims_bbox, (uint16_t*)cls_data, cls_metadata, - cls_index, detections, classes_detections_count); + cls_index); } else if (cls_metadata.format.type == HAILO_FORMAT_TYPE_FLOAT32) { extract_bbox_classes(dims_bbox, (float32_t*)cls_data, cls_metadata, - cls_index, detections, classes_detections_count); + cls_index); } else { CHECK_SUCCESS(HAILO_INVALID_ARGUMENT, "SSD post-process received invalid cls input type: {}", cls_metadata.format.type); @@ -206,14 +204,11 @@ private: * @param[in] cls_input_name Name of the classes input * @param[in] reg_buffer Buffer containing the boxes data after inference * @param[in] cls_buffer Buffer containing the classes ids after inference. 
- * @param[inout] detections A vector of ::DetectionBbox objects, to add the detected bboxes to. - * @param[inout] classes_detections_count A vector of uint32_t, to add count of detections count per class to. * * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error. */ hailo_status extract_detections(const std::string ®_input_name, const std::string &cls_input_name, - const MemoryView ®_buffer, const MemoryView &cls_buffer, - std::vector &detections, std::vector &classes_detections_count); + const MemoryView ®_buffer, const MemoryView &cls_buffer); }; } diff --git a/hailort/libhailort/src/net_flow/ops/yolov5_op_metadata.hpp b/hailort/libhailort/src/net_flow/ops/yolov5_op_metadata.hpp new file mode 100644 index 0000000..145f84e --- /dev/null +++ b/hailort/libhailort/src/net_flow/ops/yolov5_op_metadata.hpp @@ -0,0 +1,70 @@ +/** + * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) +**/ +/** + * @file yolov5_op_metadata.hpp + * @brief YOLOV5 op metadata + **/ + +#ifndef _HAILO_YOLO_OP_METADATA_HPP_ +#define _HAILO_YOLO_OP_METADATA_HPP_ + +#include "net_flow/ops/op_metadata.hpp" + +namespace hailort +{ +namespace net_flow +{ + +struct YoloPostProcessConfig +{ + // The image height. + float32_t image_height = 0; + + // The image width. + float32_t image_width = 0; + + // A vector of anchors, each element in the vector represents the anchors for a specific layer + // Each layer anchors vector is structured as {w,h} pairs. 
+ std::map> anchors; +}; + +class Yolov5OpMetadata : public NmsOpMetadata +{ +public: + static Expected> create(const std::unordered_map &inputs_metadata, + const std::unordered_map &outputs_metadata, + const NmsPostProcessConfig &nms_post_process_config, + const YoloPostProcessConfig &yolov5_post_process_config, + const std::string &network_name); + std::string get_op_description() override; + hailo_status validate_format_info() override; + YoloPostProcessConfig &yolov5_config() { return m_yolov5_config;}; + +protected: + Yolov5OpMetadata(const std::unordered_map &inputs_metadata, + const std::unordered_map &outputs_metadata, + const NmsPostProcessConfig &nms_post_process_config, + const std::string &name, + const std::string &network_name, + const YoloPostProcessConfig &yolov5_post_process_config, + const OperationType op_type) + : NmsOpMetadata(inputs_metadata, outputs_metadata, nms_post_process_config, name, network_name, op_type) + , m_yolov5_config(yolov5_post_process_config) + {} + + hailo_status validate_params() override; + +private: + YoloPostProcessConfig m_yolov5_config; + +}; + +} // namespace net_flow +} // namespace hailort + +#endif // _HAILO_YOLOV5_OP_METADATA_HPP_ + + + diff --git a/hailort/libhailort/src/net_flow/ops/yolov5_post_process.cpp b/hailort/libhailort/src/net_flow/ops/yolov5_post_process.cpp index e8033fa..0e08b41 100644 --- a/hailort/libhailort/src/net_flow/ops/yolov5_post_process.cpp +++ b/hailort/libhailort/src/net_flow/ops/yolov5_post_process.cpp @@ -66,14 +66,11 @@ hailo_status YOLOv5PostProcessOp::execute(const std::mapinputs_metadata(); const auto &yolo_config = m_metadata->yolov5_config(); - const auto &nms_config = m_metadata->nms_config(); CHECK(inputs.size() == yolo_config.anchors.size(), HAILO_INVALID_ARGUMENT, "Anchors vector count must be equal to data vector count. 
Anchors size is {}, data size is {}", yolo_config.anchors.size(), inputs.size()); - std::vector detections; - std::vector classes_detections_count(nms_config.number_of_classes, 0); - detections.reserve(nms_config.max_proposals_per_class * nms_config.number_of_classes); + clear_before_frame(); for (const auto &name_to_input : inputs) { hailo_status status; auto &name = name_to_input.first; @@ -82,10 +79,10 @@ hailo_status YOLOv5PostProcessOp::execute(const std::map(name_to_input.second, input_metadata.quant_info, input_metadata.shape, - input_metadata.padded_shape, yolo_config.anchors.at(name), detections, classes_detections_count); + input_metadata.padded_shape, yolo_config.anchors.at(name)); } else if (input_metadata.format.type == HAILO_FORMAT_TYPE_UINT16) { status = extract_detections(name_to_input.second, input_metadata.quant_info, input_metadata.shape, - input_metadata.padded_shape, yolo_config.anchors.at(name), detections, classes_detections_count); + input_metadata.padded_shape, yolo_config.anchors.at(name)); } else { CHECK_SUCCESS(HAILO_INVALID_ARGUMENT, "YOLO post-process received invalid input type {}", input_metadata.format.type); } @@ -93,7 +90,7 @@ hailo_status YOLOv5PostProcessOp::execute(const std::mapsecond, classes_detections_count); + return hailo_nms_format(outputs.begin()->second); } hailo_bbox_float32_t YOLOv5PostProcessOp::decode(float32_t tx, float32_t ty, float32_t tw, float32_t th, diff --git a/hailort/libhailort/src/net_flow/ops/yolov5_post_process.hpp b/hailort/libhailort/src/net_flow/ops/yolov5_post_process.hpp index 7472afb..903a1da 100644 --- a/hailort/libhailort/src/net_flow/ops/yolov5_post_process.hpp +++ b/hailort/libhailort/src/net_flow/ops/yolov5_post_process.hpp @@ -14,7 +14,7 @@ #define _HAILO_YOLO_POST_PROCESS_HPP_ #include "net_flow/ops/nms_post_process.hpp" -#include "net_flow/ops/op_metadata.hpp" +#include "net_flow/ops/yolov5_op_metadata.hpp" namespace hailort { @@ -23,50 +23,6 @@ namespace net_flow #define 
MASK_COEFFICIENT_SIZE (32) -struct YoloPostProcessConfig -{ - // The image height. - float32_t image_height = 0; - - // The image width. - float32_t image_width = 0; - - // A vector of anchors, each element in the vector represents the anchors for a specific layer - // Each layer anchors vector is structured as {w,h} pairs. - std::map> anchors; -}; - -class Yolov5OpMetadata : public NmsOpMetadata -{ -public: - static Expected> create(const std::unordered_map &inputs_metadata, - const std::unordered_map &outputs_metadata, - const NmsPostProcessConfig &nms_post_process_config, - const YoloPostProcessConfig &yolov5_post_process_config, - const std::string &network_name); - std::string get_op_description() override; - hailo_status validate_format_info() override; - YoloPostProcessConfig &yolov5_config() { return m_yolov5_config;}; - -protected: - Yolov5OpMetadata(const std::unordered_map &inputs_metadata, - const std::unordered_map &outputs_metadata, - const NmsPostProcessConfig &nms_post_process_config, - const std::string &name, - const std::string &network_name, - const YoloPostProcessConfig &yolov5_post_process_config, - const OperationType op_type) - : NmsOpMetadata(inputs_metadata, outputs_metadata, nms_post_process_config, name, network_name, op_type) - , m_yolov5_config(yolov5_post_process_config) - {} - - hailo_status validate_params() override; - -private: - YoloPostProcessConfig m_yolov5_config; - -}; - class YOLOv5PostProcessOp : public NmsPostProcessOp { public: @@ -95,32 +51,34 @@ protected: template - void check_threshold_and_add_detection(std::vector &detections, - std::vector &classes_detections_count, hailo_bbox_float32_t bbox, hailo_quant_info_t &quant_info, + void check_threshold_and_add_detection(hailo_bbox_float32_t bbox, hailo_quant_info_t &quant_info, uint32_t class_index, SrcType* data, uint32_t entry_idx, uint32_t padded_width, DstType objectness) { const auto &nms_config = m_metadata->nms_config(); + const auto &yolov5_config = 
m_metadata->yolov5_config(); if (bbox.score >= nms_config.nms_score_th) { if (should_add_mask()) { // We will not preform the sigmoid on the mask at this point - // It should happen on the result of the vector mask multiplication with the proto_mask layer. uint32_t mask_index_start_index = CLASSES_START_INDEX + nms_config.number_of_classes; - std::vector mask(MASK_COEFFICIENT_SIZE, 0.0f); + std::vector mask_coefficients(MASK_COEFFICIENT_SIZE, 0.0f); for (size_t i = 0; i < MASK_COEFFICIENT_SIZE; i++) { - auto mask_offset = entry_idx + (mask_index_start_index + i) * padded_width; - mask[i] = (Quantization::dequantize_output(data[mask_offset], quant_info) * objectness); + auto coeffs_offset = entry_idx + (mask_index_start_index + i) * padded_width; + mask_coefficients[i] = (Quantization::dequantize_output( + data[coeffs_offset], quant_info) * objectness); } - detections.emplace_back(DetectionBbox(bbox, class_index, std::move(mask))); + m_detections.emplace_back(DetectionBbox(bbox, static_cast(class_index), std::move(mask_coefficients), + yolov5_config.image_height, yolov5_config.image_width)); } else { - detections.emplace_back(DetectionBbox(bbox, class_index)); + m_detections.emplace_back(DetectionBbox(bbox, class_index)); } - classes_detections_count[class_index]++; + m_classes_detections_count[class_index]++; } } template - void decode_classes_scores(std::vector &detections, std::vector &classes_detections_count, - hailo_bbox_float32_t &bbox, hailo_quant_info_t &quant_info, SrcType* data, uint32_t entry_idx, uint32_t class_start_idx, + void decode_classes_scores(hailo_bbox_float32_t &bbox, + hailo_quant_info_t &quant_info, SrcType* data, uint32_t entry_idx, uint32_t class_start_idx, DstType objectness, uint32_t padded_width) { const auto &nms_config = m_metadata->nms_config(); @@ -129,7 +87,7 @@ protected: // Pre-NMS optimization. 
If NMS checks IoU over different classes, only the maximum class is relevant auto max_id_score_pair = get_max_class(data, entry_idx, class_start_idx, objectness, quant_info, padded_width); bbox.score = max_id_score_pair.second; - check_threshold_and_add_detection(detections, classes_detections_count, bbox, quant_info, max_id_score_pair.first, + check_threshold_and_add_detection(bbox, quant_info, max_id_score_pair.first, data, entry_idx, padded_width, objectness); } else { @@ -138,7 +96,7 @@ protected: auto class_confidence = dequantize_and_sigmoid( data[class_entry_idx], quant_info); bbox.score = class_confidence * objectness; - check_threshold_and_add_detection(detections, classes_detections_count, bbox, quant_info, class_index, + check_threshold_and_add_detection(bbox, quant_info, class_index, data, entry_idx, padded_width, objectness); } } @@ -152,15 +110,13 @@ protected: * @param[in] shape Shape corresponding to the @a buffer layer. * @param[in] layer_anchors The layer anchors corresponding to layer receiving the @a buffer. * Each anchor is structured as {width, height} pairs. - * @param[inout] detections A vector of ::DetectionBbox objects, to add the detected bboxes to. - * @param[inout] classes_detections_count A vector of uint32_t, to add count of detections count per class to. * * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error. 
*/ template hailo_status extract_detections(const MemoryView &buffer, hailo_quant_info_t quant_info, hailo_3d_image_shape_t shape, hailo_3d_image_shape_t padded_shape, - const std::vector &layer_anchors, std::vector &detections, std::vector &classes_detections_count) + const std::vector &layer_anchors) { const uint32_t X_OFFSET = X_INDEX * padded_shape.width; const uint32_t Y_OFFSET = Y_INDEX * padded_shape.width; @@ -200,7 +156,7 @@ protected: auto bbox = decode(tx, ty, tw, th, layer_anchors[anchor * 2], layer_anchors[anchor * 2 + 1], col, row, shape.width, shape.height); - decode_classes_scores(detections, classes_detections_count, bbox, quant_info, data, entry_idx, + decode_classes_scores(bbox, quant_info, data, entry_idx, CLASSES_START_INDEX, objectness, padded_shape.width); } } diff --git a/hailort/libhailort/src/net_flow/ops/yolov5_seg_op_metadata.hpp b/hailort/libhailort/src/net_flow/ops/yolov5_seg_op_metadata.hpp new file mode 100644 index 0000000..6039835 --- /dev/null +++ b/hailort/libhailort/src/net_flow/ops/yolov5_seg_op_metadata.hpp @@ -0,0 +1,60 @@ +/** + * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) +**/ +/** + * @file yolov5_seg_op_metadata.hpp + * @brief YOLOv5 Instance Segmentation Post-Process op metadata + **/ + +#ifndef _HAILO_YOLOV5_SEG_OP_METADATA_HPP_ +#define _HAILO_YOLOV5_SEG_OP_METADATA_HPP_ + +#include "hailo/hailort.h" +#include "net_flow/ops/yolov5_op_metadata.hpp" + +namespace hailort +{ +namespace net_flow +{ + +struct YoloV5SegPostProcessConfig +{ + // User given mask threshold. A pixel will consider part of the mask if it's value is higher then the mask_threshold. 
+ double mask_threshold; + std::string proto_layer_name; +}; + +class Yolov5SegOpMetadata : public Yolov5OpMetadata +{ +public: + static Expected> create(const std::unordered_map &inputs_metadata, + const std::unordered_map &outputs_metadata, + const NmsPostProcessConfig &nms_post_process_config, + const YoloPostProcessConfig &yolov5_config, + const YoloV5SegPostProcessConfig &yolov5_seg_config, + const std::string &network_name); + hailo_status validate_format_info() override; + std::string get_op_description() override; + YoloV5SegPostProcessConfig &yolov5seg_config() { return m_yolo_seg_config;}; + virtual Expected get_output_vstream_info() override; + +private: + Yolov5SegOpMetadata(const std::unordered_map &inputs_metadata, + const std::unordered_map &outputs_metadata, + const NmsPostProcessConfig &nms_post_process_config, + const YoloPostProcessConfig &yolo_config, + const YoloV5SegPostProcessConfig &yolo_seg_config, + const std::string &network_name) + : Yolov5OpMetadata(inputs_metadata, outputs_metadata, nms_post_process_config, "YOLOv5Seg-Post-Process", + network_name, yolo_config, OperationType::YOLOV5SEG), + m_yolo_seg_config(yolo_seg_config) + {} + + YoloV5SegPostProcessConfig m_yolo_seg_config; +}; + +} /* namespace hailort */ +} /* namespace net_flow */ + +#endif /* _HAILO_YOLOV5_SEG_POST_PROCESS_HPP_ */ \ No newline at end of file diff --git a/hailort/libhailort/src/net_flow/ops/yolov5_seg_post_process.cpp b/hailort/libhailort/src/net_flow/ops/yolov5_seg_post_process.cpp index 813abd8..539a985 100644 --- a/hailort/libhailort/src/net_flow/ops/yolov5_seg_post_process.cpp +++ b/hailort/libhailort/src/net_flow/ops/yolov5_seg_post_process.cpp @@ -135,11 +135,8 @@ hailo_status Yolov5SegPostProcess::execute(const std::mapinputs_metadata(); const auto &yolo_config = m_metadata->yolov5_config(); const auto &yolov5seg_config = m_metadata->yolov5seg_config(); - const auto &nms_config = m_metadata->nms_config(); - std::vector detections; - std::vector 
classes_detections_count(nms_config.number_of_classes, 0); - detections.reserve(nms_config.max_proposals_per_class * nms_config.number_of_classes); + clear_before_frame(); for (const auto &name_to_input : inputs) { hailo_status status; auto &name = name_to_input.first; @@ -163,16 +160,16 @@ hailo_status Yolov5SegPostProcess::execute(const std::map(name_to_input.second, input_metadata.quant_info, input_metadata.shape, - input_metadata.padded_shape, yolo_config.anchors.at(name), detections, classes_detections_count); + input_metadata.padded_shape, yolo_config.anchors.at(name)); } else if (input_metadata.format.type == HAILO_FORMAT_TYPE_UINT16) { status = extract_detections(name_to_input.second, input_metadata.quant_info, input_metadata.shape, - input_metadata.padded_shape, yolo_config.anchors.at(name), detections, classes_detections_count); + input_metadata.padded_shape, yolo_config.anchors.at(name)); } CHECK_SUCCESS(status); } - remove_overlapping_boxes(detections, classes_detections_count, m_metadata->nms_config().nms_iou_th); - auto status = fill_nms_with_byte_mask_format(outputs.begin()->second, detections, classes_detections_count); + remove_overlapping_boxes(m_detections, m_classes_detections_count, m_metadata->nms_config().nms_iou_th); + auto status = fill_nms_with_byte_mask_format(outputs.begin()->second); CHECK_SUCCESS(status); return HAILO_SUCCESS; @@ -193,7 +190,7 @@ void Yolov5SegPostProcess::mult_mask_vector_and_proto_matrix(const DetectionBbox for (uint32_t i = 0; i < mult_size; i++) { float32_t sum = 0.0f; for (uint32_t j = 0; j < proto_layer_shape.features; j++) { - sum += detection.m_mask[j] * proto_layer[j * mult_size + i]; + sum += detection.m_coefficients[j] * proto_layer[j * mult_size + i]; } mult_result[i] = sigmoid(sum); } @@ -213,22 +210,22 @@ hailo_status Yolov5SegPostProcess::crop_and_copy_mask(const DetectionBbox &detec static_cast(yolov5_config.image_height), 0, 1, STBIR_ALPHA_CHANNEL_NONE, 0, STBIR_EDGE_CLAMP, STBIR_FILTER_TRIANGLE, 
STBIR_COLORSPACE_LINEAR, NULL); - auto x_min = static_cast(std::round(detection.m_bbox.x_min * yolov5_config.image_width)); - auto x_max = static_cast(std::round(detection.m_bbox.x_max * yolov5_config.image_width)); - auto y_min = static_cast(std::round(detection.m_bbox.y_min * yolov5_config.image_height)); - auto y_max = static_cast(std::round(detection.m_bbox.y_max * yolov5_config.image_height)); - auto box_width = detection.get_bbox_rounded_width(yolov5_config.image_width); + auto x_min = static_cast(std::ceil(detection.m_bbox.x_min * yolov5_config.image_width)); + auto x_max = static_cast(std::ceil(detection.m_bbox.x_max * yolov5_config.image_width)); + auto y_min = static_cast(std::ceil(detection.m_bbox.y_min * yolov5_config.image_height)); + auto y_max = static_cast(std::ceil(detection.m_bbox.y_max * yolov5_config.image_height)); + auto box_width = detection.get_bbox_width(yolov5_config.image_width); - float32_t *dst_mask = (float32_t*)(buffer.data() + buffer_offset); + uint8_t *dst_mask = (uint8_t*)(buffer.data() + buffer_offset); for (uint32_t i = y_min; i <= y_max; i++) { for (uint32_t j = x_min; j <= x_max; j++) { auto image_mask_idx = (i * static_cast(yolov5_config.image_width)) + j; auto cropped_mask_idx = ((i-y_min) * box_width) + (j-x_min); if (resized_mask_to_image_dim_ptr[image_mask_idx] > mask_threshold) { - dst_mask[cropped_mask_idx] = 1.0f; + dst_mask[cropped_mask_idx] = 1; } else { - dst_mask[cropped_mask_idx] = 0.0f; + dst_mask[cropped_mask_idx] = 0; } } } @@ -245,123 +242,64 @@ hailo_status Yolov5SegPostProcess::calc_and_copy_mask(const DetectionBbox &detec return HAILO_SUCCESS; } -uint32_t Yolov5SegPostProcess::get_mask_size(const DetectionBbox &detection) -{ - auto &yolov5_config = m_metadata->yolov5_config(); - auto box_height = detection.get_bbox_rounded_height(yolov5_config.image_height); - auto box_width = detection.get_bbox_rounded_width(yolov5_config.image_width); - auto mask_size = box_width * box_height; - - // Add padding if needed 
- uint32_t remainder = mask_size % 8; - uint32_t adjustment = (remainder != 0) ? (8 - remainder) : 0; - uint32_t result = static_cast(mask_size + adjustment); - return result; -} - -Expected Yolov5SegPostProcess::copy_detection_to_result_buffer(MemoryView &buffer, const DetectionBbox &detection, - uint32_t buffer_offset, std::vector &classes_detections_count) +Expected Yolov5SegPostProcess::copy_detection_to_result_buffer(MemoryView &buffer, DetectionBbox &detection, + uint32_t buffer_offset) { - auto detection_byte_size = 0; - float32_t mask_size_bytes = static_cast(get_mask_size(detection)) * sizeof(float32_t); + uint32_t copied_bytes_amount = 0; // Copy bbox - uint32_t size_to_copy = sizeof(detection.m_bbox); + uint32_t size_to_copy = sizeof(detection.m_bbox_with_mask); assert((buffer_offset + size_to_copy) <= buffer.size()); - memcpy((hailo_bbox_float32_t*)(buffer.data() + buffer_offset), &detection.m_bbox, size_to_copy); - buffer_offset += size_to_copy; - detection_byte_size += size_to_copy; + detection.m_bbox_with_mask.mask = (buffer.data() + buffer_offset + size_to_copy); - // Copy mask size - size_to_copy = sizeof(mask_size_bytes); - assert((buffer_offset + size_to_copy) <= buffer.size()); - memcpy((buffer.data() + buffer_offset), &mask_size_bytes, size_to_copy); + *(hailo_detection_with_byte_mask_t*)(buffer.data() + buffer_offset) = + *(hailo_detection_with_byte_mask_t*)&(detection.m_bbox_with_mask); buffer_offset += size_to_copy; - detection_byte_size += size_to_copy; + copied_bytes_amount += size_to_copy; // Calc and copy mask auto status = calc_and_copy_mask(detection, buffer, buffer_offset); CHECK_SUCCESS_AS_EXPECTED(status); - detection_byte_size += static_cast(mask_size_bytes); - - classes_detections_count[detection.m_class_id]--; - return detection_byte_size; -} - -uint32_t Yolov5SegPostProcess::copy_bbox_count_to_result_buffer(MemoryView &buffer, uint32_t class_detection_count, uint32_t buffer_offset) -{ - float32_t bbox_count_casted = 
static_cast(class_detection_count); - uint32_t size_to_copy = sizeof(bbox_count_casted); + copied_bytes_amount += static_cast(detection.m_bbox_with_mask.mask_size); - assert((buffer_offset + size_to_copy) <= buffer.size()); - memcpy((buffer.data() + buffer_offset), &bbox_count_casted, size_to_copy); - return size_to_copy; -} - -uint32_t Yolov5SegPostProcess::copy_zero_bbox_count(MemoryView &buffer, uint32_t classes_with_zero_detections_count, uint32_t buffer_offset) -{ - uint32_t size_to_copy = static_cast(sizeof(float32_t)) * classes_with_zero_detections_count; - - assert((buffer_offset + size_to_copy) <= buffer.size()); - memset((buffer.data() + buffer_offset), 0, size_to_copy); - return size_to_copy; + m_classes_detections_count[detection.m_class_id]--; + return copied_bytes_amount; } -hailo_status Yolov5SegPostProcess::fill_nms_with_byte_mask_format(MemoryView &buffer, std::vector &detections, - std::vector &classes_detections_count) +hailo_status Yolov5SegPostProcess::fill_nms_with_byte_mask_format(MemoryView &buffer) { - // TODO: HRT-11734 - Improve performance by adding a new format that doesn't require the sort - // Sort by class_id - std::sort(detections.begin(), detections.end(), - [](DetectionBbox a, DetectionBbox b) - { return (a.m_class_id != b.m_class_id) ? (a.m_class_id < b.m_class_id) : (a.m_bbox.score > b.m_bbox.score); }); - const auto &nms_config = m_metadata->nms_config(); uint32_t ignored_detections_count = 0; - int curr_class_id = -1; - uint32_t buffer_offset = 0; - for (auto &detection : detections) { + uint16_t detections_count = 0; + // The beginning of the output buffer will contain the detections_count first, here we save space for it. 
+ uint32_t buffer_offset = sizeof(detections_count); + for (auto &detection : m_detections) { if (REMOVED_CLASS_SCORE == detection.m_bbox.score) { // Detection was removed in remove_overlapping_boxes() continue; } - if (0 == classes_detections_count[detection.m_class_id]) { + if (0 == m_classes_detections_count[detection.m_class_id]) { // This class' detections count is higher then m_nms_config.max_proposals_per_class. // This detection is ignored due to having lower score (detections vector is sorted by score). continue; } // If class's detections count is higher then max_proposals_per_class we set the detection count of that class to the max - // and ignore the rest by reducing the classes_detections_count[detection.m_class_id] after copying the bbox to result buffer. - if (nms_config.max_proposals_per_class < classes_detections_count[detection.m_class_id]) { - ignored_detections_count += (classes_detections_count[detection.m_class_id] - nms_config.max_proposals_per_class); - classes_detections_count[detection.m_class_id] = nms_config.max_proposals_per_class; + // and ignore the rest by reducing the m_classes_detections_count[detection.m_class_id] after copying the bbox to result buffer. 
+ if (nms_config.max_proposals_per_class < m_classes_detections_count[detection.m_class_id]) { + ignored_detections_count += (m_classes_detections_count[detection.m_class_id] - nms_config.max_proposals_per_class); + m_classes_detections_count[detection.m_class_id] = nms_config.max_proposals_per_class; } - if (static_cast(detection.m_class_id) == curr_class_id) { - auto buffer_offset_expected = copy_detection_to_result_buffer(buffer, detection, buffer_offset, classes_detections_count); - CHECK_EXPECTED_AS_STATUS(buffer_offset_expected); - buffer_offset += buffer_offset_expected.value(); - } - else if (static_cast(detection.m_class_id) == (curr_class_id + 1)) { - buffer_offset += copy_bbox_count_to_result_buffer(buffer, classes_detections_count[detection.m_class_id], buffer_offset); - auto buffer_offset_expected = copy_detection_to_result_buffer(buffer, detection, buffer_offset, classes_detections_count); - buffer_offset += buffer_offset_expected.value(); - curr_class_id = detection.m_class_id; - } - else { - // no detections for classes between (curr_class_id, detection.m_class_id) - auto zero_detections_classes_count = (detection.m_class_id - curr_class_id); - buffer_offset += copy_zero_bbox_count(buffer, zero_detections_classes_count, buffer_offset); - - // Copy the new class box - buffer_offset += copy_bbox_count_to_result_buffer(buffer, classes_detections_count[detection.m_class_id], buffer_offset); - auto buffer_offset_expected = copy_detection_to_result_buffer(buffer, detection, buffer_offset, classes_detections_count); - buffer_offset += buffer_offset_expected.value(); - curr_class_id = detection.m_class_id; - } + auto copied_bytes_amount = copy_detection_to_result_buffer(buffer, detection, buffer_offset); + CHECK_EXPECTED_AS_STATUS(copied_bytes_amount); + buffer_offset += copied_bytes_amount.release(); + detections_count++; } + // Copy detections count to the beginning of the buffer + *(uint16_t*)buffer.data() = detections_count; + if (0 != 
ignored_detections_count) { LOGGER__INFO("{} Detections were ignored, due to `max_bboxes_per_class` defined as {}.", ignored_detections_count, nms_config.max_proposals_per_class); diff --git a/hailort/libhailort/src/net_flow/ops/yolov5_seg_post_process.hpp b/hailort/libhailort/src/net_flow/ops/yolov5_seg_post_process.hpp index 541edc2..ae3a7b2 100644 --- a/hailort/libhailort/src/net_flow/ops/yolov5_seg_post_process.hpp +++ b/hailort/libhailort/src/net_flow/ops/yolov5_seg_post_process.hpp @@ -13,48 +13,13 @@ #include "hailo/hailort.h" #include "net_flow/ops/yolov5_post_process.hpp" #include "transform/transform_internal.hpp" +#include "net_flow/ops/yolov5_seg_op_metadata.hpp" namespace hailort { namespace net_flow { -struct YoloV5SegPostProcessConfig -{ - // User given mask threshold. A pixel will consider part of the mask if it's value is higher then the mask_threshold. - double mask_threshold; - std::string proto_layer_name; -}; - -class Yolov5SegOpMetadata : public Yolov5OpMetadata -{ -public: - static Expected> create(const std::unordered_map &inputs_metadata, - const std::unordered_map &outputs_metadata, - const NmsPostProcessConfig &nms_post_process_config, - const YoloPostProcessConfig &yolov5_config, - const YoloV5SegPostProcessConfig &yolov5_seg_config, - const std::string &network_name); - hailo_status validate_format_info() override; - std::string get_op_description() override; - YoloV5SegPostProcessConfig &yolov5seg_config() { return m_yolo_seg_config;}; - virtual Expected get_output_vstream_info() override; - -private: - Yolov5SegOpMetadata(const std::unordered_map &inputs_metadata, - const std::unordered_map &outputs_metadata, - const NmsPostProcessConfig &nms_post_process_config, - const YoloPostProcessConfig &yolo_config, - const YoloV5SegPostProcessConfig &yolo_seg_config, - const std::string &network_name) - : Yolov5OpMetadata(inputs_metadata, outputs_metadata, nms_post_process_config, "YOLOv5Seg-Post-Process", - network_name, yolo_config, 
OperationType::YOLOV5SEG), - m_yolo_seg_config(yolo_seg_config) - {} - - YoloV5SegPostProcessConfig m_yolo_seg_config; -}; - class Yolov5SegPostProcess : public YOLOv5PostProcessOp { public: @@ -98,17 +63,14 @@ private: Yolov5SegPostProcess(std::shared_ptr metadata, Buffer &&mask_mult_result_buffer, Buffer &&resized_mask, Buffer &&transformed_proto_buffer, Buffer &&dequantized_proto_buffer); - hailo_status fill_nms_with_byte_mask_format(MemoryView &buffer, std::vector &detections, - std::vector &classes_detections_count); + hailo_status fill_nms_with_byte_mask_format(MemoryView &buffer); void mult_mask_vector_and_proto_matrix(const DetectionBbox &detection); - uint32_t get_mask_size(const DetectionBbox &detection); hailo_status calc_and_copy_mask(const DetectionBbox &detection, MemoryView &buffer, uint32_t buffer_offset); hailo_status crop_and_copy_mask(const DetectionBbox &detection, MemoryView &buffer, uint32_t buffer_offset); - uint32_t copy_zero_bbox_count(MemoryView &buffer, uint32_t classes_with_zero_detections_count, uint32_t buffer_offset); - uint32_t copy_bbox_count_to_result_buffer(MemoryView &buffer, uint32_t class_detection_count, uint32_t buffer_offset); - Expected copy_detection_to_result_buffer(MemoryView &buffer, const DetectionBbox &detection, uint32_t buffer_offset, - std::vector &classes_detections_count); + + // Returns the number of copied bytes + Expected copy_detection_to_result_buffer(MemoryView &buffer, DetectionBbox &detection, uint32_t buffer_offset); std::shared_ptr m_metadata; Buffer m_mask_mult_result_buffer; diff --git a/hailort/libhailort/src/net_flow/ops/yolov8_post_process.cpp b/hailort/libhailort/src/net_flow/ops/yolov8_post_process.cpp new file mode 100644 index 0000000..788146a --- /dev/null +++ b/hailort/libhailort/src/net_flow/ops/yolov8_post_process.cpp @@ -0,0 +1,193 @@ +/** + * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. 
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT) +**/ +/** + * @file yolov8_post_process.cpp + * @brief YOLOV8 post process + * + **/ + +#include "net_flow/ops/yolov8_post_process.hpp" +#include "net_flow/ops/softmax_post_process.hpp" + +namespace hailort +{ +namespace net_flow +{ + +Expected> Yolov8OpMetadata::create(const std::unordered_map &inputs_metadata, + const std::unordered_map &outputs_metadata, const NmsPostProcessConfig &nms_post_process_config, + const Yolov8PostProcessConfig &yolov8_post_process_config, const std::string &network_name) +{ + // Creating the meta data + auto op_metadata = std::shared_ptr(new (std::nothrow) Yolov8OpMetadata(inputs_metadata, outputs_metadata, nms_post_process_config, + yolov8_post_process_config, network_name)); + CHECK_AS_EXPECTED(op_metadata != nullptr, HAILO_OUT_OF_HOST_MEMORY); + + auto status = op_metadata->validate_params(); + CHECK_SUCCESS_AS_EXPECTED(status); + + return std::shared_ptr(std::move(op_metadata)); +} + +std::string Yolov8OpMetadata::get_op_description() +{ + auto nms_config_info = get_nms_config_description(); + auto config_info = fmt::format("Op {}, Name: {}, {}, Image height: {:.2f}, Image width: {:.2f}", + OpMetadata::get_operation_type_str(m_type), m_name, nms_config_info, m_yolov8_config.image_height, m_yolov8_config.image_width); + return config_info; +} + +hailo_status Yolov8OpMetadata::validate_params() +{ + CHECK_SUCCESS(NmsOpMetadata::validate_params()); + + // We go over the inputs metadata and check that it includes all of the regs and clss + for (const auto &layer_names : m_yolov8_config.reg_to_cls_inputs) { + CHECK(contains(m_inputs_metadata, layer_names.reg), HAILO_INVALID_ARGUMENT, + "YOLOV8PostProcessOp: inputs_metadata does not contain regression layer {}", layer_names.reg); + CHECK(contains(m_inputs_metadata, layer_names.cls), HAILO_INVALID_ARGUMENT, + "YOLOV8PostProcessOp: inputs_metadata does not contain classification layer {}", layer_names.cls); + + 
const auto ®_input_metadata = m_inputs_metadata.at(layer_names.reg); + const auto &cls_input_metadata = m_inputs_metadata.at(layer_names.cls); + + // Checking that both outputs (reg and cls) has the same shape and format + // NOTE: padded shape might be different because features might be different, + // and padding is added when width*features % 8 != 0 + CHECK((reg_input_metadata.shape.height == cls_input_metadata.shape.height) + && (reg_input_metadata.shape.width == cls_input_metadata.shape.width), + HAILO_INVALID_ARGUMENT, "YOLOV8PostProcess: regression input {} has different shape than classification input {}", + layer_names.reg, layer_names.cls); + + CHECK((cls_input_metadata.format.type == reg_input_metadata.format.type) + && (cls_input_metadata.format.flags == reg_input_metadata.format.flags) + && (cls_input_metadata.format.order == reg_input_metadata.format.order), + HAILO_INVALID_ARGUMENT, "YOLOV8PostProcess: regression input {} has different format than classification input {}", + layer_names.reg, layer_names.cls); + + // Checking that number of features of all outputs are multiples of 4 + CHECK(((reg_input_metadata.shape.features % 4) == 0), + HAILO_INVALID_ARGUMENT, "YOLOV8PostProcess: regression input {} is not a multiple of 4", + layer_names.reg); + } + return HAILO_SUCCESS; +} + +hailo_status Yolov8OpMetadata::validate_format_info() +{ + return NmsOpMetadata::validate_format_info(); +} + +Expected> YOLOV8PostProcessOp::create(std::shared_ptr metadata) +{ + auto status = metadata->validate_format_info(); + CHECK_SUCCESS_AS_EXPECTED(status); + + auto op = std::shared_ptr(new (std::nothrow) YOLOV8PostProcessOp(metadata)); + CHECK_AS_EXPECTED(op != nullptr, HAILO_OUT_OF_HOST_MEMORY); + + return std::shared_ptr(std::move(op)); +} + +hailo_status YOLOV8PostProcessOp::execute(const std::map &inputs, std::map &outputs) +{ + const auto &yolov8_config = m_metadata->yolov8_config(); + const auto &inputs_metadata = m_metadata->inputs_metadata(); + + 
clear_before_frame(); + for (const auto ®_to_cls_name : yolov8_config.reg_to_cls_inputs) { + hailo_status status; + assert(contains(inputs, reg_to_cls_name.cls)); + assert(contains(inputs, reg_to_cls_name.reg)); + + auto &input_metadata = inputs_metadata.at(reg_to_cls_name.reg); + + if (HAILO_FORMAT_TYPE_UINT8 == input_metadata.format.type) { + status = extract_detections(reg_to_cls_name, inputs.at(reg_to_cls_name.reg), + inputs.at(reg_to_cls_name.cls), reg_to_cls_name.stride); + } else if (HAILO_FORMAT_TYPE_UINT16 == input_metadata.format.type) { + status = extract_detections(reg_to_cls_name, inputs.at(reg_to_cls_name.reg), + inputs.at(reg_to_cls_name.cls), reg_to_cls_name.stride); + } else { + CHECK_SUCCESS(HAILO_INVALID_ARGUMENT, "YOLO post-process received invalid input type {}", input_metadata.format.type); + } + + CHECK_SUCCESS(status); + } + return hailo_nms_format(outputs.begin()->second); +} + +template +hailo_bbox_float32_t YOLOV8PostProcessOp::get_bbox(uint32_t row, uint32_t col, uint32_t stride, const hailo_3d_image_shape_t ®_padded_shape, + const hailo_quant_info_t ®_quant_info, SrcType *reg_data, std::vector> &d_matrix, DstType class_confidence) +{ + auto reg_row_size = reg_padded_shape.width * reg_padded_shape.features; + auto reg_feature_size = reg_padded_shape.width; + auto reg_idx = (reg_row_size * row) + col; + + // For each HxW - reshape from features to 4 x (features/4) + dequantize + // For example - reshape from 64 to 4X16 - 4 vectors of 16 values + for (uint32_t feature = 0; feature < reg_padded_shape.features; feature++) { + auto &tmp_vector = d_matrix.at(feature / (reg_padded_shape.features / NUM_OF_D_VALUES)); + tmp_vector[feature % (reg_padded_shape.features / NUM_OF_D_VALUES)] = Quantization::dequantize_output(reg_data[reg_idx + feature*reg_feature_size], reg_quant_info); + } + + // Performing softmax operation on each of the vectors + for (uint32_t vector_index = 0; vector_index < d_matrix.size(); vector_index++) { + auto &tmp_vector = 
d_matrix.at(vector_index); + SoftmaxPostProcessOp::softmax(tmp_vector.data(), tmp_vector.data(), tmp_vector.size()); + } + + // Performing dot product on each vector + // (A, B, C, ..., F, G) -> 0*A + 1*B + 2*C + ... + 14*F + 15*G + for (uint32_t vector_index = 0; vector_index < NUM_OF_D_VALUES; vector_index++) { + m_d_values_matrix[vector_index] = dot_product(d_matrix.at(vector_index)); + } + + // The decode function extract x_min, y_min, x_max, y_max from d1, d2, d3, d4 + const auto &d1 = m_d_values_matrix.at(0); + const auto &d2 = m_d_values_matrix.at(1); + const auto &d3 = m_d_values_matrix.at(2); + const auto &d4 = m_d_values_matrix.at(3); + auto bbox = decode(d1, d2, d3, d4, col, row, stride); + bbox.score = class_confidence; + return bbox; +} + +hailo_bbox_float32_t YOLOV8PostProcessOp::decode(float32_t d1, float32_t d2, float32_t d3, float32_t d4, + uint32_t col, uint32_t row, uint32_t stride) const +{ + const auto &image_width = m_metadata->yolov8_config().image_width; + const auto &image_height = m_metadata->yolov8_config().image_height; + + auto x_center = (static_cast(col) + 0.5f) * static_cast(stride) / image_width; + auto y_center = (static_cast(row) + 0.5f) * static_cast(stride) / image_height; + + // The values d1, d2, d3, d4 represents the four distances from the center (x_center, y_center) to each of the bbox boundaries + // From d1, d2, d3, d4 we extract the values of x_min, y_min, x_max, y_max + auto x_min = x_center - (d1 * static_cast(stride) / image_width); + auto y_min = y_center - (d2 * static_cast(stride) / image_height); + auto x_max = x_center + (d3 * static_cast(stride) / image_width); + auto y_max = y_center + (d4 * static_cast(stride) / image_height); + + return hailo_bbox_float32_t{y_min, x_min, y_max, x_max, 0}; +} + +float32_t YOLOV8PostProcessOp::dot_product(std::vector &values) +{ + // Performs dot product on the elements: + // (A, B, C, ..., F, G) -> 0*A + 1*B + 2*C + ... 
+ 14*F + 15*G + float32_t sum = 0; + float32_t counter = 0; + for (const auto &element : values) { + sum += (counter * element); + counter += 1.0f; + } + + return sum; +} + +} +} \ No newline at end of file diff --git a/hailort/libhailort/src/net_flow/ops/yolov8_post_process.hpp b/hailort/libhailort/src/net_flow/ops/yolov8_post_process.hpp new file mode 100644 index 0000000..25d0196 --- /dev/null +++ b/hailort/libhailort/src/net_flow/ops/yolov8_post_process.hpp @@ -0,0 +1,182 @@ +/** + * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) +**/ +/** + * @file yolov8_post_process.hpp + * @brief YOLOV8 post process + * + **/ + +#ifndef _HAILO_YOLOV8_POST_PROCESS_HPP_ +#define _HAILO_YOLOV8_POST_PROCESS_HPP_ + +#include "net_flow/ops/nms_post_process.hpp" +#include "net_flow/ops/op_metadata.hpp" +namespace hailort +{ +namespace net_flow +{ + +struct Yolov8MatchingLayersNames +{ + // Regression layer + std::string reg; + + // Classifications layer + std::string cls; + + uint32_t stride; +}; + +struct Yolov8PostProcessConfig +{ + // The image height. + float32_t image_height = 0; + + // The image width. + float32_t image_width = 0; + + // A vector off two strings that represents the relations between the outputs names. 
+ std::vector reg_to_cls_inputs; +}; + +class Yolov8OpMetadata : public NmsOpMetadata +{ +public: + static Expected> create(const std::unordered_map &inputs_metadata, + const std::unordered_map &outputs_metadata, + const NmsPostProcessConfig &nms_post_process_config, + const Yolov8PostProcessConfig &yolov8_post_process_config, + const std::string &network_name); + hailo_status validate_format_info() override; + std::string get_op_description() override; + Yolov8PostProcessConfig &yolov8_config() { return m_yolov8_config;}; + +private: + Yolov8PostProcessConfig m_yolov8_config; + Yolov8OpMetadata(const std::unordered_map &inputs_metadata, + const std::unordered_map &outputs_metadata, + const NmsPostProcessConfig &nms_post_process_config, + const Yolov8PostProcessConfig &yolov8_post_process_config, + const std::string &network_name) + : NmsOpMetadata(inputs_metadata, outputs_metadata, nms_post_process_config, "YOLOV8-Post-Process", network_name, OperationType::YOLOV8) + , m_yolov8_config(yolov8_post_process_config) + {} + + hailo_status validate_params() override; +}; + +class YOLOV8PostProcessOp : public NmsPostProcessOp +{ +public: + static Expected> create(std::shared_ptr metadata); + + hailo_status execute(const std::map &inputs, std::map &outputs) override; + +private: + std::shared_ptr m_metadata; + std::vector m_d_values_matrix; // Holds the values of the bbox boundaries distances from the stride's center + std::unordered_map>> m_d_matrix; // Holds the values from which we compute those distances + YOLOV8PostProcessOp(std::shared_ptr metadata) + : NmsPostProcessOp(static_cast>(metadata)) + , m_metadata(metadata), m_d_values_matrix(NUM_OF_D_VALUES) + { + for (const auto &input_metadata : m_metadata->inputs_metadata()) { + m_d_matrix[input_metadata.first] = std::vector>(NUM_OF_D_VALUES, + std::vector(input_metadata.second.padded_shape.features / NUM_OF_D_VALUES)); + } + } + + static const uint32_t CLASSES_START_INDEX = 0; + static const uint32_t NO_OBJECTNESS = 
1; + static const uint32_t NUM_OF_D_VALUES = 4; + + template + hailo_status extract_detections(const Yolov8MatchingLayersNames &layers_names, const MemoryView ®_buffer, const MemoryView &cls_buffer, + uint32_t stride) + { + const auto &inputs_metadata = m_metadata->inputs_metadata(); + const auto &nms_config = m_metadata->nms_config(); + + assert(contains(inputs_metadata, layers_names.reg)); + assert(contains(inputs_metadata, layers_names.cls)); + const auto ®_padded_shape = inputs_metadata.at(layers_names.reg).padded_shape; + const auto &cls_padded_shape = inputs_metadata.at(layers_names.cls).padded_shape; + const auto ®_quant_info = inputs_metadata.at(layers_names.reg).quant_info; + const auto &cls_quant_info = inputs_metadata.at(layers_names.cls).quant_info; + + // Validate regression buffer size + auto number_of_entries = reg_padded_shape.height * reg_padded_shape.width; + auto buffer_size = number_of_entries * reg_padded_shape.features * sizeof(SrcType); + CHECK(buffer_size == reg_buffer.size(), HAILO_INVALID_ARGUMENT, + "Failed to extract_detections, reg {} buffer_size should be {}, but is {}", layers_names.reg, buffer_size, reg_buffer.size()); + + // Validate classes buffer size + const uint32_t cls_entry_size = nms_config.number_of_classes; + number_of_entries = cls_padded_shape.height * cls_padded_shape.width; + buffer_size = number_of_entries * cls_entry_size * sizeof(SrcType); + CHECK(buffer_size == cls_buffer.size(), HAILO_INVALID_ARGUMENT, + "Failed to extract_detections, cls {} buffer_size should be {}, but is {}", layers_names.cls, buffer_size, cls_buffer.size()); + + // Format is NHCW -> each row size is C size * W size + auto cls_row_size = cls_padded_shape.features * cls_padded_shape.width; + + SrcType *reg_data = (SrcType*)reg_buffer.data(); + SrcType *cls_data = (SrcType*)cls_buffer.data(); + + for (uint32_t row = 0; row < cls_padded_shape.height; row++) { + for (uint32_t col = 0; col < cls_padded_shape.width; col++) { + auto cls_idx = 
(cls_row_size * row) + col; + + if (nms_config.cross_classes) { + // Pre-NMS optimization. If NMS checks IoU over different classes, only the maximum class is relevant + auto max_id_score_pair = get_max_class(cls_data, cls_idx, CLASSES_START_INDEX, + NO_OBJECTNESS, cls_quant_info, cls_padded_shape.width); + if (max_id_score_pair.second >= nms_config.nms_score_th) { + // If passes threshold - get the relevant bbox and add this detection + assert(contains(m_d_matrix, layers_names.reg)); + auto &d_matrix = m_d_matrix.at(layers_names.reg); + auto bbox = get_bbox(row, col, stride, reg_padded_shape, reg_quant_info, + (SrcType*)reg_data, d_matrix, max_id_score_pair.second); + m_detections.emplace_back(DetectionBbox(bbox, max_id_score_pair.first)); + m_classes_detections_count[max_id_score_pair.first]++; + } + } + else { + // No optimization - it's possible that a specific bbox will hold more then 1 class + for (uint32_t curr_class_idx = 0; curr_class_idx < nms_config.number_of_classes; curr_class_idx++) { + auto class_entry_idx = cls_idx + (curr_class_idx * cls_padded_shape.width); + auto class_confidence = Quantization::dequantize_output( + cls_data[class_entry_idx], cls_quant_info); + if (class_confidence >= nms_config.nms_score_th) { + // If passes threshold - get the relevant bbox and add this detection + assert(contains(m_d_matrix, layers_names.reg)); + auto &d_matrix = m_d_matrix.at(layers_names.reg); + auto bbox = get_bbox(row, col, stride, reg_padded_shape, reg_quant_info, + (SrcType*)reg_data, d_matrix, class_confidence); + m_detections.emplace_back(DetectionBbox(bbox, curr_class_idx)); + m_classes_detections_count[curr_class_idx]++; + } + } + } + } + } + return HAILO_SUCCESS; + } + + template + hailo_bbox_float32_t get_bbox(uint32_t row, uint32_t col, uint32_t stride, const hailo_3d_image_shape_t ®_padded_shape, + const hailo_quant_info_t ®_quant_info, SrcType *reg_data, std::vector> &d_matrix, DstType class_confidence); + + virtual hailo_bbox_float32_t 
decode(float32_t tx, float32_t ty, float32_t tw, float32_t th, + uint32_t col, uint32_t row, uint32_t stride) const; + + static float32_t dot_product(std::vector &values); + +}; + +} // namespace net_flow +} // namespace hailort + +#endif // _HAILO_YOLOV8_POST_PROCESS_HPP_ diff --git a/hailort/libhailort/src/net_flow/ops/yolox_post_process.cpp b/hailort/libhailort/src/net_flow/ops/yolox_post_process.cpp index f0febbe..8f67829 100644 --- a/hailort/libhailort/src/net_flow/ops/yolox_post_process.cpp +++ b/hailort/libhailort/src/net_flow/ops/yolox_post_process.cpp @@ -103,10 +103,8 @@ hailo_status YOLOXPostProcessOp::execute(const std::map { const auto &yolox_config = m_metadata->yolox_config(); const auto &inputs_metadata = m_metadata->inputs_metadata(); - const auto &nms_config = m_metadata->nms_config(); - std::vector detections; - std::vector classes_detections_count(nms_config.number_of_classes, 0); - detections.reserve(nms_config.max_proposals_per_class * nms_config.number_of_classes); + + clear_before_frame(); for (const auto &layers_names_triplet : yolox_config.input_names) { hailo_status status; assert(contains(inputs, layers_names_triplet.cls)); @@ -116,10 +114,10 @@ hailo_status YOLOXPostProcessOp::execute(const std::map auto &input_metadata = inputs_metadata.at(layers_names_triplet.reg); if (input_metadata.format.type == HAILO_FORMAT_TYPE_UINT8) { status = extract_detections(layers_names_triplet, inputs.at(layers_names_triplet.reg), inputs.at(layers_names_triplet.cls), - inputs.at(layers_names_triplet.obj), detections, classes_detections_count); + inputs.at(layers_names_triplet.obj)); } else if (input_metadata.format.type == HAILO_FORMAT_TYPE_UINT16) { status = extract_detections(layers_names_triplet, inputs.at(layers_names_triplet.reg), inputs.at(layers_names_triplet.cls), - inputs.at(layers_names_triplet.obj), detections, classes_detections_count); + inputs.at(layers_names_triplet.obj)); } else { CHECK_SUCCESS(HAILO_INVALID_ARGUMENT, "YOLO post-process 
received invalid input type {}", input_metadata.format.type); } @@ -127,7 +125,7 @@ hailo_status YOLOXPostProcessOp::execute(const std::map CHECK_SUCCESS(status); } - return hailo_nms_format(std::move(detections), outputs.begin()->second, classes_detections_count); + return hailo_nms_format(outputs.begin()->second); } hailo_bbox_float32_t YOLOXPostProcessOp::decode(float32_t tx, float32_t ty, float32_t tw, float32_t th, diff --git a/hailort/libhailort/src/net_flow/ops/yolox_post_process.hpp b/hailort/libhailort/src/net_flow/ops/yolox_post_process.hpp index d0be5cf..3850f15 100644 --- a/hailort/libhailort/src/net_flow/ops/yolox_post_process.hpp +++ b/hailort/libhailort/src/net_flow/ops/yolox_post_process.hpp @@ -19,7 +19,7 @@ namespace hailort namespace net_flow { -struct MatchingLayersNames +struct YoloxMatchingLayersNames { // Regression layer std::string reg; @@ -40,7 +40,7 @@ struct YoloxPostProcessConfig float32_t image_width = 0; // A vector off three strings that represents the relations between the outputs names. 
- std::vector input_names; + std::vector input_names; }; class YoloxOpMetadata : public NmsOpMetadata @@ -85,8 +85,8 @@ private: {} template - hailo_status extract_detections(const MatchingLayersNames &layers_names, const MemoryView ®_buffer, const MemoryView &cls_buffer, - const MemoryView &obj_buffer, std::vector &detections, std::vector &classes_detections_count) + hailo_status extract_detections(const YoloxMatchingLayersNames &layers_names, const MemoryView ®_buffer, const MemoryView &cls_buffer, + const MemoryView &obj_buffer) { const auto &inputs_metadata = m_metadata->inputs_metadata(); const auto &nms_config = m_metadata->nms_config(); @@ -167,8 +167,8 @@ private: auto max_id_score_pair = get_max_class(cls_data, cls_idx, CLASSES_START_INDEX, objectness, cls_quant_info, cls_padded_shape.width); bbox.score = max_id_score_pair.second; if (max_id_score_pair.second >= nms_config.nms_score_th) { - detections.emplace_back(DetectionBbox(bbox, max_id_score_pair.first)); - classes_detections_count[max_id_score_pair.first]++; + m_detections.emplace_back(DetectionBbox(bbox, max_id_score_pair.first)); + m_classes_detections_count[max_id_score_pair.first]++; } } else { @@ -179,8 +179,8 @@ private: auto class_score = class_confidence * objectness; if (class_score >= nms_config.nms_score_th) { bbox.score = class_score; - detections.emplace_back(DetectionBbox(bbox, curr_class_idx)); - classes_detections_count[curr_class_idx]++; + m_detections.emplace_back(DetectionBbox(bbox, curr_class_idx)); + m_classes_detections_count[curr_class_idx]++; } } } diff --git a/hailort/libhailort/src/net_flow/pipeline/async_infer_runner.cpp b/hailort/libhailort/src/net_flow/pipeline/async_infer_runner.cpp index 04bb899..a1e6352 100644 --- a/hailort/libhailort/src/net_flow/pipeline/async_infer_runner.cpp +++ b/hailort/libhailort/src/net_flow/pipeline/async_infer_runner.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. 
+ * Copyright (c) 2020-2023 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -14,19 +14,21 @@ #include "hailo/event.hpp" #include "hailo/hailort_defaults.hpp" #include "hailo/hailort_common.hpp" -#include "net_flow/pipeline/async_infer_runner_internal.hpp" -#include "net_flow/pipeline/pipeline.hpp" +#include "net_flow/pipeline/async_infer_runner.hpp" +#include "net_flow/pipeline/pipeline_internal.hpp" #include "net_flow/ops/op_metadata.hpp" namespace hailort { -Expected AsyncPipeline::create() +Expected> AsyncPipeline::create_shared() { - return AsyncPipeline(); + auto async_pipeline_ptr = make_shared_nothrow(); + CHECK_NOT_NULL_AS_EXPECTED(async_pipeline_ptr, HAILO_OUT_OF_HOST_MEMORY); + return async_pipeline_ptr; } -AsyncPipeline::AsyncPipeline() {} +AsyncPipeline::AsyncPipeline() : m_is_multi_planar(false) {} void AsyncPipeline::add_element_to_pipeline(std::shared_ptr pipeline_element) { @@ -55,6 +57,55 @@ void AsyncPipeline::set_build_params(ElementBuildParams &build_params) m_build_params = build_params; } +void AsyncPipeline::shutdown(hailo_status error_status) +/* +Async pipeline shutdown handling: +Shutdown the pipeline is considered unrecoverable, so the entire pipeline and core op (or net_group) are assumed to be unusable*. +Shutdown can originate from internal sources: +- Errors from the core. +- Errors from getting buffers from one of the buffer pools +- Error from enqueue/dequeue buffers in a AsyncPushQueue element + +or external sources: +- User request to abort. + +The flow in case of shutdown is: +1. Set the pipeline_status to error_status - will block new infer requests from coming +2. Shutdown threads in all elements +3. 
Dequeue all user buffers with the error_status** + +* - TODO: add support for resume flow +** - if there are user buffers in an AsyncPushQueueElement (inside the queue itself) - the element will clear them after stop_thread() is complete + + +*/ +{ + if (HAILO_STREAM_ABORTED_BY_USER == error_status) { + LOGGER__INFO("Pipeline was aborted by user. Shutting it down"); + } else { + LOGGER__ERROR("Shutting down the pipeline with status {}", error_status); + } + m_build_params.pipeline_status->store(error_status); + auto status = m_build_params.shutdown_event->signal(); + if (HAILO_SUCCESS != status) { + LOGGER__CRITICAL("Executing pipeline shutdown failed with status {}", status); + } + + for (auto &element : get_entry_elements()) { + status = element.second->terminate(error_status); + if (HAILO_SUCCESS != status) { + LOGGER__CRITICAL("Executing pipeline terminate failed with status {}", status); + } + } + + for (auto &element : get_entry_elements()) { + status = element.second->dequeue_user_buffers(error_status); + if (HAILO_SUCCESS != status) { + LOGGER__CRITICAL("Dequeueing external buffers failed with status {}", status); + } + } +} + const std::vector>& AsyncPipeline::get_pipeline() const { return m_pipeline_elements; @@ -80,30 +131,27 @@ const ElementBuildParams AsyncPipeline::get_build_params() return m_build_params; } -Expected> AsyncInferRunnerInternal::create(ConfiguredNetworkGroupBase &net_group, - const std::unordered_map &inputs_formats, const std::unordered_map &outputs_formats) +void AsyncPipeline::set_as_multi_planar() { - auto async_infer_runner = AsyncInferRunnerImpl::create(net_group, inputs_formats, outputs_formats); - CHECK_EXPECTED(async_infer_runner); - - auto async_infer_runner_ptr = std::shared_ptr(async_infer_runner.release()); - CHECK_NOT_NULL_AS_EXPECTED(async_infer_runner_ptr, HAILO_OUT_OF_HOST_MEMORY); - - return async_infer_runner_ptr; + m_is_multi_planar = true; } -AsyncInferRunnerInternal::AsyncInferRunnerInternal() : - 
m_pipeline_status(make_shared_nothrow>(HAILO_SUCCESS)) -{} +bool AsyncPipeline::is_multi_planar() +{ + return m_is_multi_planar; +} -Expected> AsyncInferRunnerImpl::create(ConfiguredNetworkGroupBase &net_group, +Expected> AsyncInferRunnerImpl::create(std::shared_ptr net_group, const std::unordered_map &inputs_formats, const std::unordered_map &outputs_formats, const uint32_t timeout) { - auto async_pipeline_expected = create_pipeline(net_group, inputs_formats, outputs_formats, timeout); + auto pipeline_status = make_shared_nothrow>(HAILO_SUCCESS); + CHECK_AS_EXPECTED(nullptr != pipeline_status, HAILO_OUT_OF_HOST_MEMORY); + + auto async_pipeline_expected = PipelineBuilder::create_pipeline(net_group, inputs_formats, outputs_formats, timeout, pipeline_status); CHECK_EXPECTED(async_pipeline_expected); - auto async_infer_runner_ptr = make_shared_nothrow(async_pipeline_expected.release()); + auto async_infer_runner_ptr = make_shared_nothrow(async_pipeline_expected.release(), pipeline_status); CHECK_NOT_NULL_AS_EXPECTED(async_infer_runner_ptr, HAILO_OUT_OF_HOST_MEMORY); auto status = async_infer_runner_ptr->start_pipeline(); @@ -112,11 +160,11 @@ Expected> AsyncInferRunnerImpl::create(Con return async_infer_runner_ptr; } -AsyncInferRunnerImpl::AsyncInferRunnerImpl(AsyncPipeline &&async_pipeline) : - AsyncInferRunnerInternal(), - m_async_pipeline(std::move(async_pipeline)), +AsyncInferRunnerImpl::AsyncInferRunnerImpl(std::shared_ptr async_pipeline, std::shared_ptr> pipeline_status) : + m_async_pipeline(async_pipeline), m_is_activated(false), - m_is_aborted(false) + m_is_aborted(false), + m_pipeline_status(pipeline_status) {} AsyncInferRunnerImpl::~AsyncInferRunnerImpl() @@ -129,7 +177,7 @@ hailo_status AsyncInferRunnerImpl::stop_pipeline() hailo_status status = HAILO_SUCCESS; if (m_is_activated) { m_is_activated = false; - for (auto &entry_element : m_async_pipeline.get_entry_elements()) { + for (auto &entry_element : m_async_pipeline->get_entry_elements()) { status = 
entry_element.second->deactivate(); if (HAILO_SUCCESS != status) { LOGGER__WARNING("Failed deactivate of element {} status {}", entry_element.second->name(), status); @@ -148,943 +196,123 @@ hailo_status AsyncInferRunnerImpl::stop_pipeline() hailo_status AsyncInferRunnerImpl::start_pipeline() { hailo_status status = HAILO_SUCCESS; - for (auto &entry_element : m_async_pipeline.get_entry_elements()) { + for (auto &entry_element : m_async_pipeline->get_entry_elements()) { status = entry_element.second->activate(); CHECK_SUCCESS(status); } + m_is_activated = true; + return status; } -hailo_status AsyncInferRunnerImpl::async_infer() +void AsyncInferRunnerImpl::abort() { - hailo_status status = m_async_pipeline.get_build_params().pipeline_status->load(); - CHECK(HAILO_SUCCESS == status, HAILO_INVALID_OPERATION, "Can't handle infer request since Pipeline status is {}.", status); + m_is_aborted = true; + m_async_pipeline->shutdown(HAILO_STREAM_ABORTED_BY_USER); + return; +} - for (auto &last_element : m_async_pipeline.get_last_elements()) { - auto buffers_are_full = last_element.second->are_buffer_pools_full(); - CHECK_EXPECTED_AS_STATUS(buffers_are_full); - if (buffers_are_full.release()) { - LOGGER__ERROR("Can't handle infer request since queue is full."); - return HAILO_QUEUE_IS_FULL; +Expected AsyncInferRunnerImpl::can_push_buffers() +{ + for (auto &last_element : m_async_pipeline->get_last_elements()) { + auto can_push_buffer = last_element.second->can_push_buffer_upstream(last_element.first); + CHECK_EXPECTED(can_push_buffer); + if (!can_push_buffer.release()) { + return false; } } - for (auto &last_element : m_async_pipeline.get_last_elements()) { + for (auto &entry_element : m_async_pipeline->get_entry_elements()) { + auto can_push_buffer = entry_element.second->can_push_buffer_downstream(entry_element.first); + CHECK_EXPECTED(can_push_buffer); + if (!can_push_buffer.release()) { + return false; + } + } + + return true; +} + +hailo_status 
AsyncInferRunnerImpl::async_infer() +{ + hailo_status status = m_async_pipeline->get_build_params().pipeline_status->load(); + CHECK_SUCCESS(status, "Can't handle infer request since Pipeline status is {}.", status); + + auto pools_are_ready = can_push_buffers(); + CHECK_EXPECTED_AS_STATUS(pools_are_ready); + CHECK(pools_are_ready.release(), HAILO_QUEUE_IS_FULL, "Can't handle infer request since a queue in the pipeline is full."); + + for (auto &last_element : m_async_pipeline->get_last_elements()) { assert(contains(m_output_buffers, last_element.first)); auto output_buffer = m_output_buffers.at(last_element.first); auto read_done = m_read_dones.at(last_element.first); - // TODO: handle the non-recoverable case where one buffer is enqueued succesfully and the second isn't (HRT-11783) + // TODO: handle the non-recoverable case where one buffer is enqueued successfully and the second isn't (HRT-11783) status = last_element.second->enqueue_execution_buffer(output_buffer, read_done); CHECK_SUCCESS(status); } - for (auto &entry_element : m_async_pipeline.get_entry_elements()) { + for (auto &entry_element : m_async_pipeline->get_entry_elements()) { assert(contains(m_input_buffers, entry_element.first)); - auto input_buffer = m_input_buffers.at(entry_element.first); - auto write_done = m_write_dones.at(entry_element.first); - entry_element.second->sinks()[0].run_push_async(PipelineBuffer(input_buffer, write_done)); + entry_element.second->sinks()[0].run_push_async(std::move(m_input_buffers.at(entry_element.first))); } return HAILO_SUCCESS; } void AsyncInferRunnerImpl::add_element_to_pipeline(std::shared_ptr pipeline_element) { - m_async_pipeline.add_element_to_pipeline(pipeline_element); + m_async_pipeline->add_element_to_pipeline(pipeline_element); } void AsyncInferRunnerImpl::add_entry_element(std::shared_ptr pipeline_element, const std::string &input_name) { - m_async_pipeline.add_entry_element(pipeline_element, input_name); + 
m_async_pipeline->add_entry_element(pipeline_element, input_name); } void AsyncInferRunnerImpl::add_last_element(std::shared_ptr pipeline_element, const std::string &output_name) { - m_async_pipeline.add_last_element(pipeline_element, output_name); + m_async_pipeline->add_last_element(pipeline_element, output_name); } std::unordered_map> AsyncInferRunnerImpl::get_entry_elements() { - return m_async_pipeline.get_entry_elements(); + return m_async_pipeline->get_entry_elements(); } std::unordered_map> AsyncInferRunnerImpl::get_last_elements() { - return m_async_pipeline.get_last_elements(); + return m_async_pipeline->get_last_elements(); } void AsyncInferRunnerImpl::set_input(const std::string &input_name, MemoryView &&input_buffer, TransferDoneCallbackAsyncInfer &write_done) { - m_input_buffers[input_name] = std::move(input_buffer); - m_write_dones[input_name] = write_done; -} - -void AsyncInferRunnerImpl::set_output(const std::string &output_name, MemoryView &&output_buffer, TransferDoneCallbackAsyncInfer &read_done) -{ - m_output_buffers[output_name] = std::move(output_buffer); - m_read_dones[output_name] = read_done; -} - -Expected AsyncInferRunnerImpl::get_min_buffer_pool_size(ConfiguredNetworkGroupBase &net_group) -{ - uint32_t buffer_pool_size = UINT32_MAX; - - auto input_streams = net_group.get_input_streams(); - for (const auto &input_stream : input_streams) { - auto async_max_queue_size = input_stream.get().get_async_max_queue_size(); - CHECK_EXPECTED(async_max_queue_size); - if (buffer_pool_size > async_max_queue_size.value()) { - buffer_pool_size = static_cast(async_max_queue_size.value()); - } - } - - auto output_streams = net_group.get_output_streams(); - for (const auto &output_stream : output_streams) { - auto async_max_queue_size = output_stream.get().get_async_max_queue_size(); - CHECK_EXPECTED(async_max_queue_size); - if (buffer_pool_size > async_max_queue_size.value()) { - buffer_pool_size = static_cast(async_max_queue_size.value()); - } - } - - 
return buffer_pool_size; -} - -Expected> AsyncInferRunnerImpl::expand_auto_input_formats(ConfiguredNetworkGroupBase &net_group, - const std::unordered_map &inputs_formats) -{ - std::unordered_map expanded_input_format; - for (auto &input_format : inputs_formats) { - auto input_streams_names = net_group.get_stream_names_from_vstream_name(input_format.first); - CHECK_EXPECTED(input_streams_names); - - // TODO: Taking data from the first ll stream will not work in multi-planar work - auto shared_stream_ptr = net_group.get_shared_input_stream_by_name(input_streams_names.value()[0]); - CHECK_EXPECTED(shared_stream_ptr); - - expanded_input_format[input_format.first] = HailoRTDefaults::expand_auto_format(input_format.second, - shared_stream_ptr.value()->get_info().format); - } - return expanded_input_format; -} - -Expected> AsyncInferRunnerImpl::expand_auto_output_formats(ConfiguredNetworkGroupBase &net_group, - const std::unordered_map &outputs_formats) -{ - std::unordered_map expanded_output_format; - for (auto &output_format : outputs_formats) { - auto output_streams_names = net_group.get_stream_names_from_vstream_name(output_format.first); - CHECK_EXPECTED(output_streams_names); - - // TODO: Taking data from the first ll stream will not work in multi-planar work - auto shared_stream_ptr = net_group.get_shared_output_stream_by_name(output_streams_names.value()[0]); - CHECK_EXPECTED(shared_stream_ptr); - - expanded_output_format[output_format.first] = HailoRTDefaults::expand_auto_format(output_format.second, - shared_stream_ptr.value()->get_info().format); - } - return expanded_output_format; -} - -Expected>> AsyncInferRunnerImpl::get_input_streams_from_net_group(ConfiguredNetworkGroupBase &net_group, - const std::unordered_map &inputs_formats) -{ - std::unordered_map> input_streams; - for (auto &input_format : inputs_formats) { - auto input_streams_names = net_group.get_stream_names_from_vstream_name(input_format.first); - CHECK_EXPECTED(input_streams_names); - - for 
(auto &input_stream_name : input_streams_names.release()) { - auto shared_stream_ptr = net_group.get_shared_input_stream_by_name(input_stream_name); - CHECK_EXPECTED(shared_stream_ptr); - - input_streams[input_stream_name] = shared_stream_ptr.release(); - } - } - return input_streams; -} - -Expected>> AsyncInferRunnerImpl::get_output_streams_from_net_group(ConfiguredNetworkGroupBase &net_group, - const std::unordered_map &outputs_formats) -{ - std::unordered_map> output_streams; - for (auto &output_format : outputs_formats) { - auto output_streams_names = net_group.get_stream_names_from_vstream_name(output_format.first); - CHECK_EXPECTED(output_streams_names); - - for (auto &output_stream_name : output_streams_names.release()) { - auto shared_stream_ptr = net_group.get_shared_output_stream_by_name(output_stream_name); - CHECK_EXPECTED(shared_stream_ptr); - - output_streams[output_stream_name] = shared_stream_ptr.release(); - } - } - return output_streams; -} - -hailo_status AsyncInferRunnerImpl::create_pre_async_hw_elements(ConfiguredNetworkGroupBase &net_group, - std::unordered_map> &input_streams, - const std::unordered_map &inputs_formats, AsyncPipeline &async_pipeline) -{ - bool is_dma_able = true; - for (auto &input_stream_pair : input_streams) { - auto input_stream = input_stream_pair.second; - auto input_stream_name = input_stream_pair.first; - auto input_stream_base = std::static_pointer_cast(input_stream); - auto input_stream_info = input_stream->get_info(); - auto vstream_names = net_group.get_vstream_names_from_stream_name(input_stream_name); - CHECK_EXPECTED_AS_STATUS(vstream_names); - - auto sink_index = async_pipeline.get_async_hw_element()->get_sink_index_from_input_stream_name(input_stream_name); - CHECK_EXPECTED_AS_STATUS(sink_index); - - auto should_transform = InputTransformContext::is_transformation_required(input_stream_info.shape, - inputs_formats.at(input_stream_name), input_stream_info.hw_shape, input_stream_info.format, - 
input_stream_base->get_quant_infos()); - CHECK_EXPECTED_AS_STATUS(should_transform); - - auto entry_queue_elem = add_push_queue_element(PipelineObject::create_element_name("EntryPushQueueElement", input_stream_info.name, input_stream_info.index), - async_pipeline, nullptr); - CHECK_EXPECTED_AS_STATUS(entry_queue_elem); - - if (should_transform.value()) { - auto pre_infer_elem = PreInferElement::create(input_stream_info.shape, inputs_formats.at(input_stream_name), - input_stream_info.hw_shape, input_stream_info.format, input_stream_base->get_quant_infos(), - PipelineObject::create_element_name("PreInferElement", input_stream_info.name, input_stream_info.index), - async_pipeline.get_build_params(), PipelineDirection::PUSH, is_dma_able); - CHECK_EXPECTED_AS_STATUS(pre_infer_elem); - async_pipeline.add_element_to_pipeline(pre_infer_elem.value()); - CHECK_SUCCESS(PipelinePad::link_pads(entry_queue_elem.value(), pre_infer_elem.value())); - - auto queue_elem = add_push_queue_element(PipelineObject::create_element_name("PushQueueElement", input_stream_info.name, input_stream_info.index), - async_pipeline, pre_infer_elem.value()); - CHECK_EXPECTED_AS_STATUS(queue_elem); - - CHECK_SUCCESS(PipelinePad::link_pads(queue_elem.value(), async_pipeline.get_async_hw_element(), 0, sink_index.value())); - } else { - CHECK_SUCCESS(PipelinePad::link_pads(entry_queue_elem.value(), async_pipeline.get_async_hw_element(), 0, sink_index.value())); - } - - for (auto &vstream_name : vstream_names.release()) { - if (!contains(async_pipeline.get_entry_elements(), vstream_name)) { - async_pipeline.add_entry_element(entry_queue_elem.release(), vstream_name); - } - } - } - return HAILO_SUCCESS; -} - -Expected> AsyncInferRunnerImpl::add_post_infer_element(const hailo_format_t &output_format, - const hailo_nms_info_t &nms_info, AsyncPipeline &async_pipeline, const hailo_3d_image_shape_t &src_image_shape, - const hailo_format_t &src_format, const hailo_3d_image_shape_t &dst_image_shape, const 
std::vector &dst_quant_infos, - bool is_last_copy_element, std::shared_ptr final_elem, const uint32_t final_elem_source_index) -{ - auto queue_elem = add_push_queue_element(PipelineObject::create_element_name("PushQueueElement", final_elem->name(), static_cast(final_elem_source_index)), - async_pipeline, final_elem, final_elem_source_index); - CHECK_EXPECTED(queue_elem); - - auto post_infer_elem = PostInferElement::create(src_image_shape, src_format, dst_image_shape, output_format, - dst_quant_infos, nms_info, PipelineObject::create_element_name("PostInferElement", - final_elem->name(), static_cast(final_elem_source_index)), async_pipeline.get_build_params(), - PipelineDirection::PUSH, is_last_copy_element); - CHECK_EXPECTED(post_infer_elem); - - async_pipeline.add_element_to_pipeline(post_infer_elem.value()); - - CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(queue_elem.value(), post_infer_elem.value())); - return post_infer_elem.release(); -} - -Expected> AsyncInferRunnerImpl::add_push_queue_element(const std::string &queue_name, AsyncPipeline &async_pipeline, - std::shared_ptr final_elem, const uint32_t final_elem_source_index) -{ - auto push_queue_elem = AsyncPushQueueElement::create(queue_name, async_pipeline.get_build_params(), PipelineDirection::PUSH); - CHECK_EXPECTED(push_queue_elem); - - async_pipeline.add_element_to_pipeline(push_queue_elem.value()); - - // final elem will be nullptr in case it's the first element in pipeline - if (final_elem) { - CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(final_elem, push_queue_elem.value(), final_elem_source_index, 0)); - } - - return push_queue_elem.release(); -} - -Expected> AsyncInferRunnerImpl::add_nms_to_detections_convert_element(AsyncPipeline &async_pipeline, - std::shared_ptr output_stream, const std::string &element_name, const net_flow::PostProcessOpMetadataPtr &op_metadata, - const bool is_last_copy_element, std::shared_ptr final_elem, const uint32_t final_elem_index) -{ - auto metadata = 
std::dynamic_pointer_cast(op_metadata); - assert(nullptr != metadata); - - auto nms_to_detections_element = ConvertNmsToDetectionsElement::create(metadata->nms_info(), - PipelineObject::create_element_name(element_name, output_stream->name(), output_stream->get_info().index), - async_pipeline.get_build_params(), PipelineDirection::PUSH, is_last_copy_element); - CHECK_EXPECTED(nms_to_detections_element); - - async_pipeline.add_element_to_pipeline(nms_to_detections_element.value()); - - CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(final_elem, nms_to_detections_element.value(), final_elem_index, 0)); - return nms_to_detections_element.release(); -} - -Expected> AsyncInferRunnerImpl::add_remove_overlapping_bboxes_element(AsyncPipeline &async_pipeline, - std::shared_ptr output_stream, const std::string &element_name, const net_flow::PostProcessOpMetadataPtr &op_metadata, - const bool is_last_copy_element, std::shared_ptr final_elem, const uint32_t final_elem_index) -{ - auto metadata = std::dynamic_pointer_cast(op_metadata); - assert(nullptr != metadata); - - auto remove_overlapping_bboxes_element = RemoveOverlappingBboxesElement::create(metadata->nms_config(), - PipelineObject::create_element_name(element_name, output_stream->name(), output_stream->get_info().index), - async_pipeline.get_build_params(), PipelineDirection::PUSH, is_last_copy_element); - CHECK_EXPECTED(remove_overlapping_bboxes_element); - - async_pipeline.add_element_to_pipeline(remove_overlapping_bboxes_element.value()); - - CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(final_elem, remove_overlapping_bboxes_element.value(), final_elem_index, 0)); - return remove_overlapping_bboxes_element; -} - -Expected> AsyncInferRunnerImpl::add_fill_nms_format_element(AsyncPipeline &async_pipeline, - std::shared_ptr output_stream, const std::string &element_name, const net_flow::PostProcessOpMetadataPtr &op_metadata, - const hailo_format_t &output_format, const bool is_last_copy_element, std::shared_ptr 
final_elem, const uint32_t final_elem_index) -{ - auto metadata = std::dynamic_pointer_cast(op_metadata); - assert(nullptr != metadata); - - auto fill_nms_format_element = FillNmsFormatElement::create(metadata->nms_info(), output_format, metadata->nms_config(), - PipelineObject::create_element_name(element_name, output_stream->name(), output_stream->get_info().index), - async_pipeline.get_build_params(), PipelineDirection::PUSH, is_last_copy_element); - CHECK_EXPECTED(fill_nms_format_element); - - async_pipeline.add_element_to_pipeline(fill_nms_format_element.value()); - - CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(final_elem, fill_nms_format_element.value(), final_elem_index, 0)); - return fill_nms_format_element; -} - -Expected> AsyncInferRunnerImpl::add_last_async_element(AsyncPipeline &async_pipeline, - const std::string &output_format_name, std::shared_ptr final_elem, const uint32_t final_elem_source_index) -{ - auto last_async_element = LastAsyncElement::create(PipelineObject::create_element_name("LastAsyncElement", - final_elem->name(), static_cast(final_elem_source_index)), async_pipeline.get_build_params()); - CHECK_EXPECTED(last_async_element); - - async_pipeline.add_element_to_pipeline(last_async_element.value()); - CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(final_elem, last_async_element.value(), final_elem_source_index, 0)); - - async_pipeline.add_last_element(last_async_element.value(), output_format_name); - - return last_async_element.release(); -} - -Expected> AsyncInferRunnerImpl::get_output_format_from_edge_info_name(std::string edge_info_name, - const std::unordered_map &outputs_formats) -{ - for (auto &output_format : outputs_formats) { - if (output_format.first == edge_info_name) { - return std::pair(output_format); - } - } - return make_unexpected(HAILO_NOT_FOUND); -} - -hailo_status AsyncInferRunnerImpl::add_output_demux_flow(std::shared_ptr &output_stream, - AsyncPipeline &async_pipeline, const std::unordered_map 
&outputs_formats) -{ - const bool is_dma_able_hw_async = true; - auto status = async_pipeline.get_async_hw_element()->fill_buffer_pools(is_dma_able_hw_async); - CHECK_SUCCESS(status); - - auto expected_demuxer = OutputDemuxer::create(*output_stream); - CHECK_EXPECTED_AS_STATUS(expected_demuxer); - - std::shared_ptr demuxer_ptr = expected_demuxer.release(); - CHECK_ARG_NOT_NULL(demuxer_ptr); - - status = output_stream->set_timeout(HAILO_INFINITE_TIMEOUT); - CHECK_SUCCESS(status); - - auto demux_elem = TransformDemuxElement::create(demuxer_ptr, - PipelineObject::create_element_name("TransformDemuxElement", output_stream->name(), output_stream->get_info().index), - async_pipeline.get_build_params(), PipelineDirection::PUSH); - CHECK_EXPECTED_AS_STATUS(demux_elem); - async_pipeline.add_element_to_pipeline(demux_elem.value()); - - auto output_index = async_pipeline.get_async_hw_element()->get_source_index_from_output_stream_name(output_stream->name()); - CHECK_EXPECTED_AS_STATUS(output_index); - CHECK_SUCCESS(PipelinePad::link_pads(async_pipeline.get_async_hw_element(), demux_elem.value(), output_index.value(), 0)); - - uint8_t i = 0; - for (auto &edge_info : demuxer_ptr->get_edges_stream_info()) { - auto output_format_expected = get_output_format_from_edge_info_name(edge_info.name, outputs_formats); - CHECK_EXPECTED_AS_STATUS(output_format_expected); - - auto demux_queue_elem = add_push_queue_element(PipelineObject::create_element_name("PushQueueElement_demux", edge_info.name, i), async_pipeline, - demux_elem.value(), i); - CHECK_EXPECTED_AS_STATUS(demux_queue_elem); - - auto should_transform = OutputTransformContext::is_transformation_required(edge_info.hw_shape, - edge_info.format, edge_info.shape, output_format_expected.value().second, std::vector{edge_info.quant_info}); // TODO: Get quant vector (HRT-11077) - CHECK_EXPECTED_AS_STATUS(should_transform); - - if (should_transform.value()) { - status = demux_elem.value()->fill_buffer_pool(false, i); - 
CHECK_SUCCESS(status); - - auto post_infer_elem = add_post_infer_element(output_format_expected.value().second, edge_info.nms_info, - async_pipeline, edge_info.hw_shape, edge_info.format, edge_info.shape, {edge_info.quant_info}, true, demux_queue_elem.value()); - CHECK_EXPECTED_AS_STATUS(post_infer_elem); - - auto last_async_element = add_last_async_element(async_pipeline, output_format_expected.value().first, post_infer_elem.value()); - CHECK_EXPECTED_AS_STATUS(last_async_element); - } else { - auto last_async_element = add_last_async_element(async_pipeline, output_format_expected.value().first, demux_queue_elem.value()); - CHECK_EXPECTED_AS_STATUS(last_async_element); - } - i++; - } - return HAILO_SUCCESS; + m_input_buffers[input_name] = PipelineBuffer(std::move(input_buffer), write_done); } -// TODO: remove this function as part of HRT-11667 -hailo_status AsyncInferRunnerImpl::finalize_output_flow(std::shared_ptr &output_stream_base, - const std::pair &output_format, const hailo_nms_info_t &nms_info, const bool is_dma_able, - AsyncPipeline &async_pipeline, std::shared_ptr final_elem, const uint32_t final_elem_source_index) +void AsyncInferRunnerImpl::set_input(const std::string &input_name, hailo_pix_buffer_t input_buffer, TransferDoneCallbackAsyncInfer &write_done) { - auto stream_info = output_stream_base->get_info(); - auto stream_quant_infos = output_stream_base->get_quant_infos(); - auto should_transform = OutputTransformContext::is_transformation_required(stream_info.hw_shape, - stream_info.format, stream_info.shape, output_format.second, stream_quant_infos); - CHECK_EXPECTED_AS_STATUS(should_transform); - - if (should_transform.value()) { - hailo_status status = final_elem->fill_buffer_pools(is_dma_able); - CHECK_SUCCESS(status); - - auto post_infer_elem = add_post_infer_element(output_format.second, nms_info, async_pipeline, - stream_info.hw_shape, stream_info.format, stream_info.shape, stream_quant_infos, true, final_elem, final_elem_source_index); - 
CHECK_EXPECTED_AS_STATUS(post_infer_elem); - - auto last_async_element = add_last_async_element(async_pipeline, output_format.first, post_infer_elem.value()); - CHECK_EXPECTED_AS_STATUS(last_async_element); + // If only one plane is passed, address it as memview + if (1 == input_buffer.number_of_planes) { + m_input_buffers[input_name] = PipelineBuffer(MemoryView(input_buffer.planes[0].user_ptr, input_buffer.planes[0].bytes_used), write_done); + } else if (m_async_pipeline->is_multi_planar()) { + // If model is multi-planar + m_input_buffers[input_name] = PipelineBuffer(std::move(input_buffer), write_done); } else { - auto last_async_element = add_last_async_element(async_pipeline, output_format.first, final_elem, final_elem_source_index); - CHECK_EXPECTED_AS_STATUS(last_async_element); - } - return HAILO_SUCCESS; -} - -hailo_status AsyncInferRunnerImpl::add_nms_fuse_flow(OutputStreamPtrVector &output_streams, - const std::pair &output_format, AsyncPipeline &async_pipeline) -{ - const bool is_dma_able_hw_async = true; - auto status = async_pipeline.get_async_hw_element()->fill_buffer_pools(is_dma_able_hw_async); - CHECK_SUCCESS(status); - - std::vector nms_infos; - nms_infos.reserve(output_streams.size()); - for (const auto &out_stream : output_streams) { - CHECK(out_stream->get_info().nms_info.defuse_info.class_group_index <= output_streams.size(), - HAILO_INVALID_ARGUMENT, "Not all defused nms outputs were grouped correctly!"); - nms_infos.emplace_back(out_stream->get_info().nms_info); - } - - // To get the fused layer name and src stream format, we use the stream info of one of the defuses - auto first_defused_stream_info = output_streams[0]->get_info(); - auto fused_layer_name = first_defused_stream_info.nms_info.defuse_info.original_name; - - bool is_last_copy_element = true; - auto nms_elem = NmsMuxElement::create(nms_infos, PipelineObject::create_element_name("NmsMuxElement", fused_layer_name, 0), - async_pipeline.get_build_params(), PipelineDirection::PUSH, 
is_last_copy_element); - CHECK_EXPECTED_AS_STATUS(nms_elem); - - async_pipeline.add_element_to_pipeline(nms_elem.value()); - - uint32_t i = 0; - for (auto &output_stream : output_streams) { - const auto &curr_stream_info = output_stream->get_info(); - output_stream->set_timeout(HAILO_INFINITE_TIMEOUT); - - auto output_index = async_pipeline.get_async_hw_element()->get_source_index_from_output_stream_name(output_stream->name()); - CHECK_EXPECTED_AS_STATUS(output_index); - - auto queue_elem = add_push_queue_element(PipelineObject::create_element_name("PushQueueElement_nms_source", curr_stream_info.name, curr_stream_info.index), - async_pipeline, async_pipeline.get_async_hw_element(), output_index.value()); - CHECK_EXPECTED_AS_STATUS(queue_elem); - - CHECK_SUCCESS(PipelinePad::link_pads(queue_elem.value(), nms_elem.value(), 0, i)); - i++; - } - - auto output_stream_base = std::static_pointer_cast(output_streams[0]); - auto fused_layer_nms_info = nms_elem.value()->get_fused_nms_info(); - const bool is_dma_able_nms_mux = false; - const uint32_t final_elem_source_index = 0; - status = finalize_output_flow(output_stream_base, output_format, fused_layer_nms_info, - is_dma_able_nms_mux, async_pipeline, nms_elem.value(), final_elem_source_index); - CHECK_SUCCESS(status); - - return HAILO_SUCCESS; -} - -hailo_status AsyncInferRunnerImpl::add_softmax_flow(AsyncPipeline &async_pipeline, OutputStreamPtrVector &output_streams, - const std::pair &output_format, const net_flow::PostProcessOpMetadataPtr &softmax_op_metadata) -{ - assert(output_streams.size() == 1); - auto output_stream_base = std::static_pointer_cast(output_streams[0]); - auto hw_async_elem_index = async_pipeline.get_async_hw_element()->get_source_index_from_output_stream_name(output_stream_base->name()); - CHECK_EXPECTED_AS_STATUS(hw_async_elem_index); - - auto op_input_format = softmax_op_metadata->inputs_metadata().begin()->second.format; - auto output_format_expanded = 
net_flow::SoftmaxOpMetadata::expand_output_format_autos(output_format.second, op_input_format); - - auto stream_info = output_stream_base->get_info(); - auto stream_quant_infos = output_stream_base->get_quant_infos(); - auto post_infer_elem = add_post_infer_element(output_format_expanded, {}, async_pipeline, stream_info.hw_shape, stream_info.format, - stream_info.shape, output_stream_base->get_quant_infos(), false, async_pipeline.get_async_hw_element(), hw_async_elem_index.value()); - CHECK_EXPECTED_AS_STATUS(post_infer_elem); - - auto queue_elem = add_push_queue_element(PipelineObject::create_element_name("PushQueueElement_softmax", async_pipeline.get_async_hw_element()->name(), - static_cast(hw_async_elem_index.value())), async_pipeline, post_infer_elem.value()); - CHECK_EXPECTED_AS_STATUS(queue_elem); - - // Updating metadata according to user request - // Currently softmax only supports inputs to be float32 and order NHWC or NC - auto updated_inputs_metadata = softmax_op_metadata.get()->inputs_metadata(); - updated_inputs_metadata.begin()->second.format = output_format_expanded; - auto updated_outputs_metadata = softmax_op_metadata.get()->outputs_metadata(); - updated_outputs_metadata.begin()->second.format = output_format_expanded; - auto metadata = std::dynamic_pointer_cast(softmax_op_metadata); - assert(nullptr != metadata); - metadata->set_outputs_metadata(updated_outputs_metadata); - metadata->set_inputs_metadata(updated_inputs_metadata); - CHECK_SUCCESS(metadata->validate_format_info()); - - auto op_expected = net_flow::SoftmaxPostProcessOp::create(metadata); - CHECK_EXPECTED_AS_STATUS(op_expected); - - auto softmax_op = op_expected.release(); - auto softmax_element = SoftmaxPostProcessElement::create(softmax_op, - PipelineObject::create_element_name("SoftmaxPostProcessElement", output_stream_base->name(), stream_info.index), - async_pipeline.get_build_params(), PipelineDirection::PUSH, true); - CHECK_EXPECTED_AS_STATUS(softmax_element); - - 
async_pipeline.add_element_to_pipeline(softmax_element.value()); - CHECK_SUCCESS(PipelinePad::link_pads(queue_elem.value(), softmax_element.value())); - - auto last_async_element = add_last_async_element(async_pipeline, output_format.first, softmax_element.value()); - CHECK_EXPECTED_AS_STATUS(last_async_element); - - return HAILO_SUCCESS; -} - -hailo_status AsyncInferRunnerImpl::add_argmax_flow(AsyncPipeline &async_pipeline, OutputStreamPtrVector &output_streams, - const std::pair &output_format, const net_flow::PostProcessOpMetadataPtr &argmax_op_metadata) -{ - assert(output_streams.size() == 1); - auto output_stream_base = std::static_pointer_cast(output_streams[0]); - auto hw_async_elem_index = async_pipeline.get_async_hw_element()->get_source_index_from_output_stream_name(output_stream_base->name()); - CHECK_EXPECTED_AS_STATUS(hw_async_elem_index); - - auto queue_elem = add_push_queue_element(PipelineObject::create_element_name("PushQueueElement_argmax", async_pipeline.get_async_hw_element()->name(), - static_cast(hw_async_elem_index.value())), async_pipeline, async_pipeline.get_async_hw_element()); - CHECK_EXPECTED_AS_STATUS(queue_elem); - - // Updating metadata according to user request - auto op_input_format = argmax_op_metadata->inputs_metadata().begin()->second.format; - auto updated_outputs_metadata = argmax_op_metadata.get()->outputs_metadata(); - updated_outputs_metadata.begin()->second.format = net_flow::ArgmaxOpMetadata::expand_output_format_autos(output_format.second, op_input_format);; - auto metadata = std::dynamic_pointer_cast(argmax_op_metadata); - assert(nullptr != metadata); - metadata->set_outputs_metadata(updated_outputs_metadata); - CHECK_SUCCESS(metadata->validate_format_info()); - - auto op_expected = net_flow::ArgmaxPostProcessOp::create(metadata); - CHECK_EXPECTED_AS_STATUS(op_expected); - auto argmax_op = op_expected.release(); - bool is_last_copy_element = true; - - auto argmax_element = ArgmaxPostProcessElement::create(argmax_op, - 
PipelineObject::create_element_name("ArgmaxPostProcessElement", output_stream_base->name(), output_stream_base->get_info().index), - async_pipeline.get_build_params(), PipelineDirection::PUSH, is_last_copy_element); - CHECK_EXPECTED_AS_STATUS(argmax_element); - - async_pipeline.add_element_to_pipeline(argmax_element.value()); - CHECK_SUCCESS(PipelinePad::link_pads(queue_elem.value(), argmax_element.value())); - - auto last_async_element = add_last_async_element(async_pipeline, output_format.first, argmax_element.value()); - CHECK_EXPECTED_AS_STATUS(last_async_element); - - return HAILO_SUCCESS; -} - -hailo_status AsyncInferRunnerImpl::add_nms_flow(AsyncPipeline &async_pipeline, OutputStreamPtrVector &output_streams, - const std::pair &output_format, const std::shared_ptr &nms_op, - const hailo_vstream_info_t &vstream_info) -{ - auto first_stream_info = output_streams[0]->get_info(); - CHECK(output_format.second.type == HAILO_FORMAT_TYPE_FLOAT32, HAILO_INVALID_ARGUMENT, - "NMS output format type must be HAILO_FORMAT_TYPE_FLOAT32"); - CHECK(HailoRTCommon::is_nms(output_format.second.order), HAILO_INVALID_ARGUMENT, - "NMS output format order must be HAILO_FORMAT_ORDER_HAILO_NMS or HAILO_FORMAT_ORDER_HAILO_NMS_WITH_BYTE_MASK"); - - std::unordered_map inputs_metadata; - std::unordered_map outputs_metadata; - for (uint32_t i = 0; i < output_streams.size(); ++i) { - const auto &curr_stream_info = output_streams[i]->get_info(); - net_flow::BufferMetaData input_metadata = { - curr_stream_info.shape, - curr_stream_info.hw_shape, - curr_stream_info.format, - curr_stream_info.quant_info - }; - inputs_metadata.insert({curr_stream_info.name, input_metadata}); - } - - assert(nms_op->outputs_metadata().size() == 1); - - net_flow::BufferMetaData output_metadata = { - vstream_info.shape, - vstream_info.shape, - vstream_info.format, - vstream_info.quant_info - }; - outputs_metadata.insert({nms_op->outputs_metadata().begin()->first, output_metadata}); - - auto nms_elem = 
NmsPostProcessMuxElement::create(nms_op, PipelineObject::create_element_name("NmsPostProcessMuxElement", nms_op->get_name(), 0), - async_pipeline.get_build_params(), PipelineDirection::PUSH, true); - CHECK_EXPECTED_AS_STATUS(nms_elem); - - async_pipeline.add_element_to_pipeline(nms_elem.value()); - - hailo_format_t nms_src_format; - nms_src_format.flags = HAILO_FORMAT_FLAGS_QUANTIZED; - nms_src_format.order = HAILO_FORMAT_ORDER_NHCW; - nms_src_format.type = first_stream_info.format.type; - - for (uint32_t i = 0; i < output_streams.size(); ++i) { - const auto &curr_stream_info = output_streams[i]->get_info(); - output_streams[i]->set_timeout(HAILO_INFINITE_TIMEOUT); // TODO: Check with Salem/Kimel if can be removed - - auto output_stream_base = std::static_pointer_cast(output_streams[i]); - auto should_transform = OutputTransformContext::is_transformation_required(curr_stream_info.hw_shape, curr_stream_info.format, - curr_stream_info.hw_shape, nms_src_format, output_stream_base->get_quant_infos()); - CHECK_EXPECTED_AS_STATUS(should_transform); - - CHECK(!(should_transform.value()), HAILO_INVALID_ARGUMENT, "Unexpected transformation required for {}", curr_stream_info.name); - - auto source_id = async_pipeline.get_async_hw_element()->get_source_index_from_output_stream_name(output_stream_base->name()); - CHECK_EXPECTED_AS_STATUS(source_id); - - auto nms_source_queue_elem = add_push_queue_element(PipelineObject::create_element_name("PullQueueElement_nms_source", curr_stream_info.name, curr_stream_info.index), - async_pipeline, async_pipeline.get_async_hw_element(), source_id.value()); - CHECK_EXPECTED_AS_STATUS(nms_source_queue_elem); - - CHECK_SUCCESS(PipelinePad::link_pads(nms_source_queue_elem.value(), nms_elem.value(), 0, i)); - nms_elem.value()->add_sink_name(curr_stream_info.name); - } - auto last_async_element = add_last_async_element(async_pipeline, output_format.first, nms_elem.value()); - CHECK_EXPECTED_AS_STATUS(last_async_element); - - return HAILO_SUCCESS; 
-} - -hailo_status AsyncInferRunnerImpl::add_iou_flow( AsyncPipeline &async_pipeline, OutputStreamPtrVector &output_streams, - const std::pair &output_format, const net_flow::PostProcessOpMetadataPtr &iou_op_metadata) -{ - assert(output_streams.size() == 1); - auto output_stream = output_streams[0]; - - auto output_index = async_pipeline.get_async_hw_element()->get_source_index_from_output_stream_name(output_stream->name()); - CHECK_EXPECTED_AS_STATUS(output_index); - - auto hw_read_queue_element = add_push_queue_element(PipelineObject::create_element_name("PushQueueElement_hw_read", output_stream->name(), output_stream->get_info().index), - async_pipeline, async_pipeline.get_async_hw_element() , output_index.value()); - CHECK_EXPECTED_AS_STATUS(hw_read_queue_element); - - auto &stream_info = output_stream->get_info(); - auto &stream_quant_infos = output_stream->get_quant_infos(); - - auto post_infer_element = add_post_infer_element(output_format.second, stream_info.nms_info, - async_pipeline, stream_info.hw_shape, stream_info.format, stream_info.shape, stream_quant_infos, false, hw_read_queue_element.value()); - CHECK_EXPECTED_AS_STATUS(post_infer_element); - - auto pre_nms_convert_queue_element = add_push_queue_element(PipelineObject::create_element_name("PullQueueElement_pre_nms_convert", output_stream->name(), output_stream->get_info().index), - async_pipeline, post_infer_element.value()); - CHECK_EXPECTED_AS_STATUS(pre_nms_convert_queue_element); - - auto nms_to_detections_element = add_nms_to_detections_convert_element(async_pipeline, output_stream, "NmsFormatToDetectionsElement", iou_op_metadata, - false, pre_nms_convert_queue_element.value()); - CHECK_EXPECTED_AS_STATUS(nms_to_detections_element); - - auto pre_remove_overlapping_bboxes_element_queue_element = add_push_queue_element(PipelineObject::create_element_name("PullQueueElement_pre_bboxes_removing", output_stream->name(), output_stream->get_info().index), - async_pipeline, 
nms_to_detections_element.value()); - CHECK_EXPECTED_AS_STATUS(pre_remove_overlapping_bboxes_element_queue_element); - - auto remove_overlapping_bboxes_element = add_remove_overlapping_bboxes_element(async_pipeline, output_stream, "RemoveOverlappingBboxesElement", iou_op_metadata, - false, pre_remove_overlapping_bboxes_element_queue_element.value()); - CHECK_EXPECTED_AS_STATUS(remove_overlapping_bboxes_element); - - auto pre_fill_nms_format_element_queue_element = add_push_queue_element(PipelineObject::create_element_name("PullQueueElement_pre_fill_nms_format", output_stream->name(), output_stream->get_info().index), - async_pipeline, remove_overlapping_bboxes_element.value()); - CHECK_EXPECTED_AS_STATUS(pre_fill_nms_format_element_queue_element); - - auto fill_nms_format_element = add_fill_nms_format_element(async_pipeline, output_stream, "FillNmsFormatElement", iou_op_metadata, - output_format.second, true, pre_fill_nms_format_element_queue_element.value()); - CHECK_EXPECTED_AS_STATUS(fill_nms_format_element); - - auto last_async_element = add_last_async_element(async_pipeline, output_format.first, fill_nms_format_element.value()); - CHECK_EXPECTED_AS_STATUS(last_async_element); - - return HAILO_SUCCESS; -} - -hailo_status AsyncInferRunnerImpl::add_nms_flows(AsyncPipeline &async_pipeline, OutputStreamPtrVector &output_streams, - const std::pair &output_format, const net_flow::PostProcessOpMetadataPtr &op_metadata, - const std::vector &vstreams_infos) -{ - assert(1 <= op_metadata->outputs_metadata().size()); - auto updated_outputs_metadata = op_metadata->outputs_metadata(); - std::pair expanded_output_format = {output_format.first, - net_flow::NmsOpMetadata::expand_output_format_autos_by_op_type(output_format.second, op_metadata->type())}; - updated_outputs_metadata.begin()->second.format = expanded_output_format.second; - - if (HAILO_FORMAT_FLAGS_QUANTIZED & updated_outputs_metadata.begin()->second.format.flags) { - 
updated_outputs_metadata.begin()->second.format.flags &= ~HAILO_FORMAT_FLAGS_QUANTIZED; - // TODO: Delete override when changing CLI default flags - // TODO: check with Kimel/Salem of this warning is still needed - LOGGER__WARNING("The output_vstream {} format flag is marked as quantized, which is not supported with {}. " - "flag has been automatically set to False.", updated_outputs_metadata.begin()->first, op_metadata->get_name()); - } - - op_metadata->set_outputs_metadata(updated_outputs_metadata); - CHECK_SUCCESS(op_metadata->validate_format_info()); - std::shared_ptr op; - - switch (op_metadata->type()) { - case net_flow::OperationType::IOU: - return add_iou_flow(async_pipeline, output_streams, expanded_output_format, op_metadata); - - case net_flow::OperationType::YOLOX: - { - auto metadata = std::dynamic_pointer_cast(op_metadata); - assert(nullptr != metadata); - auto op_expected = net_flow::YOLOXPostProcessOp::create(metadata); - CHECK_EXPECTED_AS_STATUS(op_expected); - op = op_expected.release(); - break; - } - case net_flow::OperationType::YOLOV5: - { - auto metadata = std::dynamic_pointer_cast(op_metadata); - assert(nullptr != metadata); - auto op_expected = net_flow::YOLOv5PostProcessOp::create(metadata); - CHECK_EXPECTED_AS_STATUS(op_expected); - op = op_expected.release(); - break; - } - case net_flow::OperationType::SSD: - { - auto metadata = std::dynamic_pointer_cast(op_metadata); - assert(nullptr != metadata); - auto op_expected = net_flow::SSDPostProcessOp::create(metadata); - CHECK_EXPECTED_AS_STATUS(op_expected); - op = op_expected.release(); - break; - } - default: - break; + // Other cases - return error, as on async flow we do not support copy to new buffer + LOGGER__ERROR("HEF was compiled for single input layer, while trying to pass non-contiguous planes buffers."); + m_input_buffers[input_name] = PipelineBuffer(HAILO_INVALID_OPERATION, write_done); } - hailo_vstream_info_t output_vstream_info; - for (auto ¤t_output_vstream_info : 
vstreams_infos) { - if (current_output_vstream_info.name == op->outputs_metadata().begin()->first) { - output_vstream_info = current_output_vstream_info; - } - } - return add_nms_flow(async_pipeline, output_streams, expanded_output_format, op, output_vstream_info); } -hailo_status AsyncInferRunnerImpl::add_ops_flows(AsyncPipeline &async_pipeline, - const std::pair &output_format, net_flow::PostProcessOpMetadataPtr &op_metadata, - OutputStreamPtrVector &output_streams, const std::vector &vstreams_infos) -{ - const bool is_dma_able_hw_async = true; - auto status = async_pipeline.get_async_hw_element()->fill_buffer_pools(is_dma_able_hw_async); - CHECK_SUCCESS(status); - - switch (op_metadata->type()) { - case net_flow::OperationType::YOLOX: - case net_flow::OperationType::SSD: - case net_flow::OperationType::YOLOV5: - case net_flow::OperationType::IOU: - // TODO: add support for YOLOV5SEG - return add_nms_flows(async_pipeline, output_streams, output_format, op_metadata, vstreams_infos); - - case net_flow::OperationType::ARGMAX: - return add_argmax_flow(async_pipeline, output_streams, output_format, op_metadata); - - case net_flow::OperationType::SOFTMAX: - return add_softmax_flow(async_pipeline, output_streams, output_format, op_metadata); - - default: - LOGGER__ERROR("op type {} of op {} is not in any of the supported post process OP types", net_flow::OpMetadata::get_operation_type_str(op_metadata->type()), op_metadata->get_name()); - return HAILO_INVALID_OPERATION; - } -} - -hailo_status AsyncInferRunnerImpl::create_post_async_hw_elements(ConfiguredNetworkGroupBase &net_group, - const std::unordered_map &expanded_outputs_formats, std::unordered_map &original_outputs_formats, - AsyncPipeline &async_pipeline) -{ - // streams_added is a vector which holds all stream names which vstreams connected to them were already added (for demux cases) - std::vector streams_added; - - // Building DBs that connect output_vstreams, output_streams and ops. 
- // Note: Assuming each post process op has a unique output streams. - // In other words, not possible for an output stream to be connected to more than one op - std::unordered_map post_process_metadata; - std::unordered_map op_inputs_to_op_name; - for (auto &metadata : net_group.get_ops_metadata().release()) { - post_process_metadata.insert({metadata->get_name(), metadata}); - for (auto &input_name : metadata->get_input_names()) { - op_inputs_to_op_name.insert({input_name, metadata->get_name()}); - } - } - - for (auto &output_format : expanded_outputs_formats) { - auto output_streams_expected = net_group.get_output_streams_by_vstream_name(output_format.first); - CHECK_EXPECTED_AS_STATUS(output_streams_expected); - - auto first_stream_info = output_streams_expected.value()[0]->get_info(); - if (contains(streams_added, static_cast(first_stream_info.name))) { - continue; - } - for (auto &output_stream : output_streams_expected.value()) { - streams_added.push_back(output_stream->get_info().name); - } - - if (contains(op_inputs_to_op_name, static_cast(first_stream_info.name))) { - auto &op_name = op_inputs_to_op_name.at(first_stream_info.name); - auto &op_metadata = post_process_metadata.at(op_name); - - auto output_vstreams_infos = net_group.get_output_vstream_infos(); - CHECK_EXPECTED_AS_STATUS(output_vstreams_infos); - - std::pair original_output_format = {output_format.first, original_outputs_formats.at(output_format.first)}; - - hailo_status status = add_ops_flows(async_pipeline, original_output_format, - op_metadata, output_streams_expected.value(), output_vstreams_infos.value()); - CHECK_SUCCESS(status); - - } else if ((HAILO_FORMAT_ORDER_HAILO_NMS == first_stream_info.format.order) && - (first_stream_info.nms_info.is_defused)) { - // Case defuse NMS - hailo_status status = add_nms_fuse_flow(output_streams_expected.value(), output_format, async_pipeline); - CHECK_SUCCESS(status); - } else if (first_stream_info.is_mux) { - // case demux in output from NN core 
(only one output stream is currently suppored) - hailo_status status = add_output_demux_flow(output_streams_expected.value()[0], async_pipeline, expanded_outputs_formats); - CHECK_SUCCESS(status); - } else { - // case simple and single output from NN core to user (and transformation at best) - auto output_stream_base = std::static_pointer_cast(output_streams_expected.value()[0]); - const bool is_dma_able = true; - auto final_elem_source_index = async_pipeline.get_async_hw_element()->get_source_index_from_output_stream_name(output_stream_base->name()); - CHECK_EXPECTED_AS_STATUS(final_elem_source_index); - - hailo_status status = finalize_output_flow(output_stream_base, output_format, {}, is_dma_able, async_pipeline, - async_pipeline.get_async_hw_element(), final_elem_source_index.value()); - CHECK_SUCCESS(status); - } - } - return HAILO_SUCCESS; -} - -Expected AsyncInferRunnerImpl::create_pipeline(ConfiguredNetworkGroupBase &net_group, - const std::unordered_map &inputs_formats, - const std::unordered_map &outputs_formats, - const uint32_t timeout) +void AsyncInferRunnerImpl::set_output(const std::string &output_name, MemoryView &&output_buffer, TransferDoneCallbackAsyncInfer &read_done) { - std::unordered_map> entry_elements; - std::unordered_map> last_elements; - - ElementBuildParams build_params; - - // buffer_pool_size should be the minimum of the maximum queue size of all LL streams (input and output) - auto buffer_pool_size_expected = get_min_buffer_pool_size(net_group); - CHECK_EXPECTED(buffer_pool_size_expected); - build_params.buffer_pool_size = buffer_pool_size_expected.release(); - build_params.elem_stats_flags = HAILO_PIPELINE_ELEM_STATS_NONE; - build_params.vstream_stats_flags = HAILO_VSTREAM_STATS_NONE; - - auto async_pipeline_expected = AsyncPipeline::create(); - CHECK_EXPECTED(async_pipeline_expected); - auto async_pipeline = async_pipeline_expected.release(); - - auto input_streams_expected = get_input_streams_from_net_group(net_group, 
inputs_formats); - CHECK_EXPECTED(input_streams_expected); - - auto input_expanded_format = expand_auto_input_formats(net_group, inputs_formats); - CHECK_EXPECTED(input_expanded_format); - - std::vector> input_streams_list; - input_streams_list.reserve(input_streams_expected.value().size()); - for (auto &input_stream : input_streams_expected.value()) { - input_streams_list.push_back(input_stream.second); - } - - auto output_streams_expected = get_output_streams_from_net_group(net_group, outputs_formats); - CHECK_EXPECTED(output_streams_expected); - - auto output_expanded_format = expand_auto_output_formats(net_group, outputs_formats); - CHECK_EXPECTED(output_expanded_format); - - auto outputs_original_formats = outputs_formats; // The original formats is needed for specific format expanding (required for PP OPs, like argmax) - - std::vector> output_streams_list; - output_streams_list.reserve(output_streams_expected.value().size()); - for (auto &output_stream : output_streams_expected.value()) { - output_streams_list.push_back(output_stream.second); - } - - auto shutdown_event_expected = Event::create_shared(Event::State::not_signalled); - CHECK_EXPECTED(shutdown_event_expected); - - build_params.shutdown_event = shutdown_event_expected.release(); - build_params.pipeline_status = make_shared_nothrow>(HAILO_SUCCESS); - CHECK_ARG_NOT_NULL_AS_EXPECTED(build_params.pipeline_status); - build_params.timeout = std::chrono::milliseconds(timeout); - - async_pipeline.set_build_params(build_params); - - // all elements in async pipeline start as last elements, and in the end of this func all non-last-copy elements will be added buffers - bool is_last_copy_element = true; - - auto async_hw_elem = AsyncHwElement::create(input_streams_list, output_streams_list, build_params.timeout, - build_params.buffer_pool_size, build_params.elem_stats_flags, - build_params.vstream_stats_flags, build_params.shutdown_event, - "AsyncHwElement", build_params.pipeline_status, 
PipelineDirection::PUSH, is_last_copy_element); - CHECK_EXPECTED(async_hw_elem); - async_pipeline.add_element_to_pipeline(async_hw_elem.value()); - async_pipeline.set_async_hw_element(async_hw_elem.release()); - - // TODO: HRT-11759 - hailo_status status = create_pre_async_hw_elements(net_group, input_streams_expected.value(), input_expanded_format.value(), - async_pipeline); - CHECK_SUCCESS_AS_EXPECTED(status); - - status = create_post_async_hw_elements(net_group, output_expanded_format.value(), outputs_original_formats, async_pipeline); - CHECK_SUCCESS_AS_EXPECTED(status); - - return async_pipeline; + m_output_buffers[output_name] = std::move(output_buffer); + m_read_dones[output_name] = read_done; } std::vector> AsyncInferRunnerImpl::get_pipeline() const { - return m_async_pipeline.get_pipeline(); + return m_async_pipeline->get_pipeline(); } std::string AsyncInferRunnerImpl::get_pipeline_description() const @@ -1097,4 +325,14 @@ std::string AsyncInferRunnerImpl::get_pipeline_description() const return pipeline_str.str(); } +hailo_status AsyncInferRunnerImpl::get_pipeline_status() const +{ + return m_pipeline_status->load(); +} + +std::shared_ptr AsyncInferRunnerImpl::get_async_pipeline() const +{ + return m_async_pipeline; +} + } /* namespace hailort */ diff --git a/hailort/libhailort/src/net_flow/pipeline/async_infer_runner.hpp b/hailort/libhailort/src/net_flow/pipeline/async_infer_runner.hpp new file mode 100644 index 0000000..3467677 --- /dev/null +++ b/hailort/libhailort/src/net_flow/pipeline/async_infer_runner.hpp @@ -0,0 +1,105 @@ +/** + * Copyright (c) 2020-2023 Hailo Technologies Ltd. All rights reserved. 
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file async_infer_runner.hpp + * @brief Implementation of the async HL infer + **/ + +#ifndef _HAILO_ASYNC_INFER_RUNNER_HPP_ +#define _HAILO_ASYNC_INFER_RUNNER_HPP_ + +#include "network_group/network_group_internal.hpp" +#include "net_flow/pipeline/pipeline.hpp" +#include "net_flow/pipeline/pipeline_builder.hpp" +#include "net_flow/pipeline/vstream_internal.hpp" +#include "net_flow/ops/op.hpp" + +namespace hailort +{ + +class AsyncPipeline +{ +public: + static Expected> create_shared(); + AsyncPipeline &operator=(const AsyncPipeline &) = delete; + AsyncPipeline(); + virtual ~AsyncPipeline() = default; + + void add_element_to_pipeline(std::shared_ptr pipeline_element); + void set_async_hw_element(std::shared_ptr async_hw_element); + void add_entry_element(std::shared_ptr pipeline_element, const std::string &input_name); + void add_last_element(std::shared_ptr pipeline_element, const std::string &output_name); + void set_build_params(ElementBuildParams &build_params); + void shutdown(hailo_status error_status); + + const std::vector>& get_pipeline() const; + const std::unordered_map>& get_entry_elements() const; + const std::unordered_map>& get_last_elements() const; + const std::shared_ptr get_async_hw_element(); + const ElementBuildParams get_build_params(); + void set_as_multi_planar(); + bool is_multi_planar(); + +private: + std::shared_ptr m_async_hw_element; + std::vector> m_pipeline_elements; + std::unordered_map> m_entry_elements; + std::unordered_map> m_last_elements; + ElementBuildParams m_build_params; + bool m_is_multi_planar; +}; + +class AsyncInferRunnerImpl +{ +public: + static Expected> create(std::shared_ptr net_group, + const std::unordered_map &inputs_formats, const std::unordered_map &outputs_formats, + const uint32_t timeout = HAILO_DEFAULT_ASYNC_INFER_TIMEOUT_MS); + AsyncInferRunnerImpl(AsyncInferRunnerImpl &&) = delete; + AsyncInferRunnerImpl(const 
AsyncInferRunnerImpl &) = delete; + AsyncInferRunnerImpl &operator=(AsyncInferRunnerImpl &&) = delete; + AsyncInferRunnerImpl &operator=(const AsyncInferRunnerImpl &) = delete; + virtual ~AsyncInferRunnerImpl(); + AsyncInferRunnerImpl(std::shared_ptr async_pipeline, std::shared_ptr> pipeline_status); + + hailo_status async_infer(); + + void abort(); + + Expected can_push_buffers(); + + // TODO: consider removing the methods below (needed for unit testing) + void add_element_to_pipeline(std::shared_ptr pipeline_element); + void add_entry_element(std::shared_ptr pipeline_element, const std::string &input_name); + void add_last_element(std::shared_ptr pipeline_element, const std::string &output_name); + + void set_input(const std::string &input_name, MemoryView &&input_buffer, TransferDoneCallbackAsyncInfer &write_done); + void set_input(const std::string &input_name, hailo_pix_buffer_t input_buffer, TransferDoneCallbackAsyncInfer &write_done); + void set_output(const std::string &output_name, MemoryView &&output_buffer, TransferDoneCallbackAsyncInfer &read_done); + + std::unordered_map> get_entry_elements(); + std::unordered_map> get_last_elements(); + + std::vector> get_pipeline() const; + std::string get_pipeline_description() const; + hailo_status get_pipeline_status() const; + std::shared_ptr get_async_pipeline() const; + +protected: + hailo_status start_pipeline(); + hailo_status stop_pipeline(); + + std::shared_ptr m_async_pipeline; + std::unordered_map m_input_buffers; + std::unordered_map m_output_buffers; + std::unordered_map m_read_dones; + volatile bool m_is_activated; + volatile bool m_is_aborted; + std::shared_ptr> m_pipeline_status; +}; + +} /* namespace hailort */ + +#endif /* _HAILO_ASYNC_INFER_RUNNER_HPP_ */ diff --git a/hailort/libhailort/src/net_flow/pipeline/async_infer_runner_internal.hpp b/hailort/libhailort/src/net_flow/pipeline/async_infer_runner_internal.hpp deleted file mode 100644 index f07316e..0000000 --- 
a/hailort/libhailort/src/net_flow/pipeline/async_infer_runner_internal.hpp +++ /dev/null @@ -1,184 +0,0 @@ -/** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. - * Distributed under the MIT license (https://opensource.org/licenses/MIT) - **/ -/** - * @file async_infer_runner_internal.hpp - * @brief Implemention of the async HL infer - **/ - -#ifndef _HAILO_ASYNC_INFER_RUNNER_INTERNAL_HPP_ -#define _HAILO_ASYNC_INFER_RUNNER_INTERNAL_HPP_ - -#include "network_group/network_group_internal.hpp" -#include "net_flow/pipeline/pipeline.hpp" -#include "net_flow/pipeline/vstream_internal.hpp" -#include "net_flow/ops/argmax_post_process.hpp" -#include "net_flow/ops/softmax_post_process.hpp" -#include "net_flow/ops/yolox_post_process.hpp" -#include "net_flow/ops/ssd_post_process.hpp" -#include "net_flow/ops/op.hpp" - -namespace hailort -{ -class AsyncPipeline -{ -public: - static Expected create(); - AsyncPipeline &operator=(const AsyncPipeline &) = delete; - - virtual ~AsyncPipeline() = default; - - void add_element_to_pipeline(std::shared_ptr pipeline_element); - void set_async_hw_element(std::shared_ptr async_hw_element); - void add_entry_element(std::shared_ptr pipeline_element, const std::string &input_name); - void add_last_element(std::shared_ptr pipeline_element, const std::string &output_name); - void set_build_params(ElementBuildParams &build_params); - - const std::vector>& get_pipeline() const; - const std::unordered_map>& get_entry_elements() const; - const std::unordered_map>& get_last_elements() const; - const std::shared_ptr get_async_hw_element(); - const ElementBuildParams get_build_params(); - -private: - AsyncPipeline(); - - std::vector> m_pipeline_elements; - std::shared_ptr m_async_hw_element; - std::unordered_map> m_entry_elements; - std::unordered_map> m_last_elements; - ElementBuildParams m_build_params; -}; - -class AsyncInferRunnerInternal -{ -public: - static Expected> create(ConfiguredNetworkGroupBase &net_group, - const 
std::unordered_map &inputs_formats, const std::unordered_map &outputs_formats); - AsyncInferRunnerInternal(AsyncInferRunnerInternal &&other) noexcept = default; - AsyncInferRunnerInternal &operator=(AsyncInferRunnerInternal &&other) noexcept = default; - virtual ~AsyncInferRunnerInternal() = default; - - virtual hailo_status async_infer() = 0; - virtual std::string get_pipeline_description() const = 0; - virtual std::vector> get_pipeline() const = 0; - -protected: - AsyncInferRunnerInternal(); - std::shared_ptr> m_pipeline_status; - -}; - - -class AsyncInferRunnerImpl : public AsyncInferRunnerInternal -{ -public: - static Expected> create(ConfiguredNetworkGroupBase &net_group, - const std::unordered_map &inputs_formats, const std::unordered_map &outputs_formats, - const uint32_t timeout = HAILO_DEFAULT_VSTREAM_TIMEOUT_MS); - AsyncInferRunnerImpl(AsyncInferRunnerImpl &&) = delete; - AsyncInferRunnerImpl(const AsyncInferRunnerImpl &) = delete; - AsyncInferRunnerImpl &operator=(AsyncInferRunnerImpl &&) = delete; - AsyncInferRunnerImpl &operator=(const AsyncInferRunnerImpl &) = delete; - virtual ~AsyncInferRunnerImpl(); - AsyncInferRunnerImpl(AsyncPipeline &&async_pipeline); - - virtual hailo_status async_infer() override; - - // TODO: consider removing the methods below (needed for unit testing) - void add_element_to_pipeline(std::shared_ptr pipeline_element); - void add_entry_element(std::shared_ptr pipeline_element, const std::string &input_name); - void add_last_element(std::shared_ptr pipeline_element, const std::string &output_name); - - void set_input(const std::string &input_name, MemoryView &&input_buffer, TransferDoneCallbackAsyncInfer &write_done); - void set_output(const std::string &output_name, MemoryView &&output_buffer, TransferDoneCallbackAsyncInfer &read_done); - - std::unordered_map> get_entry_elements(); - std::unordered_map> get_last_elements(); - - virtual std::vector> get_pipeline() const override; - virtual std::string get_pipeline_description() 
const override; - - static Expected get_min_buffer_pool_size(ConfiguredNetworkGroupBase &net_group); - -protected: - static Expected create_pipeline(ConfiguredNetworkGroupBase &net_group, const std::unordered_map &inputs_formats, - const std::unordered_map &outputs_formats, const uint32_t timeout); - - hailo_status start_pipeline(); - hailo_status stop_pipeline(); - - static Expected>> get_input_streams_from_net_group(ConfiguredNetworkGroupBase &net_group, - const std::unordered_map &inputs_formats); - static Expected>> get_output_streams_from_net_group(ConfiguredNetworkGroupBase &net_group, - const std::unordered_map &outputs_formats); - static Expected> expand_auto_input_formats(ConfiguredNetworkGroupBase &net_group, - const std::unordered_map &inputs_formats); - static Expected> expand_auto_output_formats(ConfiguredNetworkGroupBase &net_group, - const std::unordered_map &outputs_formats); - static Expected> get_output_format_from_edge_info_name(std::string edge_info_name, - const std::unordered_map &outputs_formats); - - static hailo_status create_pre_async_hw_elements(ConfiguredNetworkGroupBase &net_group, - std::unordered_map> &input_streams, - const std::unordered_map &inputs_formats, AsyncPipeline &async_pipeline); - static hailo_status create_post_async_hw_elements(ConfiguredNetworkGroupBase &net_group, - const std::unordered_map &expanded_outputs_formats, std::unordered_map &original_outputs_formats, - AsyncPipeline &async_pipeline); - - static hailo_status add_argmax_flow(AsyncPipeline &async_pipeline, OutputStreamPtrVector &output_streams, - const std::pair &output_format, const net_flow::PostProcessOpMetadataPtr &argmax_op_metadata); - static hailo_status add_softmax_flow(AsyncPipeline &async_pipeline, OutputStreamPtrVector &output_streams, - const std::pair &output_format, const net_flow::PostProcessOpMetadataPtr &softmax_op_metadata); - static hailo_status add_ops_flows(AsyncPipeline &async_pipeline, - const std::pair &output_format, 
net_flow::PostProcessOpMetadataPtr &op_metadata, - OutputStreamPtrVector &output_streams, const std::vector &vstreams_infos); - static hailo_status add_output_demux_flow(std::shared_ptr &output_stream, - AsyncPipeline &async_pipeline, const std::unordered_map &outputs_formats); - static hailo_status add_nms_fuse_flow(OutputStreamPtrVector &output_streams, const std::pair &output_format, - AsyncPipeline &async_pipeline); - static hailo_status add_nms_flow(AsyncPipeline &async_pipeline, OutputStreamPtrVector &output_streams, - const std::pair &output_format, const std::shared_ptr &nms_op, - const hailo_vstream_info_t &vstream_info); - static hailo_status add_iou_flow(AsyncPipeline &async_pipeline, OutputStreamPtrVector &output_streams, - const std::pair &output_format, const net_flow::PostProcessOpMetadataPtr &iou_op_metadata); - static hailo_status add_nms_flows(AsyncPipeline &async_pipeline, OutputStreamPtrVector &output_streams, - const std::pair &output_format, const net_flow::PostProcessOpMetadataPtr &op_metadata, - const std::vector &vstreams_infos); - - - static Expected> add_post_infer_element(const hailo_format_t &output_format, const hailo_nms_info_t &nms_info, - AsyncPipeline &async_pipeline, const hailo_3d_image_shape_t &src_image_shape, const hailo_format_t &src_format, - const hailo_3d_image_shape_t &dst_image_shape, const std::vector &dst_quant_infos, bool is_last_copy_element, - std::shared_ptr final_elem, const uint32_t final_elem_source_index = 0); - static Expected> add_last_async_element(AsyncPipeline &async_pipeline, - const std::string &output_format_name, std::shared_ptr final_elem, const uint32_t final_elem_source_index = 0); - static Expected> add_push_queue_element(const std::string &queue_name, AsyncPipeline &async_pipeline, - std::shared_ptr final_elem, const uint32_t final_elem_source_index = 0); - static Expected> add_nms_to_detections_convert_element(AsyncPipeline &async_pipeline, - std::shared_ptr output_stream, const std::string 
&element_name, const net_flow::PostProcessOpMetadataPtr &op_metadata, - const bool is_last_copy_element, std::shared_ptr final_elem, const uint32_t final_elem_source_index = 0); - static Expected> add_remove_overlapping_bboxes_element(AsyncPipeline &async_pipeline, - std::shared_ptr output_stream, const std::string &element_name, const net_flow::PostProcessOpMetadataPtr &op_metadata, - const bool is_last_copy_element, std::shared_ptr final_elem, const uint32_t final_elem_source_index = 0); - static Expected> add_fill_nms_format_element(AsyncPipeline &async_pipeline, - std::shared_ptr output_stream, const std::string &element_name, const net_flow::PostProcessOpMetadataPtr &op_metadata, - const hailo_format_t &output_format, const bool is_last_copy_element, std::shared_ptr final_elem, const uint32_t final_elem_source_index = 0); - - static hailo_status finalize_output_flow(std::shared_ptr &output_stream_base, - const std::pair &output_format, const hailo_nms_info_t &nms_info, const bool is_dma_able, - AsyncPipeline &async_pipeline, std::shared_ptr final_elem, const uint32_t final_elem_source_index = 0); - - AsyncPipeline m_async_pipeline; - std::unordered_map m_input_buffers; - std::unordered_map m_write_dones; - std::unordered_map m_output_buffers; - std::unordered_map m_read_dones; - volatile bool m_is_activated; - volatile bool m_is_aborted; -}; - -} /* namespace hailort */ - -#endif /* _HAILO_ASYNC_INFER_RUNNER_INTERNAL_HPP_ */ diff --git a/hailort/libhailort/src/net_flow/pipeline/infer_model.cpp b/hailort/libhailort/src/net_flow/pipeline/infer_model.cpp index 3cbf267..8f0d8b2 100644 --- a/hailort/libhailort/src/net_flow/pipeline/infer_model.cpp +++ b/hailort/libhailort/src/net_flow/pipeline/infer_model.cpp @@ -14,9 +14,9 @@ #include "hailo/vdevice.hpp" #include "hailo/infer_model.hpp" #include "net_flow/pipeline/infer_model_internal.hpp" -#include "net_flow/pipeline/async_infer_runner_internal.hpp" +#include "net_flow/pipeline/async_infer_runner.hpp" -#define 
WAIT_FOR_ASYNC_IN_DTOR_TIMEOUT (10000) +#define WAIT_FOR_ASYNC_IN_DTOR_TIMEOUT (std::chrono::milliseconds(10000)) namespace hailort { @@ -26,19 +26,38 @@ std::string InferModel::InferStream::Impl::name() const return m_vstream_info.name; } +hailo_3d_image_shape_t InferModel::InferStream::Impl::shape() const +{ + return m_vstream_info.shape; +} + +hailo_format_t InferModel::InferStream::Impl::format() const +{ + return m_user_buffer_format; +} + size_t InferModel::InferStream::Impl::get_frame_size() const { return HailoRTCommon::get_frame_size(m_vstream_info, m_user_buffer_format); } +Expected InferModel::InferStream::Impl::get_nms_shape() const +{ + CHECK_AS_EXPECTED(HailoRTCommon::is_nms(m_vstream_info.format.order), HAILO_INVALID_OPERATION, + "Output {} is not NMS", name()); + auto res = m_vstream_info.nms_shape; + return res; +} + +std::vector InferModel::InferStream::Impl::get_quant_infos() const +{ + // TODO: Support quant infos vector + return {m_vstream_info.quant_info}; +} + void InferModel::InferStream::Impl::set_format_type(hailo_format_type_t type) { m_user_buffer_format.type = type; - if (HAILO_FORMAT_TYPE_FLOAT32 == type) { - m_user_buffer_format.flags = HAILO_FORMAT_FLAGS_NONE; - } else { - m_user_buffer_format.flags = HAILO_FORMAT_FLAGS_QUANTIZED; - } } void InferModel::InferStream::Impl::set_format_order(hailo_format_order_t order) @@ -46,9 +65,25 @@ void InferModel::InferStream::Impl::set_format_order(hailo_format_order_t order) m_user_buffer_format.order = order; } -hailo_format_t InferModel::InferStream::Impl::get_user_buffer_format() +bool InferModel::InferStream::Impl::is_nms() const { - return m_user_buffer_format; + return HailoRTCommon::is_nms(m_vstream_info.format.order); +} + +void InferModel::InferStream::Impl::set_nms_score_threshold(float32_t threshold) +{ + m_nms_score_threshold = threshold; +} + +void InferModel::InferStream::Impl::set_nms_iou_threshold(float32_t threshold) +{ + m_nms_iou_threshold = threshold; +} + +void 
InferModel::InferStream::Impl::set_nms_max_proposals_per_class(uint32_t max_proposals_per_class) +{ + m_nms_max_proposals_per_class = max_proposals_per_class; + m_vstream_info.nms_shape.max_bboxes_per_class = max_proposals_per_class; } InferModel::InferStream::InferStream(std::shared_ptr pimpl) : m_pimpl(pimpl) @@ -60,11 +95,31 @@ const std::string InferModel::InferStream::name() const return m_pimpl->name(); } +hailo_3d_image_shape_t InferModel::InferStream::shape() const +{ + return m_pimpl->shape(); +} + +hailo_format_t InferModel::InferStream::format() const +{ + return m_pimpl->format(); +} + size_t InferModel::InferStream::get_frame_size() const { return m_pimpl->get_frame_size(); } +Expected InferModel::InferStream::get_nms_shape() const +{ + return m_pimpl->get_nms_shape(); +} + +std::vector InferModel::InferStream::get_quant_infos() const +{ + return m_pimpl->get_quant_infos(); +} + void InferModel::InferStream::set_format_type(hailo_format_type_t type) { m_pimpl->set_format_type(type); @@ -75,14 +130,30 @@ void InferModel::InferStream::set_format_order(hailo_format_order_t order) m_pimpl->set_format_order(order); } -hailo_format_t InferModel::InferStream::get_user_buffer_format() +bool InferModel::InferStream::is_nms() const +{ + return m_pimpl->is_nms(); +} + +void InferModel::InferStream::set_nms_score_threshold(float32_t threshold) +{ + m_pimpl->set_nms_score_threshold(threshold); +} + +void InferModel::InferStream::set_nms_iou_threshold(float32_t threshold) +{ + m_pimpl->set_nms_iou_threshold(threshold); +} + +void InferModel::InferStream::set_nms_max_proposals_per_class(uint32_t max_proposals_per_class) { - return m_pimpl->get_user_buffer_format(); + m_pimpl->set_nms_max_proposals_per_class(max_proposals_per_class); } InferModel::InferModel(VDevice &vdevice, Hef &&hef, std::unordered_map &&inputs, std::unordered_map &&outputs) - : m_vdevice(vdevice), m_hef(std::move(hef)), m_inputs(std::move(inputs)), m_outputs(std::move(outputs)) + : 
m_vdevice(vdevice), m_hef(std::move(hef)), m_inputs(std::move(inputs)), m_outputs(std::move(outputs)), + m_config_params(HailoRTDefaults::get_configure_params()) { m_inputs_vector.reserve(m_inputs.size()); m_input_names.reserve(m_inputs.size()); @@ -107,14 +178,35 @@ InferModel::InferModel(InferModel &&other) : m_inputs_vector(std::move(other.m_inputs_vector)), m_outputs_vector(std::move(other.m_outputs_vector)), m_input_names(std::move(other.m_input_names)), - m_output_names(std::move(other.m_output_names)) + m_output_names(std::move(other.m_output_names)), + m_config_params(std::move(other.m_config_params)) { } +const Hef &InferModel::hef() const +{ + return m_hef; +} + +void InferModel::set_batch_size(uint16_t batch_size) +{ + m_config_params.batch_size = batch_size; +} + +void InferModel::set_power_mode(hailo_power_mode_t power_mode) +{ + m_config_params.power_mode = power_mode; +} + +void InferModel::set_hw_latency_measurement_flags(hailo_latency_measurement_flags_t latency) +{ + m_config_params.latency = latency; +} + // TODO: document that this will check validity of format tpyes/orders Expected InferModel::configure(const std::string &network_name) { - CHECK_AS_EXPECTED("" == network_name, HAILO_NOT_IMPLEMENTED, "Passing network name is not supported yet!"); + CHECK_AS_EXPECTED(network_name.empty(), HAILO_NOT_IMPLEMENTED, "Passing network name is not supported yet!"); auto configure_params = m_vdevice.get().create_configure_params(m_hef); CHECK_EXPECTED(configure_params); @@ -123,6 +215,13 @@ Expected InferModel::configure(const std::string &network_ for (auto &stream_params_name_pair : network_group_name_params_pair.second.stream_params_by_name) { stream_params_name_pair.second.flags = HAILO_STREAM_FLAGS_ASYNC; } + + for (auto &network_name_params_pair : network_group_name_params_pair.second.network_params_by_name) { + network_name_params_pair.second.batch_size = m_config_params.batch_size; + } + + network_group_name_params_pair.second.power_mode = 
m_config_params.power_mode; + network_group_name_params_pair.second.latency = m_config_params.latency; } auto network_groups = m_vdevice.get().configure(m_hef, configure_params.value()); @@ -135,28 +234,65 @@ Expected InferModel::configure(const std::string &network_ CHECK_EXPECTED(input_vstream_infos); for (const auto &vstream_info : input_vstream_infos.value()) { - inputs_formats[vstream_info.name] = m_inputs.at(vstream_info.name).get_user_buffer_format(); + assert(contains(m_inputs, std::string(vstream_info.name))); + inputs_formats[vstream_info.name] = m_inputs.at(vstream_info.name).format(); } auto output_vstream_infos = network_groups.value()[0]->get_output_vstream_infos(); CHECK_EXPECTED(output_vstream_infos); for (const auto &vstream_info : output_vstream_infos.value()) { - outputs_formats[vstream_info.name] = m_outputs.at(vstream_info.name).get_user_buffer_format(); + assert(contains(m_outputs, std::string(vstream_info.name))); + outputs_formats[vstream_info.name] = m_outputs.at(vstream_info.name).format(); } - // downcasting from ConfiguredNetworkGroup to ConfiguredNetworkGroupBase since we need some functions from ConfiguredNetworkGroupBase - std::shared_ptr configured_net_group_base = std::dynamic_pointer_cast(network_groups.value()[0]); - CHECK_NOT_NULL_AS_EXPECTED(configured_net_group_base, HAILO_INTERNAL_FAILURE); - - auto async_infer_runner = AsyncInferRunnerImpl::create(*configured_net_group_base, inputs_formats, outputs_formats); - CHECK_EXPECTED(async_infer_runner); + CHECK_AS_EXPECTED(std::all_of(m_inputs.begin(), m_inputs.end(), [](const auto &input_pair) { + return ((input_pair.second.m_pimpl->m_nms_score_threshold == INVALID_NMS_CONFIG) && + (input_pair.second.m_pimpl->m_nms_iou_threshold == INVALID_NMS_CONFIG) && + (input_pair.second.m_pimpl->m_nms_max_proposals_per_class == static_cast(INVALID_NMS_CONFIG))); + }), HAILO_INVALID_OPERATION, "NMS config was changed for input"); + + for (const auto &output_pair : m_outputs) { + auto &edge_name 
= output_pair.first; + if ((output_pair.second.m_pimpl->m_nms_score_threshold == INVALID_NMS_CONFIG) && + (output_pair.second.m_pimpl->m_nms_iou_threshold == INVALID_NMS_CONFIG) && + (output_pair.second.m_pimpl->m_nms_max_proposals_per_class == static_cast(INVALID_NMS_CONFIG))) { + continue; + } + if (output_pair.second.m_pimpl->m_nms_score_threshold != INVALID_NMS_CONFIG) { + auto status = network_groups.value()[0]->set_nms_score_threshold(edge_name, output_pair.second.m_pimpl->m_nms_score_threshold); + CHECK_SUCCESS_AS_EXPECTED(status); + } + if (output_pair.second.m_pimpl->m_nms_iou_threshold != INVALID_NMS_CONFIG) { + auto status = network_groups.value()[0]->set_nms_iou_threshold(edge_name, output_pair.second.m_pimpl->m_nms_iou_threshold); + CHECK_SUCCESS_AS_EXPECTED(status); + } + if (output_pair.second.m_pimpl->m_nms_max_proposals_per_class != static_cast(INVALID_NMS_CONFIG)) { + auto status = network_groups.value()[0]->set_nms_max_bboxes_per_class(edge_name, output_pair.second.m_pimpl->m_nms_max_proposals_per_class); + CHECK_SUCCESS_AS_EXPECTED(status); + } + } - auto configured_infer_model_pimpl = make_shared_nothrow(network_groups.value()[0], async_infer_runner.release(), + auto configured_infer_model_pimpl = ConfiguredInferModelImpl::create(network_groups.value()[0], inputs_formats, outputs_formats, get_input_names(), get_output_names()); - CHECK_NOT_NULL_AS_EXPECTED(configured_infer_model_pimpl, HAILO_OUT_OF_HOST_MEMORY); + CHECK_EXPECTED(configured_infer_model_pimpl); - return ConfiguredInferModel(configured_infer_model_pimpl); + return ConfiguredInferModel(configured_infer_model_pimpl.release()); +} + +Expected InferModel::configure_for_ut(std::shared_ptr async_infer_runner, + const std::vector &input_names, const std::vector &output_names) +{ + auto configure_params = m_vdevice.get().create_configure_params(m_hef); + CHECK_EXPECTED(configure_params); + + auto network_groups = m_vdevice.get().configure(m_hef, configure_params.value()); + 
CHECK_EXPECTED(network_groups); + + auto configured_infer_model_pimpl = ConfiguredInferModelImpl::create_for_ut(network_groups.value()[0], async_infer_runner, input_names, output_names); + CHECK_EXPECTED(configured_infer_model_pimpl); + + return ConfiguredInferModel(configured_infer_model_pimpl.release()); } Expected InferModel::input() @@ -237,19 +373,74 @@ hailo_status ConfiguredInferModel::run(ConfiguredInferModel::Bindings bindings, } Expected ConfiguredInferModel::run_async(ConfiguredInferModel::Bindings bindings, - std::function callback) + std::function callback) { return m_pimpl->run_async(bindings, callback); } +Expected ConfiguredInferModel::get_hw_latency_measurement(const std::string &network_name) +{ + return m_pimpl->get_hw_latency_measurement(network_name); +} + +hailo_status ConfiguredInferModel::set_scheduler_timeout(const std::chrono::milliseconds &timeout) +{ + return m_pimpl->set_scheduler_timeout(timeout); +} + +hailo_status ConfiguredInferModel::set_scheduler_threshold(uint32_t threshold) +{ + return m_pimpl->set_scheduler_threshold(threshold); +} + +hailo_status ConfiguredInferModel::set_scheduler_priority(uint8_t priority) +{ + return m_pimpl->set_scheduler_priority(priority); +} + +Expected ConfiguredInferModel::get_async_queue_size() +{ + return m_pimpl->get_async_queue_size(); +} + +Expected> ConfiguredInferModelImpl::create(std::shared_ptr net_group, + const std::unordered_map &inputs_formats, + const std::unordered_map &outputs_formats, + const std::vector &input_names, const std::vector &output_names, const uint32_t timeout) +{ + auto async_infer_runner = AsyncInferRunnerImpl::create(net_group, inputs_formats, outputs_formats, timeout); + CHECK_EXPECTED(async_infer_runner); + + auto configured_infer_model_pimpl = make_shared_nothrow(net_group, async_infer_runner.release(), + input_names, output_names); + CHECK_NOT_NULL_AS_EXPECTED(configured_infer_model_pimpl, HAILO_OUT_OF_HOST_MEMORY); + + return configured_infer_model_pimpl; +} + 
+Expected> ConfiguredInferModelImpl::create_for_ut(std::shared_ptr net_group, + std::shared_ptr async_infer_runner, const std::vector &input_names, const std::vector &output_names) +{ + auto configured_infer_model_pimpl = make_shared_nothrow(net_group, async_infer_runner, + input_names, output_names); + CHECK_NOT_NULL_AS_EXPECTED(configured_infer_model_pimpl, HAILO_OUT_OF_HOST_MEMORY); + + return configured_infer_model_pimpl; +} + ConfiguredInferModelImpl::ConfiguredInferModelImpl(std::shared_ptr cng, - std::shared_ptr async_infer_runner, + std::shared_ptr async_infer_runner, const std::vector &input_names, const std::vector &output_names) : m_cng(cng), m_async_infer_runner(async_infer_runner), m_ongoing_parallel_transfers(0), m_input_names(input_names), m_output_names(output_names) { } +ConfiguredInferModelImpl::~ConfiguredInferModelImpl() +{ + abort(); +} + Expected ConfiguredInferModelImpl::create_bindings() { std::unordered_map inputs; @@ -283,20 +474,29 @@ Expected ConfiguredInferModelImpl::create_bindin hailo_status ConfiguredInferModelImpl::wait_for_async_ready(std::chrono::milliseconds timeout) { std::unique_lock lock(m_mutex); + hailo_status status = HAILO_SUCCESS; + bool was_successful = m_cv.wait_for(lock, timeout, [this, &status] () -> bool { + auto pools_are_ready = m_async_infer_runner->can_push_buffers(); + if (HAILO_SUCCESS != pools_are_ready.status()) { + status = pools_are_ready.status(); + return true; + } + return pools_are_ready.release(); + }); + CHECK_SUCCESS(status); - // downcasting from ConfiguredNetworkGroup to ConfiguredNetworkGroupBase since we need some functions from ConfiguredNetworkGroupBase - std::shared_ptr configured_net_group_base = std::dynamic_pointer_cast(m_cng); - CHECK_NOT_NULL(configured_net_group_base, HAILO_INTERNAL_FAILURE); + CHECK(was_successful, HAILO_TIMEOUT, "Got timeout in `wait_for_async_ready`"); - auto low_level_queue_size = m_async_infer_runner->get_min_buffer_pool_size(*configured_net_group_base); - 
CHECK_EXPECTED_AS_STATUS(low_level_queue_size); + return HAILO_SUCCESS; +} - bool was_successful = m_cv.wait_for(lock, timeout, [this, low_level_queue_size = low_level_queue_size.value()] () -> bool { - return m_ongoing_parallel_transfers < low_level_queue_size; +void ConfiguredInferModelImpl::abort() +{ + m_async_infer_runner->abort(); + std::unique_lock lock(m_mutex); + m_cv.wait_for(lock, WAIT_FOR_ASYNC_IN_DTOR_TIMEOUT, [this] () -> bool { + return m_ongoing_parallel_transfers == 0; }); - CHECK(was_successful, HAILO_TIMEOUT); - - return HAILO_SUCCESS; } hailo_status ConfiguredInferModelImpl::activate() @@ -305,7 +505,7 @@ hailo_status ConfiguredInferModelImpl::activate() CHECK_EXPECTED_AS_STATUS(activated_ng); m_ang = activated_ng.release(); - return HAILO_SUCCESS;; + return HAILO_SUCCESS; } void ConfiguredInferModelImpl::deactivate() @@ -315,7 +515,7 @@ void ConfiguredInferModelImpl::deactivate() hailo_status ConfiguredInferModelImpl::run(ConfiguredInferModel::Bindings bindings, std::chrono::milliseconds timeout) { - auto job = run_async(bindings, [] (const CompletionInfoAsyncInfer &) {}); + auto job = run_async(bindings, [] (const AsyncInferCompletionInfo &) {}); CHECK_EXPECTED_AS_STATUS(job); auto status = job->wait(timeout); @@ -324,16 +524,33 @@ hailo_status ConfiguredInferModelImpl::run(ConfiguredInferModel::Bindings bindin return HAILO_SUCCESS; } +hailo_status ConfiguredInferModelImpl::validate_bindings(ConfiguredInferModel::Bindings bindings) +{ + for (const auto &input_name : m_input_names) { + if (BufferType::VIEW == bindings.input(input_name)->m_pimpl->get_type()) { + CHECK_EXPECTED_AS_STATUS(bindings.input(input_name)->get_buffer()); + } else { + CHECK_EXPECTED_AS_STATUS(bindings.input(input_name)->get_pix_buffer()); + } + } + for (const auto &output_name : m_output_names) { + CHECK_EXPECTED_AS_STATUS(bindings.output(output_name)->get_buffer()); + } + + return HAILO_SUCCESS; +} + Expected 
ConfiguredInferModelImpl::run_async(ConfiguredInferModel::Bindings bindings, - std::function callback) + std::function callback) { + CHECK_SUCCESS_AS_EXPECTED(validate_bindings(bindings)); + auto job_pimpl = make_shared_nothrow(static_cast(m_input_names.size() + m_output_names.size())); CHECK_NOT_NULL_AS_EXPECTED(job_pimpl, HAILO_OUT_OF_HOST_MEMORY); AsyncInferJob job(job_pimpl); - TransferDoneCallbackAsyncInfer transfer_done = [this, bindings, job_pimpl, callback] - (const CompletionInfoAsyncInferInternal &internal_completion_info) { - bool should_call_callback = job_pimpl->stream_done(); + TransferDoneCallbackAsyncInfer transfer_done = [this, bindings, job_pimpl, callback](hailo_status status) { + bool should_call_callback = job_pimpl->stream_done(status); if (should_call_callback) { { std::unique_lock lock(m_mutex); @@ -341,38 +558,79 @@ Expected ConfiguredInferModelImpl::run_async(ConfiguredInferModel } m_cv.notify_all(); - CompletionInfoAsyncInfer completion_info(bindings, internal_completion_info.status); + auto final_status = (m_async_infer_runner->get_pipeline_status() == HAILO_SUCCESS) ? 
+ job_pimpl->completion_status() : m_async_infer_runner->get_pipeline_status(); + + AsyncInferCompletionInfo completion_info(bindings, final_status); callback(completion_info); + job_pimpl->mark_callback_done(); } }; for (const auto &input_name : m_input_names) { - m_async_infer_runner->set_input(input_name, bindings.input(input_name)->get_buffer(), transfer_done); + auto buff_type = bindings.input(input_name)->m_pimpl->get_type(); + if (BufferType::VIEW == buff_type) { + auto buffer = bindings.input(input_name)->get_buffer(); + CHECK_EXPECTED(buffer, "Couldnt find input buffer for '{}'", input_name); + m_async_infer_runner->set_input(input_name, buffer.release(), transfer_done); + } else if (BufferType::PIX_BUFFER == buff_type) { + auto buffer = bindings.input(input_name)->get_pix_buffer(); + CHECK_EXPECTED(buffer, "Couldnt find input buffer for '{}'", input_name); + m_async_infer_runner->set_input(input_name, buffer.release(), transfer_done); + } else { + CHECK_AS_EXPECTED(false, HAILO_NOT_FOUND, "Couldnt find input buffer for '{}'", input_name); + } } for (const auto &output_name : m_output_names) { - m_async_infer_runner->set_output(output_name, bindings.output(output_name)->get_buffer(), transfer_done); + auto buffer = bindings.output(output_name)->get_buffer(); + CHECK_EXPECTED(buffer, "Couldnt find output buffer for '{}'", output_name); + m_async_infer_runner->set_output(output_name, buffer.release(), transfer_done); } { std::unique_lock lock(m_mutex); + auto status = m_async_infer_runner->async_infer(); + CHECK_SUCCESS_AS_EXPECTED(status); m_ongoing_parallel_transfers++; } - m_cv.notify_all(); - auto status = m_async_infer_runner->async_infer(); - CHECK_SUCCESS_AS_EXPECTED(status); + m_cv.notify_all(); return job; } +Expected ConfiguredInferModelImpl::get_hw_latency_measurement(const std::string &network_name) +{ + return m_cng->get_latency_measurement(network_name); +} + +hailo_status ConfiguredInferModelImpl::set_scheduler_timeout(const 
std::chrono::milliseconds &timeout) +{ + return m_cng->set_scheduler_timeout(timeout); +} + +hailo_status ConfiguredInferModelImpl::set_scheduler_threshold(uint32_t threshold) +{ + return m_cng->set_scheduler_threshold(threshold); +} + +hailo_status ConfiguredInferModelImpl::set_scheduler_priority(uint8_t priority) +{ + return m_cng->set_scheduler_priority(priority); +} + +Expected ConfiguredInferModelImpl::get_async_queue_size() +{ + return m_cng->get_min_buffer_pool_size(); +} + AsyncInferJob::AsyncInferJob(std::shared_ptr pimpl) : m_pimpl(pimpl), m_should_wait_in_dtor(true) { } AsyncInferJob::AsyncInferJob(AsyncInferJob &&other) : - m_pimpl(std::move(other.m_pimpl)), - m_should_wait_in_dtor(std::exchange(other.m_should_wait_in_dtor, false)) + m_pimpl(std::move(other.m_pimpl)), m_should_wait_in_dtor(std::exchange(other.m_should_wait_in_dtor, false)) { } @@ -385,8 +643,14 @@ AsyncInferJob &AsyncInferJob::operator=(AsyncInferJob &&other) AsyncInferJob::~AsyncInferJob() { + if (m_pimpl == nullptr) { + // In case the user defines AsyncInferJob object without initializing it with a real object, + // the parameter `m_should_wait_in_dtor` is initialized to true and the d'tor calls for `wait()`, + // but `m_pimpl` is not initialized, resulting in seg-fault. + return; + } if (m_should_wait_in_dtor) { - auto status = wait(std::chrono::milliseconds(WAIT_FOR_ASYNC_IN_DTOR_TIMEOUT)); + auto status = wait(WAIT_FOR_ASYNC_IN_DTOR_TIMEOUT); if (HAILO_SUCCESS != status) { LOGGER__CRITICAL("Could not finish async infer request! 
status = {}", status); } @@ -395,10 +659,10 @@ AsyncInferJob::~AsyncInferJob() hailo_status AsyncInferJob::wait(std::chrono::milliseconds timeout) { + m_should_wait_in_dtor = false; auto status = m_pimpl->wait(timeout); CHECK_SUCCESS(status); - m_should_wait_in_dtor = false; return HAILO_SUCCESS; } @@ -407,34 +671,51 @@ void AsyncInferJob::detach() m_should_wait_in_dtor = false; } -AsyncInferJob::Impl::Impl(uint32_t streams_count) +AsyncInferJob::Impl::Impl(uint32_t streams_count) : m_job_completion_status(HAILO_SUCCESS) { m_ongoing_transfers = streams_count; + m_callback_called = false; } hailo_status AsyncInferJob::Impl::wait(std::chrono::milliseconds timeout) { std::unique_lock lock(m_mutex); bool was_successful = m_cv.wait_for(lock, timeout, [this] () -> bool { - return (0 == m_ongoing_transfers); + return (m_callback_called); }); - CHECK(was_successful, HAILO_TIMEOUT); + CHECK(was_successful, HAILO_TIMEOUT, "Waiting for async job to finish has failed with timeout {}!", timeout.count()); return HAILO_SUCCESS; } -bool AsyncInferJob::Impl::stream_done() +bool AsyncInferJob::Impl::stream_done(const hailo_status &status) { bool should_call_callback = false; { std::unique_lock lock(m_mutex); m_ongoing_transfers--; should_call_callback = (0 == m_ongoing_transfers); + if (HAILO_SUCCESS != status) { + m_job_completion_status = status; + } } - m_cv.notify_all(); return should_call_callback; } +hailo_status AsyncInferJob::Impl::completion_status() +{ + return m_job_completion_status; +} + +void AsyncInferJob::Impl::mark_callback_done() +{ + { + std::unique_lock lock(m_mutex); + m_callback_called = true; + } + m_cv.notify_all(); +} + ConfiguredInferModel::Bindings::Bindings(std::unordered_map &&inputs, std::unordered_map &&outputs) : m_inputs(std::move(inputs)), m_outputs(std::move(outputs)) @@ -469,19 +750,44 @@ Expected ConfiguredInferModel::Bind return copy; } -ConfiguredInferModel::Bindings::InferStream::Impl::Impl(const hailo_vstream_info_t &vstream_info) : 
m_name(vstream_info.name) +ConfiguredInferModel::Bindings::InferStream::Impl::Impl(const hailo_vstream_info_t &vstream_info) : + m_name(vstream_info.name),m_buffer_type(BufferType::UNINITIALIZED) { } hailo_status ConfiguredInferModel::Bindings::InferStream::Impl::set_buffer(MemoryView view) { m_view = view; + m_buffer_type = BufferType::VIEW; + return HAILO_SUCCESS; +} + +Expected ConfiguredInferModel::Bindings::InferStream::Impl::get_buffer() +{ + CHECK_AS_EXPECTED(BufferType::VIEW == m_buffer_type, HAILO_INVALID_OPERATION, + "Trying to get buffer as view for '{}', while it is not configured as view", m_name); + auto cp = m_view; + return cp; +} + +hailo_status ConfiguredInferModel::Bindings::InferStream::Impl::set_pix_buffer(const hailo_pix_buffer_t &pix_buffer) +{ + m_pix_buffer = pix_buffer; + m_buffer_type = BufferType::PIX_BUFFER; return HAILO_SUCCESS; } -MemoryView ConfiguredInferModel::Bindings::InferStream::Impl::get_buffer() +Expected ConfiguredInferModel::Bindings::InferStream::Impl::get_pix_buffer() +{ + CHECK_AS_EXPECTED(BufferType::PIX_BUFFER == m_buffer_type, HAILO_INVALID_OPERATION, + "Trying to get buffer as pix_buffer for '{}', while it is not configured as pix_buffer", m_name); + auto cp = m_pix_buffer; + return cp; +} + +BufferType ConfiguredInferModel::Bindings::InferStream::Impl::get_type() { - return m_view; + return m_buffer_type; } void ConfiguredInferModel::Bindings::InferStream::Impl::set_stream_callback(TransferDoneCallbackAsyncInfer callback) @@ -498,9 +804,19 @@ hailo_status ConfiguredInferModel::Bindings::InferStream::set_buffer(MemoryView return m_pimpl->set_buffer(view); } -MemoryView ConfiguredInferModel::Bindings::InferStream::get_buffer() +hailo_status ConfiguredInferModel::Bindings::InferStream::set_pix_buffer(const hailo_pix_buffer_t &pix_buffer) +{ + return m_pimpl->set_pix_buffer(pix_buffer); +} + +Expected ConfiguredInferModel::Bindings::InferStream::get_buffer() { return m_pimpl->get_buffer(); } +Expected 
ConfiguredInferModel::Bindings::InferStream::get_pix_buffer() +{ + return m_pimpl->get_pix_buffer(); +} + } /* namespace hailort */ diff --git a/hailort/libhailort/src/net_flow/pipeline/infer_model_internal.hpp b/hailort/libhailort/src/net_flow/pipeline/infer_model_internal.hpp index 2f81dc8..bdea811 100644 --- a/hailort/libhailort/src/net_flow/pipeline/infer_model_internal.hpp +++ b/hailort/libhailort/src/net_flow/pipeline/infer_model_internal.hpp @@ -10,8 +10,10 @@ #ifndef _HAILO_INFER_MODEL_INTERNAL_HPP_ #define _HAILO_INFER_MODEL_INTERNAL_HPP_ +#include "hailo/infer_model.hpp" #include "hailo/vstream.hpp" -#include "net_flow/pipeline/async_infer_runner_internal.hpp" +#include "net_flow/pipeline/async_infer_runner.hpp" +#include "net_flow/ops/nms_post_process.hpp" namespace hailort { @@ -21,34 +23,54 @@ class ConfiguredInferModel::Bindings::InferStream::Impl public: Impl(const hailo_vstream_info_t &vstream_info); hailo_status set_buffer(MemoryView view); - MemoryView get_buffer(); + Expected get_buffer(); + hailo_status set_pix_buffer(const hailo_pix_buffer_t &pix_buffer); + Expected get_pix_buffer(); + BufferType get_type(); + void set_stream_callback(TransferDoneCallbackAsyncInfer callback); private: std::string m_name; - MemoryView m_view; + BufferType m_buffer_type; + union { + MemoryView m_view; + hailo_pix_buffer_t m_pix_buffer; + }; TransferDoneCallbackAsyncInfer m_stream_callback; }; class InferModel::InferStream::Impl { public: - Impl(const hailo_vstream_info_t &vstream_info) : m_vstream_info(vstream_info) - { - m_user_buffer_format.order = HAILO_FORMAT_ORDER_AUTO; - m_user_buffer_format.type = HAILO_FORMAT_TYPE_AUTO; - m_user_buffer_format.flags = HAILO_FORMAT_FLAGS_QUANTIZED; - } + Impl(const hailo_vstream_info_t &vstream_info) : m_vstream_info(vstream_info), m_user_buffer_format(vstream_info.format), + m_nms_score_threshold(static_cast(INVALID_NMS_CONFIG)), m_nms_iou_threshold(static_cast(INVALID_NMS_CONFIG)), + 
m_nms_max_proposals_per_class(static_cast(INVALID_NMS_CONFIG)) + {} std::string name() const; + hailo_3d_image_shape_t shape() const; + hailo_format_t format() const; size_t get_frame_size() const; + Expected get_nms_shape() const; + std::vector get_quant_infos() const; void set_format_type(hailo_format_type_t type); void set_format_order(hailo_format_order_t order); - hailo_format_t get_user_buffer_format(); + + bool is_nms() const; + void set_nms_score_threshold(float32_t threshold); + void set_nms_iou_threshold(float32_t threshold); + void set_nms_max_proposals_per_class(uint32_t max_proposals_per_class); private: + friend class InferModel; + hailo_vstream_info_t m_vstream_info; hailo_format_t m_user_buffer_format; + + float32_t m_nms_score_threshold; + float32_t m_nms_iou_threshold; + uint32_t m_nms_max_proposals_per_class; }; class AsyncInferJob::Impl @@ -56,30 +78,50 @@ class AsyncInferJob::Impl public: Impl(uint32_t streams_count); hailo_status wait(std::chrono::milliseconds timeout); - bool stream_done(); + bool stream_done(const hailo_status &status); + hailo_status completion_status(); + void mark_callback_done(); private: std::condition_variable m_cv; std::mutex m_mutex; std::atomic_uint32_t m_ongoing_transfers; + bool m_callback_called; + hailo_status m_job_completion_status; }; class ConfiguredInferModelImpl { public: + static Expected> create(std::shared_ptr net_group, + const std::unordered_map &inputs_formats, const std::unordered_map &outputs_formats, + const std::vector &input_names, const std::vector &output_names, const uint32_t timeout = HAILO_DEFAULT_VSTREAM_TIMEOUT_MS); + ConfiguredInferModelImpl(std::shared_ptr cng, std::shared_ptr async_infer_runner, const std::vector &input_names, const std::vector &output_names); + ~ConfiguredInferModelImpl(); Expected create_bindings(); hailo_status wait_for_async_ready(std::chrono::milliseconds timeout); + void abort(); hailo_status activate(); void deactivate(); hailo_status 
run(ConfiguredInferModel::Bindings bindings, std::chrono::milliseconds timeout); Expected run_async(ConfiguredInferModel::Bindings bindings, - std::function callback); + std::function callback); + Expected get_hw_latency_measurement(const std::string &network_name); + hailo_status set_scheduler_timeout(const std::chrono::milliseconds &timeout); + hailo_status set_scheduler_threshold(uint32_t threshold); + hailo_status set_scheduler_priority(uint8_t priority); + Expected get_async_queue_size(); + + static Expected> create_for_ut(std::shared_ptr net_group, + std::shared_ptr async_infer_runner, const std::vector &input_names, const std::vector &output_names); private: + hailo_status validate_bindings(ConfiguredInferModel::Bindings bindings); + std::shared_ptr m_cng; std::unique_ptr m_ang; std::shared_ptr m_async_infer_runner; diff --git a/hailort/libhailort/src/net_flow/pipeline/pipeline.cpp b/hailort/libhailort/src/net_flow/pipeline/pipeline.cpp index c903be5..6360ed8 100644 --- a/hailort/libhailort/src/net_flow/pipeline/pipeline.cpp +++ b/hailort/libhailort/src/net_flow/pipeline/pipeline.cpp @@ -19,9 +19,6 @@ namespace hailort { -#define NUMBER_OF_PLANES_NV12_NV21 2 -#define NUMBER_OF_PLANES_I420 3 - PipelineBuffer::Metadata::Metadata(PipelineTimePoint start_time) : m_start_time(start_time) {} @@ -48,51 +45,89 @@ PipelineBuffer::PipelineBuffer(Type type) : m_type(type), m_pool(nullptr), m_view(), - m_exec_done([](CompletionInfoAsyncInferInternal /*completion_info*/) {}), m_metadata(), m_is_user_buffer(false), + m_should_call_exec_done(true), m_action_status(HAILO_SUCCESS) -{} +{ + m_exec_done = [buffer_pool = m_pool, mem_view = m_view, is_user_buffer = m_is_user_buffer](hailo_status){ + release_buffer(buffer_pool, mem_view, is_user_buffer); + }; +} -PipelineBuffer::PipelineBuffer(hailo_status action_status) : +PipelineBuffer::PipelineBuffer(hailo_status action_status, const TransferDoneCallbackAsyncInfer &exec_done) : m_type(Type::DATA), m_pool(nullptr), m_view(), - 
m_exec_done([](CompletionInfoAsyncInferInternal /*completion_info*/) {}), m_metadata(), m_is_user_buffer(false), + m_should_call_exec_done(true), m_action_status(action_status) -{} +{ + m_exec_done = [buffer_pool = m_pool, mem_view = m_view, is_user_buffer = m_is_user_buffer, exec_done = exec_done](hailo_status status){ + exec_done(status); + release_buffer(buffer_pool, mem_view, is_user_buffer); + }; +} PipelineBuffer::PipelineBuffer(MemoryView view, bool is_user_buffer, BufferPoolPtr pool, bool should_measure, hailo_status action_status) : m_type(Type::DATA), m_pool(pool), m_view(view), - m_exec_done([](CompletionInfoAsyncInferInternal /*completion_info*/) {}), m_metadata(Metadata(add_timestamp(should_measure))), m_is_user_buffer(is_user_buffer), + m_should_call_exec_done(true), m_action_status(action_status) -{} +{ + m_exec_done = [buffer_pool = m_pool, mem_view = m_view, is_user_buffer = m_is_user_buffer](hailo_status){ + release_buffer(buffer_pool, mem_view, is_user_buffer); + }; +} PipelineBuffer::PipelineBuffer(MemoryView view, const TransferDoneCallbackAsyncInfer &exec_done, bool is_user_buffer, BufferPoolPtr pool, bool should_measure, hailo_status action_status) : m_type(Type::DATA), m_pool(pool), m_view(view), - m_exec_done(exec_done), m_metadata(Metadata(add_timestamp(should_measure))), m_is_user_buffer(is_user_buffer), + m_should_call_exec_done(true), m_action_status(action_status) -{} +{ + m_exec_done = [buffer_pool = m_pool, mem_view = m_view, is_user_buffer = m_is_user_buffer, exec_done = exec_done](hailo_status status){ + exec_done(status); + release_buffer(buffer_pool, mem_view, is_user_buffer); + }; +} + +PipelineBuffer::PipelineBuffer(hailo_pix_buffer_t buffer, const TransferDoneCallbackAsyncInfer &exec_done) : + m_type(Type::DATA), + m_pool(nullptr), + m_view(), + m_metadata(), + m_is_user_buffer(false), + m_should_call_exec_done(true), + m_action_status(HAILO_SUCCESS) +{ + set_additional_data(std::make_shared(buffer)); + m_exec_done = 
[buffer_pool = m_pool, mem_view = m_view, is_user_buffer = m_is_user_buffer, exec_done = exec_done](hailo_status status){ + exec_done(status); + release_buffer(buffer_pool, mem_view, is_user_buffer); + }; +} PipelineBuffer::PipelineBuffer(hailo_pix_buffer_t buffer) : m_type(Type::DATA), m_pool(nullptr), m_view(), m_metadata(), - m_is_user_buffer(false) + m_is_user_buffer(false), + m_should_call_exec_done(true) { set_additional_data(std::make_shared(buffer)); + m_exec_done = [buffer_pool = m_pool, mem_view = m_view, is_user_buffer = m_is_user_buffer](hailo_status){ + release_buffer(buffer_pool, mem_view, is_user_buffer); + }; } PipelineBuffer::PipelineBuffer(PipelineBuffer &&other) : @@ -102,6 +137,7 @@ PipelineBuffer::PipelineBuffer(PipelineBuffer &&other) : m_exec_done(std::move(other.m_exec_done)), m_metadata(std::move(other.m_metadata)), m_is_user_buffer(std::move(other.m_is_user_buffer)), + m_should_call_exec_done(std::exchange(other.m_should_call_exec_done, false)), m_action_status(std::move(other.m_action_status)) {} @@ -113,17 +149,15 @@ PipelineBuffer &PipelineBuffer::operator=(PipelineBuffer &&other) m_exec_done = std::move(other.m_exec_done); m_metadata = std::move(other.m_metadata); m_is_user_buffer = std::move(other.m_is_user_buffer); + m_should_call_exec_done = std::exchange(other.m_should_call_exec_done, false); m_action_status = std::move(other.m_action_status); return *this; } PipelineBuffer::~PipelineBuffer() { - if ((nullptr != m_pool) && (!m_is_user_buffer)) { - hailo_status status = m_pool->release_buffer(m_view); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("Releasing buffer in buffer pool failed! 
status = {}", status); - } + if (m_should_call_exec_done) { + m_exec_done(action_status()); } } @@ -228,8 +262,9 @@ void PipelineBuffer::set_metadata(Metadata &&val) m_metadata = std::move(val); } -TransferDoneCallbackAsyncInfer PipelineBuffer::get_exec_done_cb() const +TransferDoneCallbackAsyncInfer PipelineBuffer::get_exec_done_cb() { + m_should_call_exec_done = false; return m_exec_done; } @@ -238,6 +273,16 @@ PipelineTimePoint PipelineBuffer::add_timestamp(bool should_measure) return should_measure ? std::chrono::steady_clock::now() : PipelineTimePoint{}; } +void PipelineBuffer::release_buffer(BufferPoolPtr buffer_pool_ptr, MemoryView mem_view, bool is_user_buffer) +{ + if ((nullptr != buffer_pool_ptr) && (!is_user_buffer)) { + hailo_status status = buffer_pool_ptr->release_buffer(mem_view); + if (HAILO_SUCCESS != status) { + LOGGER__CRITICAL("Releasing buffer in buffer pool failed! status = {}", status); + } + } +} + hailo_status PipelineBuffer::action_status() { return m_action_status; @@ -301,7 +346,8 @@ BufferPool::BufferPool(size_t buffer_size, bool is_holding_user_buffers, bool me m_free_mem_views(std::move(free_mem_views)), m_done_cbs(std::move(done_cbs)), m_queue_size_accumulator(std::move(queue_size_accumulator)) -{} +{ +} size_t BufferPool::buffer_size() { @@ -329,15 +375,28 @@ hailo_status BufferPool::enqueue_buffer(MemoryView mem_view, const TransferDoneC return HAILO_SUCCESS; } -bool BufferPool::is_full() { - return (m_max_buffer_count - m_buffers.size() == 0); +bool BufferPool::is_full() +{ + return (m_max_buffer_count - m_free_mem_views.size_approx() == 0); +} + +size_t BufferPool::num_of_buffers_in_pool() +{ + return m_done_cbs.size_approx(); +} + +bool BufferPool::is_holding_user_buffers() +{ + return m_is_holding_user_buffers; } -hailo_status BufferPool::allocate_buffers(bool is_dma_able) +// This function changes the m_max_buffer_count to be num_of_buffers, and it must be called when pool is empty of buffers +hailo_status 
BufferPool::allocate_buffers(bool is_dma_able, size_t num_of_buffers) { m_is_holding_user_buffers = false; - size_t buffer_count = m_max_buffer_count - m_buffers.size(); - for (size_t i = 0; i < buffer_count; i++) { + CHECK(m_free_mem_views.size_approx() == 0, HAILO_INTERNAL_FAILURE, "Cannot allocate buffers for pool, since pool is not empty!"); + m_max_buffer_count = num_of_buffers; + for (size_t i = 0; i < m_max_buffer_count; i++) { BufferStorageParams buffer_storage_params; if (is_dma_able) { buffer_storage_params = BufferStorageParams::create_dma(); @@ -352,19 +411,23 @@ hailo_status BufferPool::allocate_buffers(bool is_dma_able) return HAILO_SUCCESS; } -Expected BufferPool::acquire_buffer(std::chrono::milliseconds timeout) +Expected BufferPool::acquire_buffer(std::chrono::milliseconds timeout, + bool ignore_shutdown_event) { - auto mem_view = acquire_free_mem_view(timeout); + auto mem_view = acquire_free_mem_view(timeout, ignore_shutdown_event); + if ((HAILO_SUCCESS != mem_view.status()) && (m_is_holding_user_buffers)) { + auto done_cb = acquire_on_done_cb(timeout, true); + CHECK_EXPECTED(done_cb); + + done_cb.value()(mem_view.status()); + } if (HAILO_SHUTDOWN_EVENT_SIGNALED == mem_view.status()) { return make_unexpected(HAILO_SHUTDOWN_EVENT_SIGNALED); } CHECK_EXPECTED(mem_view); if (m_is_holding_user_buffers) { - auto done_cb = acquire_on_done_cb(timeout); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == done_cb.status()) { - return make_unexpected(HAILO_SHUTDOWN_EVENT_SIGNALED); - } + auto done_cb = acquire_on_done_cb(timeout, true); CHECK_EXPECTED(done_cb); return PipelineBuffer(mem_view.release(), done_cb.release(), m_is_holding_user_buffers, shared_from_this(), m_measure_vstream_latency); @@ -376,6 +439,12 @@ Expected BufferPool::acquire_buffer(std::chrono::milliseconds ti Expected> BufferPool::acquire_buffer_ptr(std::chrono::milliseconds timeout) { auto mem_view = acquire_free_mem_view(timeout); + if ((HAILO_SUCCESS != mem_view.status()) && 
(m_is_holding_user_buffers)) { + auto done_cb = acquire_on_done_cb(timeout, true); + CHECK_EXPECTED(done_cb); + + done_cb.value()(mem_view.status()); + } if (HAILO_SHUTDOWN_EVENT_SIGNALED == mem_view.status()) { return make_unexpected(HAILO_SHUTDOWN_EVENT_SIGNALED); } @@ -383,10 +452,7 @@ Expected> BufferPool::acquire_buffer_ptr(std::ch std::shared_ptr ptr = nullptr; if (m_is_holding_user_buffers) { - auto done_cb = acquire_on_done_cb(timeout); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == done_cb.status()) { - return make_unexpected(HAILO_SHUTDOWN_EVENT_SIGNALED); - } + auto done_cb = acquire_on_done_cb(timeout, true); CHECK_EXPECTED(done_cb); ptr = make_shared_nothrow(mem_view.release(), done_cb.release(), m_is_holding_user_buffers, shared_from_this(), m_measure_vstream_latency); @@ -398,13 +464,14 @@ Expected> BufferPool::acquire_buffer_ptr(std::ch return ptr; } -Expected BufferPool::acquire_free_mem_view(std::chrono::milliseconds timeout) +Expected BufferPool::acquire_free_mem_view(std::chrono::milliseconds timeout, + bool ignore_shutdown_event) { if (nullptr != m_queue_size_accumulator) { m_queue_size_accumulator->add_data_point(static_cast(m_free_mem_views.size_approx())); } - auto mem_view = m_free_mem_views.dequeue(timeout); + auto mem_view = m_free_mem_views.dequeue(timeout, ignore_shutdown_event); if (HAILO_SHUTDOWN_EVENT_SIGNALED == mem_view.status()) { return make_unexpected(mem_view.status()); } @@ -419,9 +486,10 @@ Expected BufferPool::acquire_free_mem_view(std::chrono::milliseconds return mem_view.release(); } -Expected BufferPool::acquire_on_done_cb(std::chrono::milliseconds timeout) +Expected BufferPool::acquire_on_done_cb(std::chrono::milliseconds timeout, + bool ignore_shutdown_event) { - auto done_cb = m_done_cbs.dequeue(timeout); + auto done_cb = m_done_cbs.dequeue(timeout, ignore_shutdown_event); if (HAILO_SHUTDOWN_EVENT_SIGNALED == done_cb.status()) { return make_unexpected(done_cb.status()); } @@ -641,6 +709,16 @@ hailo_status PipelinePad::abort() 
return m_element.abort(); } +hailo_status PipelinePad::terminate(hailo_status error_status) +{ + return m_element.terminate(error_status); +} + +hailo_status PipelinePad::dequeue_user_buffers(hailo_status error_status) +{ + return m_element.dequeue_user_buffers(error_status); +} + hailo_status PipelinePad::wait_for_finish() { return m_element.wait_for_finish(); @@ -735,47 +813,6 @@ const PipelineElement &PipelinePad::element() const return m_element; } -SourceElement::SourceElement(const std::string &name, DurationCollector &&duration_collector, - std::shared_ptr> &&pipeline_status, - PipelineDirection pipeline_direction) : - PipelineElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction) -{ - m_sources.emplace_back(*this, name, PipelinePad::Type::SOURCE); -} - -PipelinePad &SourceElement::source() -{ - return m_sources[0]; -} - -SinkElement::SinkElement(const std::string &name, DurationCollector &&duration_collector, - std::shared_ptr> &&pipeline_status, - PipelineDirection pipeline_direction) : - PipelineElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction) -{ - m_sinks.emplace_back(*this, name, PipelinePad::Type::SINK); -} - -PipelinePad &SinkElement::sink() -{ - return m_sinks[0]; -} - -IntermediateElement::IntermediateElement(const std::string &name, DurationCollector &&duration_collector, - std::shared_ptr> &&pipeline_status, - PipelineDirection pipeline_direction) : - PipelineElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction) -{ - m_sinks.emplace_back(*this, name, PipelinePad::Type::SINK); - m_sources.emplace_back(*this, name, PipelinePad::Type::SOURCE); -} - -std::vector IntermediateElement::execution_pads() -{ - std::vector result{&next_pad()}; - return result; -} - PipelineElement::PipelineElement(const std::string &name, DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, PipelineDirection pipeline_direction) : @@ 
-784,7 +821,9 @@ PipelineElement::PipelineElement(const std::string &name, DurationCollector &&du m_pipeline_status(std::move(pipeline_status)), m_sinks(), m_sources(), - m_pipeline_direction(pipeline_direction) + m_pipeline_direction(pipeline_direction), + m_is_terminating_element(false), + m_is_terminated(false) {} AccumulatorPtr PipelineElement::get_fps_accumulator() @@ -797,6 +836,11 @@ AccumulatorPtr PipelineElement::get_latency_accumulator() return m_duration_collector.get_latency_accumulator(); } +bool PipelineElement::is_terminating_element() +{ + return m_is_terminating_element; +} + std::vector PipelineElement::get_queue_size_accumulators() { return std::vector(); @@ -829,16 +873,6 @@ std::string PipelineElement::description() const return element_description.str(); } -void PipelineElement::set_on_cant_pull_callback(std::function callback) -{ - m_cant_pull_callback = callback; -} - -void PipelineElement::set_on_can_pull_callback(std::function callback) -{ - m_can_pull_callback = callback; -} - hailo_status PipelineElement::enqueue_execution_buffer(MemoryView mem_view, const TransferDoneCallbackAsyncInfer &exec_done, const std::string &source_name) { (void)mem_view; @@ -853,13 +887,52 @@ hailo_status PipelineElement::enqueue_execution_buffer(MemoryView mem_view, cons return enqueue_execution_buffer(mem_view, exec_done, ""); }; -hailo_status PipelineElement::fill_buffer_pools(bool is_dma_able) +hailo_status PipelineElement::empty_buffer_pool(BufferPoolPtr pool, hailo_status error_status, std::chrono::milliseconds timeout) +{ + if (!pool->is_holding_user_buffers()) { + return HAILO_SUCCESS; + } + + while (pool->num_of_buffers_in_pool() > 0) { + auto acquired_buffer = pool->acquire_buffer(timeout, true); + + if (HAILO_SUCCESS != acquired_buffer.status()) { + LOGGER__CRITICAL("Failed to aquire from pool in {} element!", name()); + return acquired_buffer.status(); + } + + auto exec_done_cb = acquired_buffer->get_exec_done_cb(); + exec_done_cb(error_status); + } 
+ return HAILO_SUCCESS; +} + +hailo_status PipelineElement::fill_buffer_pool(bool /*is_dma_able*/, size_t /*num_of_buffers*/, const uint32_t /*source_index*/) { - (void)is_dma_able; return HAILO_NOT_IMPLEMENTED; } -Expected PipelineElement::are_buffer_pools_full() +Expected PipelineElement::can_push_buffer_upstream(const uint32_t /*source_index*/) +{ + return make_unexpected(HAILO_NOT_IMPLEMENTED); +} + +Expected PipelineElement::can_push_buffer_downstream(const uint32_t /*source_index*/) +{ + return make_unexpected(HAILO_NOT_IMPLEMENTED); +} + +hailo_status PipelineElement::fill_buffer_pool(bool /*is_dma_able*/, size_t /*num_of_buffers*/, const std::string &/*source_name*/) +{ + return HAILO_NOT_IMPLEMENTED; +} + +Expected PipelineElement::can_push_buffer_upstream(const std::string &/*source_name*/) +{ + return make_unexpected(HAILO_NOT_IMPLEMENTED); +} + +Expected PipelineElement::can_push_buffer_downstream(const std::string &/*source_name*/) { return make_unexpected(HAILO_NOT_IMPLEMENTED); } @@ -899,6 +972,16 @@ hailo_status PipelineElement::clear_abort() return execute_clear_abort(); } +hailo_status PipelineElement::terminate(hailo_status error_status) +{ + return execute_terminate(error_status); +} + +hailo_status PipelineElement::dequeue_user_buffers(hailo_status error_status) +{ + return execute_dequeue_user_buffers(error_status); +} + hailo_status PipelineElement::wait_for_finish() { return execute_wait_for_finish(); @@ -939,1320 +1022,29 @@ hailo_status PipelineElement::execute_clear_abort() return execute([&](auto *pad){ return pad->clear_abort(); }); } -hailo_status PipelineElement::execute_wait_for_finish() -{ - return execute([&](auto *pad){ return pad->wait_for_finish(); }); -} - -hailo_status PipelineElement::execute(std::function func) -{ - for (auto pad : execution_pads()) { - auto status = func(pad); - CHECK_SUCCESS(status); - } - return HAILO_SUCCESS; -} - -void PipelineElement::handle_non_recoverable_async_error(hailo_status error_status) -{ - 
if (HAILO_SUCCESS != m_pipeline_status->load()){ - LOGGER__ERROR("Non-recoverable Async Infer Pipeline error. status error code: {}", error_status); - m_pipeline_status->store(error_status); - } -} - -std::vector SourceElement::execution_pads() -{ - std::vector result{&source()}; - return result; -} - -std::vector SinkElement::execution_pads() -{ - std::vector result{&sink()}; - return result; -} - -FilterElement::FilterElement(const std::string &name, DurationCollector &&duration_collector, - std::shared_ptr> &&pipeline_status, - PipelineDirection pipeline_direction, BufferPoolPtr buffer_pool, - std::chrono::milliseconds timeout) : - IntermediateElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction), - m_pool(buffer_pool), - m_timeout(timeout) -{} - -hailo_status FilterElement::run_push(PipelineBuffer &&buffer, const PipelinePad &/*sink*/) -{ - auto output = action(std::move(buffer), PipelineBuffer()); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == output.status()) { - return output.status(); - } - CHECK_EXPECTED_AS_STATUS(output); - - hailo_status status = next_pad().run_push(output.release()); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == status) { - LOGGER__INFO("run_push of {} was shutdown!", name()); - return status; - } - if (HAILO_STREAM_ABORTED_BY_USER == status) { - LOGGER__INFO("run_push of {} was aborted!", name()); - return status; - } - CHECK_SUCCESS(status); - - return HAILO_SUCCESS; -} - -void FilterElement::run_push_async(PipelineBuffer &&buffer, const PipelinePad &/*sink*/) -{ - assert(m_pipeline_direction == PipelineDirection::PUSH); - if (HAILO_SUCCESS != buffer.action_status()) { - auto buffer_from_pool = m_pool->get_available_buffer(PipelineBuffer(), m_timeout); - if (HAILO_SUCCESS != buffer_from_pool.status()) { - next_pad().run_push_async(PipelineBuffer(buffer_from_pool.status())); - } else { - next_pad().run_push_async(buffer_from_pool.release()); - } - return; - } - - auto output = action(std::move(buffer), 
PipelineBuffer()); - if (HAILO_SUCCESS == output.status()) { - next_pad().run_push_async(output.release()); - } else { - next_pad().run_push_async(PipelineBuffer(output.status())); - } - return; -} - -Expected FilterElement::run_pull(PipelineBuffer &&optional, const PipelinePad &/*source*/) -{ - auto buffer = next_pad().run_pull(); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == buffer.status()) { - LOGGER__INFO("run_pull in FilterElement was shutdown!"); - return make_unexpected(buffer.status()); - } - CHECK_EXPECTED(buffer); - return action(buffer.release(), std::move(optional)); -} - -std::vector FilterElement::get_queue_size_accumulators() -{ - if (nullptr == m_pool || nullptr == m_pool->get_queue_size_accumulator()) { - return std::vector(); - } - return {m_pool->get_queue_size_accumulator()}; -} - -hailo_status FilterElement::enqueue_execution_buffer(MemoryView mem_view, const TransferDoneCallbackAsyncInfer &exec_done, const std::string &source_name) -{ - (void)source_name; - - auto status = m_pool->enqueue_buffer(mem_view, exec_done); - CHECK_SUCCESS(status); - - return HAILO_SUCCESS; -} - -Expected FilterElement::are_buffer_pools_full() -{ - return m_pool->is_full(); -} - -hailo_status FilterElement::fill_buffer_pools(bool is_dma_able) -{ - auto status = m_pool->allocate_buffers(is_dma_able); - CHECK_SUCCESS(status); - - return HAILO_SUCCESS; -} - -Expected> BaseQueueElement::create_queue(size_t queue_size, EventPtr shutdown_event) +hailo_status PipelineElement::execute_terminate(hailo_status error_status) { - auto queue = SpscQueue::create(queue_size, shutdown_event); - CHECK_EXPECTED(queue); - - return queue.release(); -} - -BaseQueueElement::BaseQueueElement(SpscQueue &&queue, EventPtr shutdown_event, const std::string &name, - std::chrono::milliseconds timeout, DurationCollector &&duration_collector, - AccumulatorPtr &&queue_size_accumulator, std::shared_ptr> &&pipeline_status, - Event &&activation_event, Event &&deactivation_event, PipelineDirection 
pipeline_direction) : - IntermediateElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction), - m_queue(std::move(queue)), - m_shutdown_event(shutdown_event), - m_timeout(timeout), - m_is_thread_running(true), - m_activation_event(std::move(activation_event)), - m_deactivation_event(std::move(deactivation_event)), - m_queue_size_accumulator(std::move(queue_size_accumulator)), - m_is_run_in_thread_running(false) -{} - -BaseQueueElement::~BaseQueueElement() -{ - LOGGER__INFO("Queue element {} has {} frames in his Queue on destruction", name(), m_queue.size_approx()); -} - -void BaseQueueElement::start_thread() -{ - m_thread = std::thread([this] () { - OsUtils::set_current_thread_name(thread_name()); - while (m_is_thread_running.load()) { - auto status = m_activation_event.wait(INIFINITE_TIMEOUT()); - - if (!m_is_thread_running) { - LOGGER__INFO("Thread in element {} is not running anymore, exiting..", this->name()); - break; - } - if (HAILO_SUCCESS == status) { - { - std::unique_lock lock(m_mutex); - m_is_run_in_thread_running = true; - } - m_cv.notify_all(); - - status = run_in_thread(); - - { - std::unique_lock lock(m_mutex); - m_is_run_in_thread_running = false; - } - m_cv.notify_all(); - } - - if (HAILO_SUCCESS != status) { - if (HAILO_SHUTDOWN_EVENT_SIGNALED != status) { - // We do not want to log error for HAILO_STREAM_ABORTED_BY_USER - if (HAILO_STREAM_ABORTED_BY_USER != status) { - LOGGER__ERROR("Queue element {} run in thread function failed! status = {}", this->name(), status); - } - - // Store the real error in pipeline_status - m_pipeline_status->store(status); - - // Signal other threads to stop - hailo_status shutdown_status = m_shutdown_event->signal(); - if (HAILO_SUCCESS != shutdown_status) { - LOGGER__CRITICAL("Failed shutting down queue with status {}", shutdown_status); - } - } - //Thread has done its execution. 
Mark to the thread to wait for activation again - hailo_status event_status = m_activation_event.reset(); - if (HAILO_SUCCESS != event_status) { - LOGGER__CRITICAL("Failed reset activation event of element {}, with status {}", this->name(), event_status); - } - - // Mark to deactivation function that the thread is done - event_status = m_deactivation_event.signal(); - if (HAILO_SUCCESS != event_status) { - LOGGER__CRITICAL("Failed signaling deactivation event of element {}, with status {}", this->name(), event_status); - } - } - } - }); + m_is_terminated = true; + return execute([&](auto *pad){ return pad->terminate(error_status); }); } -void BaseQueueElement::stop_thread() +hailo_status PipelineElement::execute_dequeue_user_buffers(hailo_status error_status) { - m_shutdown_event->signal(); - - // Mark thread as not running, then wake it in case it is waiting on m_activation_event - m_is_thread_running = false; - m_activation_event.signal(); - - if (m_thread.joinable()) { - m_thread.join(); - } -} - -std::vector BaseQueueElement::get_queue_size_accumulators() -{ - if (nullptr == m_queue_size_accumulator) { - return std::vector(); - } - return {m_queue_size_accumulator}; -} - -hailo_status BaseQueueElement::execute_activate() -{ - hailo_status status = PipelineElement::execute_activate(); - CHECK_SUCCESS(status); - - status = m_activation_event.signal(); - CHECK_SUCCESS(status); - - return HAILO_SUCCESS; + return execute([&](auto *pad){ return pad->dequeue_user_buffers(error_status); }); } -hailo_status BaseQueueElement::execute_post_deactivate(bool should_clear_abort) +hailo_status PipelineElement::execute_wait_for_finish() { - hailo_status status = m_deactivation_event.wait(INIFINITE_TIMEOUT()); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("Failed to post_deactivate() in {} with status {}", name(), status); - } - - status = m_deactivation_event.reset(); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("Failed to reset of deactivation event in {} with status 
{}", name(), status); - } - - return PipelineElement::execute_post_deactivate(should_clear_abort); + return execute([&](auto *pad){ return pad->wait_for_finish(); }); } -hailo_status BaseQueueElement::execute_clear() +hailo_status PipelineElement::execute(std::function func) { - auto status = PipelineElement::execute_clear(); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("Failed to clear() in {} with status {}", name(), status); + for (auto pad : execution_pads()) { + auto status = func(pad); + CHECK_SUCCESS(status); } - - auto queue_status = m_queue.clear(); - CHECK_SUCCESS(queue_status, "Failed to clear() queue in {} with status {}", name(), status); - - return status; -} - -hailo_status BaseQueueElement::execute_wait_for_finish() -{ - std::unique_lock lock(m_mutex); - m_cv.wait(lock, [this] () { - return !m_is_run_in_thread_running; - }); return HAILO_SUCCESS; } -hailo_status BaseQueueElement::enqueue_execution_buffer(MemoryView mem_view, const TransferDoneCallbackAsyncInfer &exec_done, const std::string &source_name) -{ - (void)source_name; - return m_sinks[0].prev()->element().enqueue_execution_buffer(mem_view, exec_done, m_sinks[0].prev()->name()); -} - -Expected BaseQueueElement::are_buffer_pools_full() -{ - return m_sinks[0].prev()->element().are_buffer_pools_full(); -} - -hailo_status BaseQueueElement::fill_buffer_pools(bool is_dma_able) -{ - return m_sinks[0].prev()->element().fill_buffer_pools(is_dma_able); -} - -hailo_status PushQueueElement::execute_abort() -{ - auto status = m_shutdown_event->reset(); - CHECK_SUCCESS(status); - m_pipeline_status->store(HAILO_STREAM_ABORTED_BY_USER); - status = PipelineElement::execute_abort(); - CHECK_SUCCESS(status); - return m_activation_event.signal(); -} - -hailo_status BaseQueueElement::execute_clear_abort() -{ - auto status = m_shutdown_event->reset(); - CHECK_SUCCESS(status); - m_pipeline_status->store(HAILO_SUCCESS); - return PipelineElement::execute_clear_abort(); -} - -hailo_status 
BaseQueueElement::set_timeout(std::chrono::milliseconds timeout) -{ - m_timeout = timeout; - return HAILO_SUCCESS; -} - -std::string BaseQueueElement::description() const -{ - std::stringstream element_description; - - element_description << "(" << this->name(); - if (HAILO_INFINITE != this->m_timeout.count()) { - element_description << " | timeout: " << std::chrono::duration_cast(this->m_timeout).count() << "s"; - } - element_description << ")"; - - return element_description.str(); -} - -hailo_status BaseQueueElement::pipeline_status() -{ - auto status = m_pipeline_status->load(); - - // We treat HAILO_STREAM_ABORTED_BY_USER as success because it is caused by user action (aborting streams) - if (HAILO_STREAM_ABORTED_BY_USER == status) { - return HAILO_SUCCESS; - } - return status; -} - -Expected> PushQueueElement::create(const std::string &name, std::chrono::milliseconds timeout, - size_t queue_size, hailo_pipeline_elem_stats_flags_t flags, EventPtr shutdown_event, - std::shared_ptr> pipeline_status, PipelineDirection pipeline_direction) -{ - auto queue = BaseQueueElement::create_queue(queue_size, shutdown_event); - CHECK_EXPECTED(queue); - - auto activation_event = Event::create(Event::State::not_signalled); - CHECK_EXPECTED(activation_event); - - auto deactivation_event = Event::create(Event::State::not_signalled); - CHECK_EXPECTED(deactivation_event); - - // TODO: Support fps/latency collection for queue elems (HRT-7711) - auto duration_collector = DurationCollector::create(HAILO_PIPELINE_ELEM_STATS_NONE); - CHECK_EXPECTED(duration_collector); - - AccumulatorPtr queue_size_accumulator = nullptr; - if ((flags & HAILO_PIPELINE_ELEM_STATS_MEASURE_QUEUE_SIZE) != 0) { - queue_size_accumulator = make_shared_nothrow>("queue_size"); - CHECK_AS_EXPECTED(nullptr != queue_size_accumulator, HAILO_OUT_OF_HOST_MEMORY); - } - - auto queue_ptr = make_shared_nothrow(queue.release(), shutdown_event, name, timeout, - duration_collector.release(), 
std::move(queue_size_accumulator), std::move(pipeline_status), - activation_event.release(), deactivation_event.release(), pipeline_direction); - CHECK_AS_EXPECTED(nullptr != queue_ptr, HAILO_OUT_OF_HOST_MEMORY, "Creating PushQueueElement {} failed!", name); - - LOGGER__INFO("Created {}", queue_ptr->name()); - - return queue_ptr; -} - -Expected> PushQueueElement::create(const std::string &name, const hailo_vstream_params_t &vstream_params, - EventPtr shutdown_event, std::shared_ptr> pipeline_status, PipelineDirection pipeline_direction) -{ - return PushQueueElement::create(name, std::chrono::milliseconds(vstream_params.timeout_ms), - vstream_params.queue_size, vstream_params.pipeline_elements_stats_flags, shutdown_event, pipeline_status, pipeline_direction); -} - -PushQueueElement::PushQueueElement(SpscQueue &&queue, EventPtr shutdown_event, const std::string &name, - std::chrono::milliseconds timeout, DurationCollector &&duration_collector, - AccumulatorPtr &&queue_size_accumulator, std::shared_ptr> &&pipeline_status, - Event &&activation_event, Event &&deactivation_event, PipelineDirection pipeline_direction, bool should_start_thread) : - BaseQueueElement(std::move(queue), shutdown_event, name, timeout, std::move(duration_collector), std::move(queue_size_accumulator), - std::move(pipeline_status), std::move(activation_event), std::move(deactivation_event), pipeline_direction) -{ - if (should_start_thread) { - start_thread(); - } -} - -PushQueueElement::~PushQueueElement() -{ - stop_thread(); -} - -hailo_status PushQueueElement::run_push(PipelineBuffer &&buffer, const PipelinePad &/*sink*/) -{ - // TODO: Support fps/latency collection for queue elems (HRT-7711) - if (nullptr != m_queue_size_accumulator) { - m_queue_size_accumulator->add_data_point(static_cast(m_queue.size_approx())); - } - auto status = m_pipeline_status->load(); - if (HAILO_STREAM_ABORTED_BY_USER == status) { - LOGGER__INFO("run_push of {} was aborted!", name()); - return status; - } - 
CHECK_SUCCESS(m_pipeline_status->load()); - status = m_queue.enqueue(std::move(buffer), m_timeout); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == status) { - auto queue_thread_status = pipeline_status(); - CHECK_SUCCESS(queue_thread_status, - "Shutdown event was signaled in enqueue of queue element {} because thread has failed with status={}!", name(), - queue_thread_status); - LOGGER__INFO("Shutdown event was signaled in enqueue of queue element {}!", name()); - return HAILO_SHUTDOWN_EVENT_SIGNALED; - } - CHECK_SUCCESS(status); - return HAILO_SUCCESS; -} - -void PushQueueElement::run_push_async(PipelineBuffer &&/*buffer*/, const PipelinePad &/*sink*/) { - LOGGER__ERROR("run_push_async is not supported for {}", name()); - assert(false); -} - -Expected PushQueueElement::run_pull(PipelineBuffer &&/*optional*/, const PipelinePad &/*source*/) -{ - return make_unexpected(HAILO_INVALID_OPERATION); -} - -hailo_status PushQueueElement::execute_deactivate() -{ - // Mark to the threads that deactivate() was called. - hailo_status status = m_queue.enqueue(PipelineBuffer(PipelineBuffer::Type::DEACTIVATE)); - if (HAILO_SUCCESS != status) { - // We want to deactivate source even if enqueue failed - auto deactivation_status = PipelineElement::execute_deactivate(); - CHECK_SUCCESS(deactivation_status); - if ((HAILO_STREAM_ABORTED_BY_USER == status) || (HAILO_SHUTDOWN_EVENT_SIGNALED == status)) { - LOGGER__INFO("enqueue() in element {} was aborted, got status = {}", name(), status); - } - else { - LOGGER__ERROR("enqueue() in element {} failed, got status = {}", name(), status); - return status; - } - } - - return HAILO_SUCCESS; -} - -PipelinePad &PushQueueElement::next_pad() -{ - // Note: The next elem to be run is downstream from this elem (i.e. 
buffers are pushed) - return *m_sources[0].next(); -} - -hailo_status PushQueueElement::run_in_thread() -{ - auto buffer = m_queue.dequeue(INIFINITE_TIMEOUT()); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == buffer.status()) { - LOGGER__INFO("Shutdown event was signaled in dequeue of queue element {}!", name()); - return HAILO_SHUTDOWN_EVENT_SIGNALED; - } - CHECK_EXPECTED_AS_STATUS(buffer); - - // Return if deactivated - if (PipelineBuffer::Type::DEACTIVATE == buffer->get_type()) { - hailo_status status = m_shutdown_event->signal(); - CHECK_SUCCESS(status); - - status = next_pad().deactivate(); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("Deactivate of source in {} has failed with status {}", name(), status); - } - - return HAILO_SHUTDOWN_EVENT_SIGNALED; - } - - hailo_status status = next_pad().run_push(buffer.release()); - if (HAILO_STREAM_ABORTED_BY_USER == status) { - LOGGER__INFO("run_push of {} was aborted!", name()); - return status; - } - CHECK_SUCCESS(status); - - return HAILO_SUCCESS; -} - -Expected> AsyncPushQueueElement::create(const std::string &name, std::chrono::milliseconds timeout, - size_t queue_size, hailo_pipeline_elem_stats_flags_t flags, EventPtr shutdown_event, - std::shared_ptr> pipeline_status, PipelineDirection pipeline_direction) -{ - auto queue = BaseQueueElement::create_queue(queue_size, shutdown_event); - CHECK_EXPECTED(queue); - - auto activation_event = Event::create(Event::State::not_signalled); - CHECK_EXPECTED(activation_event); - - auto deactivation_event = Event::create(Event::State::not_signalled); - CHECK_EXPECTED(deactivation_event); - - // TODO: Support fps/latency collection for queue elems (HRT-7711) - auto duration_collector = DurationCollector::create(HAILO_PIPELINE_ELEM_STATS_NONE); - CHECK_EXPECTED(duration_collector); - - AccumulatorPtr queue_size_accumulator = nullptr; - if ((flags & HAILO_PIPELINE_ELEM_STATS_MEASURE_QUEUE_SIZE) != 0) { - queue_size_accumulator = make_shared_nothrow>("queue_size"); - 
CHECK_AS_EXPECTED(nullptr != queue_size_accumulator, HAILO_OUT_OF_HOST_MEMORY); - } - - auto queue_ptr = make_shared_nothrow(queue.release(), shutdown_event, name, timeout, - duration_collector.release(), std::move(queue_size_accumulator), std::move(pipeline_status), - activation_event.release(), deactivation_event.release(), pipeline_direction); - CHECK_AS_EXPECTED(nullptr != queue_ptr, HAILO_OUT_OF_HOST_MEMORY, "Creating PushQueueElement {} failed!", name); - - LOGGER__INFO("Created {}", queue_ptr->name()); - - return queue_ptr; -} - -Expected> AsyncPushQueueElement::create(const std::string &name, const ElementBuildParams &build_params, - PipelineDirection pipeline_direction) -{ - return AsyncPushQueueElement::create(name, build_params.timeout, build_params.buffer_pool_size, - build_params.elem_stats_flags, build_params.shutdown_event, build_params.pipeline_status, pipeline_direction); -} - -AsyncPushQueueElement::AsyncPushQueueElement(SpscQueue &&queue, EventPtr shutdown_event, const std::string &name, - std::chrono::milliseconds timeout, DurationCollector &&duration_collector, - AccumulatorPtr &&queue_size_accumulator, std::shared_ptr> &&pipeline_status, - Event &&activation_event, Event &&deactivation_event, PipelineDirection pipeline_direction) : - PushQueueElement(std::move(queue), shutdown_event, name, timeout, std::move(duration_collector), std::move(queue_size_accumulator), - std::move(pipeline_status), std::move(activation_event), std::move(deactivation_event), pipeline_direction, false) -{ - start_thread(); -} - -void AsyncPushQueueElement::run_push_async(PipelineBuffer &&buffer, const PipelinePad &/*sink*/) -{ - if (HAILO_SUCCESS != buffer.action_status()) { - auto status = m_queue.enqueue(std::move(buffer), m_timeout); - if (HAILO_SUCCESS != status) { - handle_non_recoverable_async_error(status); - } - return; - } - // TODO: Support fps/latency collection for queue elems (HRT-7711) - if (nullptr != m_queue_size_accumulator) { - 
m_queue_size_accumulator->add_data_point(static_cast(m_queue.size_approx())); - } - - auto status = m_queue.enqueue(std::move(buffer), m_timeout); - if (HAILO_SUCCESS != status && HAILO_SHUTDOWN_EVENT_SIGNALED != status) { - handle_non_recoverable_async_error(status); - } -} - -void AsyncPushQueueElement::start_thread() -{ - m_thread = std::thread([this] () { - OsUtils::set_current_thread_name(thread_name()); - while (m_is_thread_running.load()) { - auto status = m_activation_event.wait(INIFINITE_TIMEOUT()); - if (HAILO_SUCCESS != status) { - handle_non_recoverable_async_error(status); - } - - if (!m_is_thread_running) { - LOGGER__INFO("Thread in element {} is not running anymore, exiting..", this->name()); - break; - } - if (HAILO_SUCCESS == status) { - { - std::unique_lock lock(m_mutex); - m_is_run_in_thread_running = true; - } - m_cv.notify_all(); - - status = run_in_thread(); - if (HAILO_SUCCESS != status) { - handle_non_recoverable_async_error(status); - } - - { - std::unique_lock lock(m_mutex); - m_is_run_in_thread_running = false; - } - m_cv.notify_all(); - } - } - }); -} - -hailo_status AsyncPushQueueElement::run_push(PipelineBuffer &&/*buffer*/, const PipelinePad &/*sink*/) -{ - return HAILO_INVALID_OPERATION; -} - -hailo_status AsyncPushQueueElement::run_in_thread() -{ - auto buffer = m_queue.dequeue(INIFINITE_TIMEOUT()); - switch (buffer.status()) { - case HAILO_SHUTDOWN_EVENT_SIGNALED: - break; - - case HAILO_SUCCESS: - next_pad().run_push_async(buffer.release()); - break; - - default: - next_pad().run_push_async(PipelineBuffer(buffer.status())); - } - return buffer.status(); -} - - -Expected> PullQueueElement::create(const std::string &name, std::chrono::milliseconds timeout, - size_t queue_size, hailo_pipeline_elem_stats_flags_t flags, EventPtr shutdown_event, - std::shared_ptr> pipeline_status, PipelineDirection pipeline_direction) -{ - auto queue = BaseQueueElement::create_queue(queue_size, shutdown_event); - CHECK_EXPECTED(queue); - - auto 
activation_event = Event::create(Event::State::not_signalled); - CHECK_EXPECTED(activation_event); - - auto deactivation_event = Event::create(Event::State::not_signalled); - CHECK_EXPECTED(deactivation_event); - - // TODO: Support fps/latency collection for queue elems (HRT-7711) - auto duration_collector = DurationCollector::create(HAILO_PIPELINE_ELEM_STATS_NONE); - CHECK_EXPECTED(duration_collector); - - AccumulatorPtr queue_size_accumulator = nullptr; - if ((flags & HAILO_PIPELINE_ELEM_STATS_MEASURE_QUEUE_SIZE) != 0) { - queue_size_accumulator = make_shared_nothrow>("queue_size"); - CHECK_AS_EXPECTED(nullptr != queue_size_accumulator, HAILO_OUT_OF_HOST_MEMORY); - } - - auto queue_ptr = make_shared_nothrow(queue.release(), shutdown_event, name, timeout, - duration_collector.release(), std::move(queue_size_accumulator), std::move(pipeline_status), - activation_event.release(), deactivation_event.release(), pipeline_direction); - CHECK_AS_EXPECTED(nullptr != queue_ptr, HAILO_OUT_OF_HOST_MEMORY, "Creating PullQueueElement {} failed!", name); - - LOGGER__INFO("Created {}", queue_ptr->name()); - - return queue_ptr; -} -Expected> PullQueueElement::create(const std::string &name, const hailo_vstream_params_t &vstream_params, - EventPtr shutdown_event, std::shared_ptr> pipeline_status, PipelineDirection pipeline_direction) -{ - return PullQueueElement::create(name, std::chrono::milliseconds(vstream_params.timeout_ms), - vstream_params.queue_size, vstream_params.pipeline_elements_stats_flags, shutdown_event, pipeline_status, pipeline_direction); -} - -PullQueueElement::PullQueueElement(SpscQueue &&queue, EventPtr shutdown_event, const std::string &name, - std::chrono::milliseconds timeout, DurationCollector &&duration_collector, - AccumulatorPtr &&queue_size_accumulator, std::shared_ptr> &&pipeline_status, - Event &&activation_event, Event &&deactivation_event, PipelineDirection pipeline_direction) : - BaseQueueElement(std::move(queue), shutdown_event, name, timeout, 
std::move(duration_collector), std::move(queue_size_accumulator), - std::move(pipeline_status), std::move(activation_event), std::move(deactivation_event), pipeline_direction) -{ - start_thread(); -} - -PullQueueElement::~PullQueueElement() -{ - stop_thread(); -} - -hailo_status PullQueueElement::run_push(PipelineBuffer &&/*buffer*/, const PipelinePad &/*sink*/) -{ - return HAILO_INVALID_OPERATION; -} - -void PullQueueElement::run_push_async(PipelineBuffer &&/*buffer*/, const PipelinePad &/*sink*/) -{ - LOGGER__ERROR("run_push_async is not supported for {}", name()); - assert(false); -} - -Expected PullQueueElement::run_pull(PipelineBuffer &&optional, const PipelinePad &/*sink*/) -{ - // TODO: Support fps/latency collection for queue elems (HRT-7711) - CHECK_AS_EXPECTED(!optional, HAILO_INVALID_ARGUMENT, "Optional buffer is not allowed in queue element!"); - - if (nullptr != m_queue_size_accumulator) { - m_queue_size_accumulator->add_data_point(static_cast(m_queue.size_approx())); - } - auto output = m_queue.dequeue(m_timeout); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == output.status()) { - auto queue_thread_status = pipeline_status(); - CHECK_SUCCESS_AS_EXPECTED(queue_thread_status, - "Shutdown event was signaled in dequeue of queue element {} because thread has failed with status={}!", name(), - queue_thread_status); - LOGGER__INFO("Shutdown event was signaled in dequeue of queue element {}!", name()); - return make_unexpected(HAILO_SHUTDOWN_EVENT_SIGNALED); - } - CHECK_EXPECTED(output); - - return output; -} - -hailo_status PullQueueElement::execute_deactivate() -{ - hailo_status status = PipelineElement::execute_deactivate(); - auto shutdown_event_status = m_shutdown_event->signal(); - CHECK_SUCCESS(status); - CHECK_SUCCESS(shutdown_event_status); - - return HAILO_SUCCESS; -} - -PipelinePad &PullQueueElement::next_pad() -{ - // Note: The next elem to be run is upstream from this elem (i.e. 
buffers are pulled) - return *m_sinks[0].prev(); -} - -hailo_status PullQueueElement::run_in_thread() -{ - auto buffer = next_pad().run_pull(); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == buffer.status()) { - LOGGER__INFO("Shutdown event was signaled in run_pull of queue element {}!", name()); - return HAILO_SHUTDOWN_EVENT_SIGNALED; - } - if (HAILO_STREAM_ABORTED_BY_USER == buffer.status()) { - LOGGER__INFO("run_pull of queue element {} was aborted!", name()); - return HAILO_STREAM_ABORTED_BY_USER; - } - if (HAILO_NETWORK_GROUP_NOT_ACTIVATED == buffer.status()) { - LOGGER__INFO("run_pull of queue element {} was called before network_group is activated!", name()); - return HAILO_NETWORK_GROUP_NOT_ACTIVATED; - } - CHECK_EXPECTED_AS_STATUS(buffer); - - hailo_status status = m_queue.enqueue(buffer.release(), INIFINITE_TIMEOUT()); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == status) { - LOGGER__INFO("Shutdown event was signaled in enqueue of queue element {}!", name()); - return HAILO_SHUTDOWN_EVENT_SIGNALED; - } - CHECK_SUCCESS(status); - - return HAILO_SUCCESS; -} - -Expected> UserBufferQueueElement::create(const std::string &name, std::chrono::milliseconds timeout, - hailo_pipeline_elem_stats_flags_t flags, EventPtr shutdown_event, std::shared_ptr> pipeline_status, - PipelineDirection pipeline_direction) -{ - auto pending_buffer_queue = BaseQueueElement::create_queue(1, shutdown_event); - CHECK_EXPECTED(pending_buffer_queue); - - auto full_buffer_queue = BaseQueueElement::create_queue(1, shutdown_event); - CHECK_EXPECTED(full_buffer_queue); - - auto activation_event = Event::create(Event::State::not_signalled); - CHECK_EXPECTED(activation_event); - - auto deactivation_event = Event::create(Event::State::not_signalled); - CHECK_EXPECTED(deactivation_event); - - // TODO: Support fps/latency collection for queue elems (HRT-7711) - auto duration_collector = DurationCollector::create(HAILO_PIPELINE_ELEM_STATS_NONE); - CHECK_EXPECTED(duration_collector); - - AccumulatorPtr 
queue_size_accumulator = nullptr; - if ((flags & HAILO_PIPELINE_ELEM_STATS_MEASURE_QUEUE_SIZE) != 0) { - queue_size_accumulator = make_shared_nothrow>("queue_size"); - CHECK_AS_EXPECTED(nullptr != queue_size_accumulator, HAILO_OUT_OF_HOST_MEMORY); - } - - auto queue_ptr = make_shared_nothrow(pending_buffer_queue.release(), - full_buffer_queue.release(), shutdown_event, name, timeout, duration_collector.release(), - std::move(queue_size_accumulator), std::move(pipeline_status), activation_event.release(), - deactivation_event.release(), pipeline_direction); - CHECK_AS_EXPECTED(nullptr != queue_ptr, HAILO_OUT_OF_HOST_MEMORY, "Creating UserBufferQueueElement {} failed!", name); - - LOGGER__INFO("Created {}", queue_ptr->name()); - - return queue_ptr; -} - -Expected> UserBufferQueueElement::create(const std::string &name, const hailo_vstream_params_t &vstream_params, - EventPtr shutdown_event, std::shared_ptr> pipeline_status, PipelineDirection pipeline_direction) -{ - return UserBufferQueueElement::create(name, std::chrono::milliseconds(vstream_params.timeout_ms), - vstream_params.pipeline_elements_stats_flags, shutdown_event, pipeline_status, pipeline_direction); -} - -UserBufferQueueElement::UserBufferQueueElement(SpscQueue &&queue, SpscQueue &&full_buffer_queue, - EventPtr shutdown_event, const std::string &name, std::chrono::milliseconds timeout, - DurationCollector &&duration_collector, AccumulatorPtr &&queue_size_accumulator, - std::shared_ptr> &&pipeline_status, - Event &&activation_event, Event &&deactivation_event, - PipelineDirection pipeline_direction) : - PullQueueElement(std::move(queue), shutdown_event, name, timeout, std::move(duration_collector), - std::move(queue_size_accumulator), std::move(pipeline_status), std::move(activation_event), - std::move(deactivation_event), - pipeline_direction), - m_full_buffer_queue(std::move(full_buffer_queue)) -{} - -Expected UserBufferQueueElement::run_pull(PipelineBuffer &&optional, const PipelinePad &/*source*/) -{ 
- // TODO: Support fps/latency collection for queue elems (HRT-7711) - CHECK_AS_EXPECTED(optional, HAILO_INVALID_ARGUMENT, "Optional buffer must be valid in {}!", name()); - - hailo_status status = m_queue.enqueue(std::move(optional), m_timeout); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == status) { - LOGGER__INFO("Shutdown event was signaled in enqueue of queue element {}!", name()); - return make_unexpected(HAILO_SHUTDOWN_EVENT_SIGNALED); - } - CHECK_SUCCESS_AS_EXPECTED(status); - - if (nullptr != m_queue_size_accumulator) { - m_queue_size_accumulator->add_data_point(static_cast(m_full_buffer_queue.size_approx())); - } - auto output = m_full_buffer_queue.dequeue(m_timeout); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == output.status()) { - LOGGER__INFO("Shutdown event was signaled in dequeue of queue element {}!", name()); - return make_unexpected(HAILO_SHUTDOWN_EVENT_SIGNALED); - } - CHECK_AS_EXPECTED(HAILO_TIMEOUT != output.status(), HAILO_TIMEOUT, "{} (D2H) failed with status={} (timeout={}ms)", - name(), HAILO_TIMEOUT, m_timeout.count()); - CHECK_EXPECTED(output); - - CHECK_AS_EXPECTED(output->data() == optional.data(), HAILO_INTERNAL_FAILURE, "The buffer received in {} was not the same as the user buffer!", name()); - return output; -} - -hailo_status UserBufferQueueElement::execute_clear() -{ - auto status = PipelineElement::execute_clear(); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("Failed to clear() in {} with status {}", name(), status); - } - - auto queue_clear_status = m_full_buffer_queue.clear(); - if (HAILO_SUCCESS != queue_clear_status) { - LOGGER__ERROR("Failed to clear() in {} with status {}", name(), queue_clear_status); - status = queue_clear_status; - } - - queue_clear_status = m_queue.clear(); - if (HAILO_SUCCESS != queue_clear_status) { - LOGGER__ERROR("Failed to clear() in {} with status {}", name(), queue_clear_status); - status = queue_clear_status; - } - - return status; -} - -hailo_status UserBufferQueueElement::run_in_thread() -{ - auto 
optional = m_queue.dequeue(INIFINITE_TIMEOUT()); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == optional.status()) { - LOGGER__INFO("Shutdown event was signaled in dequeue of {}!", name()); - return HAILO_SHUTDOWN_EVENT_SIGNALED; - } - CHECK_EXPECTED_AS_STATUS(optional); - - auto buffer = next_pad().run_pull(optional.release()); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == buffer.status()) { - LOGGER__INFO("Shutdown event was signaled in run_pull of {}!", name()); - return HAILO_SHUTDOWN_EVENT_SIGNALED; - } - if (HAILO_STREAM_ABORTED_BY_USER == buffer.status()) { - LOGGER__INFO("run_pull of {} was aborted!", name()); - return HAILO_STREAM_ABORTED_BY_USER; - } - CHECK_EXPECTED_AS_STATUS(buffer); - - hailo_status status = m_full_buffer_queue.enqueue(buffer.release(), INIFINITE_TIMEOUT()); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == status) { - LOGGER__INFO("Shutdown event was signaled in enqueue of {}!", name()); - return HAILO_SHUTDOWN_EVENT_SIGNALED; - } - CHECK_SUCCESS(status); - - return HAILO_SUCCESS; -} - -BaseMuxElement::BaseMuxElement(size_t sink_count, const std::string &name, std::chrono::milliseconds timeout, - DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, - BufferPoolPtr buffer_pool, PipelineDirection pipeline_direction) : - PipelineElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction), - m_timeout(timeout), - m_pool(buffer_pool) -{ - m_sources.emplace_back(*this, name, PipelinePad::Type::SOURCE); - m_sinks.reserve(sink_count); - m_sink_has_arrived.reserve(sink_count); - for (uint32_t i = 0; i < sink_count; ++i) { - m_sinks.emplace_back(*this, name, PipelinePad::Type::SINK); - m_index_of_sink[m_sinks[i].name()] = i; - m_sink_has_arrived[m_sinks[i].name()] = false; - } -} - -std::vector BaseMuxElement::execution_pads() -{ - if (m_next_pads.size() == 0) { - if (PipelineDirection::PUSH == m_pipeline_direction) { - m_next_pads.reserve(m_sources.size()); - for (auto &source : m_sources ) { - 
m_next_pads.push_back(source.next()); - } - } else { - m_next_pads.reserve(m_sinks.size()); - for (auto &sink : m_sinks ) { - m_next_pads.push_back(sink.prev()); - } - } - } - return m_next_pads; -} - -hailo_status BaseMuxElement::run_push(PipelineBuffer &&/*buffer*/, const PipelinePad &/*sink*/) -{ - return HAILO_INVALID_OPERATION; -} - -void BaseMuxElement::run_push_async(PipelineBuffer &&buffer, const PipelinePad &sink) -{ - assert(PipelineDirection::PUSH == m_pipeline_direction); - assert(m_next_pads.size() == 1); - - std::unique_lock lock(m_mutex); - - m_sink_has_arrived[sink.name()] = true; - m_input_buffers[sink.name()] = std::move(buffer); - if (has_all_sinks_arrived()) { - for (auto &input_buffer : m_input_buffers) { - if (HAILO_SUCCESS != input_buffer.second.action_status()) { - auto acquired_buffer = m_pool->get_available_buffer(PipelineBuffer(), m_timeout); - if (HAILO_SUCCESS == acquired_buffer.status()) { - acquired_buffer->set_action_status(input_buffer.second.action_status()); - m_next_pads[0]->run_push_async(acquired_buffer.release()); - } else { - handle_non_recoverable_async_error(acquired_buffer.status()); - } - return; - } - } - std::vector input_buffers; - input_buffers.resize(m_input_buffers.size()); - for (auto &input_buffer : m_input_buffers) { - input_buffers[m_index_of_sink[input_buffer.first]] = std::move(input_buffer.second); - } - - auto output = action(std::move(input_buffers), PipelineBuffer()); - if (HAILO_SUCCESS == output.status()) { - m_next_pads[0]->run_push_async(output.release()); - } else { - m_next_pads[0]->run_push_async(PipelineBuffer(output.status())); - } - - for (const auto &curr_sink : m_sinks) { - m_sink_has_arrived[curr_sink.name()] = false; - } - m_input_buffers.clear(); - - // Manual unlocking is done before notifying, to avoid waking up the waiting thread only to block again - lock.unlock(); - m_cv.notify_all(); - } else { - auto cv_status = m_cv.wait_for(lock, m_timeout); - if (std::cv_status::timeout == 
cv_status) { - LOGGER__ERROR("Waiting for other threads in BaseMuxElement {} has reached a timeout (timeout={}ms)", name(), m_timeout.count()); - handle_non_recoverable_async_error(HAILO_TIMEOUT); - } - } -} - -bool BaseMuxElement::has_all_sinks_arrived() -{ - for (const auto ¤t_sink : m_sink_has_arrived) { - if (!current_sink.second) { - return false; - } - } - return true; -} -Expected BaseMuxElement::run_pull(PipelineBuffer &&optional, const PipelinePad &/*source*/) -{ - CHECK_AS_EXPECTED(m_pipeline_direction == PipelineDirection::PULL, HAILO_INVALID_OPERATION, - "PostInferElement {} does not support run_pull operation", name()); - std::vector inputs; - inputs.reserve(m_sinks.size()); - for (auto &sink : m_sinks) { - auto buffer = sink.prev()->run_pull(); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == buffer.status()) { - return make_unexpected(buffer.status()); - } - CHECK_EXPECTED(buffer); - - inputs.push_back(buffer.release()); - } - - auto output = action(std::move(inputs), std::move(optional)); - CHECK_EXPECTED(output); - - return output; -} - -hailo_status BaseMuxElement::enqueue_execution_buffer(MemoryView mem_view, const TransferDoneCallbackAsyncInfer &exec_done, const std::string &source_name) -{ - (void)source_name; - auto status = m_pool->enqueue_buffer(mem_view, exec_done); - CHECK_SUCCESS(status); - - return HAILO_SUCCESS; -} - -Expected BaseMuxElement::are_buffer_pools_full() -{ - return m_pool->is_full(); -} - -hailo_status BaseMuxElement::fill_buffer_pools(bool is_dma_able) -{ - auto status = m_pool->allocate_buffers(is_dma_able); - CHECK_SUCCESS(status); - - return HAILO_SUCCESS; -} - -BaseDemuxElement::BaseDemuxElement(size_t source_count, const std::string &name, std::chrono::milliseconds timeout, - DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, - std::vector pools, PipelineDirection pipeline_direction) : - PipelineElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction), - 
m_timeout(timeout), - m_pools(pools), - m_is_activated(false), - m_was_stream_aborted(false), - m_index_of_source(), - m_was_source_called(source_count, false), - m_buffers_for_action() -{ - m_sinks.emplace_back(*this, name, PipelinePad::Type::SINK); - m_sources.reserve(source_count); - for (uint32_t i = 0; i < source_count; i++) { - m_sources.emplace_back(*this, name, PipelinePad::Type::SOURCE); - m_index_of_source[m_sources[i].name()] = i; - } -} - -hailo_status BaseDemuxElement::run_push(PipelineBuffer &&buffer, const PipelinePad &/*sink*/) -{ - CHECK(PipelineDirection::PUSH == m_pipeline_direction, HAILO_INVALID_OPERATION, - "BaseDemuxElement {} does not support run_push operation", name()); - - auto outputs = action(std::move(buffer)); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == outputs.status()) { - return outputs.status(); - } - CHECK_EXPECTED_AS_STATUS(outputs); - - for (const auto pad : execution_pads()) { - assert(m_index_of_source.count(pad->prev()->name()) > 0); - auto source_index = m_index_of_source[pad->prev()->name()]; - - hailo_status status = pad->run_push(std::move(outputs.value()[source_index])); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == status) { - LOGGER__INFO("run_push of {} was shutdown!", name()); - return status; - } - if (HAILO_STREAM_ABORTED_BY_USER == status) { - LOGGER__INFO("run_push of {} was aborted!", name()); - return status; - } - CHECK_SUCCESS(status); - } - - return HAILO_SUCCESS; -} - -void BaseDemuxElement::run_push_async(PipelineBuffer &&buffer, const PipelinePad &/*sink*/) -{ - assert(PipelineDirection::PUSH == m_pipeline_direction); - - if (HAILO_SUCCESS != buffer.action_status()) { - for (const auto pad : execution_pads()) { - auto source_index = m_index_of_source[pad->prev()->name()]; - auto acquired_buffer = m_pools[source_index]->acquire_buffer(m_timeout); - if (HAILO_SUCCESS == acquired_buffer.status()) { - acquired_buffer->set_action_status(buffer.action_status()); - pad->run_push_async(acquired_buffer.release()); - } else { 
- handle_non_recoverable_async_error(acquired_buffer.status()); - } - } - return; - } - - auto outputs = action(std::move(buffer)); - - for (const auto pad : execution_pads()) { - assert(m_index_of_source.count(pad->prev()->name()) > 0); - auto source_index = m_index_of_source[pad->prev()->name()]; - if (HAILO_SUCCESS == outputs.status()) { - pad->run_push_async(std::move(outputs.value()[source_index])); - } else { - pad->run_push_async(PipelineBuffer(outputs.status())); - } - } -} - -Expected BaseDemuxElement::run_pull(PipelineBuffer &&optional, const PipelinePad &source) -{ - CHECK_AS_EXPECTED(m_pipeline_direction == PipelineDirection::PULL, HAILO_INVALID_OPERATION, - "BaseDemuxElement {} does not support run_pull operation", name()); - - CHECK_AS_EXPECTED(!optional, HAILO_INVALID_ARGUMENT, "Optional buffer is not allowed in demux element!"); - - std::unique_lock lock(m_mutex); - if (!m_is_activated) { - return make_unexpected(HAILO_SHUTDOWN_EVENT_SIGNALED); - } - - if (m_was_stream_aborted) { - return make_unexpected(HAILO_STREAM_ABORTED_BY_USER); - } - - m_was_source_called[m_index_of_source[source.name()]] = true; - - if (were_all_srcs_arrived()) { - // If all srcs arrived, execute the demux - auto input = execution_pads()[0]->run_pull(); - if (HAILO_STREAM_ABORTED_BY_USER == input.status()) { - LOGGER__INFO("run_pull of demux element was aborted!"); - m_was_stream_aborted = true; - lock.unlock(); - m_cv.notify_all(); - return make_unexpected(input.status()); - } - if (HAILO_SHUTDOWN_EVENT_SIGNALED == input.status()) { - return make_unexpected(input.status()); - } - CHECK_EXPECTED(input); - - auto outputs = action(input.release()); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == outputs.status()) { - return make_unexpected(outputs.status()); - } - CHECK_EXPECTED(outputs); - - m_buffers_for_action = outputs.release(); - - for (uint32_t i = 0; i < m_was_source_called.size(); i++) { - m_was_source_called[i] = false; - } - - // Manual unlocking is done before notifying, to 
avoid waking up the waiting thread only to block again - lock.unlock(); - m_cv.notify_all(); - } else { - // If not all srcs arrived, wait until m_was_source_called is false (set to false after the demux execution) - auto wait_successful = m_cv.wait_for(lock, m_timeout, [&](){ - return !m_was_source_called[m_index_of_source[source.name()]] || m_was_stream_aborted || !m_is_activated; - }); - CHECK_AS_EXPECTED(wait_successful, HAILO_TIMEOUT, "Waiting for other threads in demux {} has reached a timeout (timeout={}ms)", name(), m_timeout.count()); - - if (m_was_stream_aborted) { - lock.unlock(); - m_cv.notify_all(); - return make_unexpected(HAILO_STREAM_ABORTED_BY_USER); - } - - // We check if the element is not activated in case notify_all() was called from deactivate() - if (!m_is_activated) { - lock.unlock(); - m_cv.notify_all(); - return make_unexpected(HAILO_SHUTDOWN_EVENT_SIGNALED); - } - } - - assert(m_index_of_source[source.name()] < m_buffers_for_action.size()); - return std::move(m_buffers_for_action[m_index_of_source[source.name()]]); -} - -bool BaseDemuxElement::were_all_srcs_arrived() -{ - return std::all_of(m_was_source_called.begin(), m_was_source_called.end(), [](bool v) { return v; }); -} - -hailo_status BaseDemuxElement::execute_activate() -{ - if (m_is_activated) { - return HAILO_SUCCESS; - } - m_is_activated = true;// TODO Should this always be true, no matter the status of source().activate()? 
- m_was_stream_aborted = false; - - return PipelineElement::execute_activate(); -} - -hailo_status BaseDemuxElement::execute_deactivate() -{ - if (!m_is_activated) { - return HAILO_SUCCESS; - } - m_is_activated = false; - - // deactivate should be called before mutex acquire and notify_all because it is possible that all queues are waiting on - // the run_pull of the source (HwRead) and the mutex is already acquired so this would prevent a timeout error - hailo_status status = PipelineElement::execute_deactivate(); - - { - // There is a case where the other thread is halted (via context switch) before the wait_for() function, - // then we call notify_all() here, and then the wait_for() is called - resulting in a timeout. - // notify_all() only works on threads which are already waiting, so that's why we acquire the lock here. - std::unique_lock lock(m_mutex); - } - m_cv.notify_all(); - - CHECK_SUCCESS(status); - - return HAILO_SUCCESS; -} - -hailo_status BaseDemuxElement::execute_post_deactivate(bool should_clear_abort) -{ - for (uint32_t i = 0; i < m_was_source_called.size(); i++) { - m_was_source_called[i] = false; - } - return PipelineElement::execute_post_deactivate(should_clear_abort); -} - -hailo_status BaseDemuxElement::execute_abort() -{ - auto status = PipelineElement::execute_abort(); - CHECK_SUCCESS(status); - { - // There is a case where the other thread is halted (via context switch) before the wait_for() function, - // then we call notify_all() here, and then the wait_for() is called - resulting in a timeout. - // notify_all() only works on threads which are already waiting, so that's why we acquire the lock here. 
- std::unique_lock lock(m_mutex); - } - m_cv.notify_all(); - - return HAILO_SUCCESS; -} - -hailo_status BaseDemuxElement::set_timeout(std::chrono::milliseconds timeout) -{ - m_timeout = timeout; - return HAILO_SUCCESS; -} - -hailo_status BaseDemuxElement::enqueue_execution_buffer(MemoryView mem_view, const TransferDoneCallbackAsyncInfer &exec_done, const std::string &source_name) -{ - auto pool_id = m_index_of_source.at(source_name); - auto status = m_pools[pool_id]->enqueue_buffer(mem_view, exec_done); - CHECK_SUCCESS(status); - - return HAILO_SUCCESS; -} - -Expected BaseDemuxElement::are_buffer_pools_full() -{ - for (const auto &pool : m_pools) { - if (pool->is_full()) { - return true; - } - } - return false; -} - -hailo_status BaseDemuxElement::fill_buffer_pool(bool is_dma_able, size_t pool_id) { - auto status = m_pools[pool_id]->allocate_buffers(is_dma_able); - CHECK_SUCCESS(status); - - return HAILO_SUCCESS; -} - -hailo_status BaseDemuxElement::fill_buffer_pools(bool is_dma_able) { - for (auto &pool : m_pools) { - auto status = pool->allocate_buffers(is_dma_able); - CHECK_SUCCESS(status); - } - return HAILO_SUCCESS; -} - -std::vector BaseDemuxElement::execution_pads() -{ - if (m_next_pads.size() == 0) - { - if (PipelineDirection::PUSH == m_pipeline_direction) { - m_next_pads.reserve(m_sources.size()); - for (auto &source : m_sources ) { - m_next_pads.push_back(source.next()); - } - } else { - m_next_pads.reserve(m_sinks.size()); - for (auto &sink : m_sinks ) { - m_next_pads.push_back(sink.prev()); - } - } - } - return m_next_pads; -} - } /* namespace hailort */ diff --git a/hailort/libhailort/src/net_flow/pipeline/pipeline.hpp b/hailort/libhailort/src/net_flow/pipeline/pipeline.hpp index 6f0db70..8689a7b 100644 --- a/hailort/libhailort/src/net_flow/pipeline/pipeline.hpp +++ b/hailort/libhailort/src/net_flow/pipeline/pipeline.hpp @@ -26,15 +26,6 @@ namespace hailort { -struct ElementBuildParams -{ - std::shared_ptr> pipeline_status; - std::chrono::milliseconds 
timeout; - EventPtr shutdown_event; - size_t buffer_pool_size; - hailo_pipeline_elem_stats_flags_t elem_stats_flags; - hailo_vstream_stats_flags_t vstream_stats_flags; -}; enum class PipelineDirection { @@ -42,17 +33,22 @@ enum class PipelineDirection PUSH, }; -// TODO: need to think about naming and the right place to declare the CompletionInfoAsyncInferInternal and TransferDoneCallbackAsyncInfer -struct CompletionInfoAsyncInferInternal +enum class BufferType { - hailo_status status; + UNINITIALIZED, + VIEW, + PIX_BUFFER, }; -using TransferDoneCallbackAsyncInfer = std::function;; + +using TransferDoneCallbackAsyncInfer = std::function; using PipelineTimePoint = std::chrono::steady_clock::time_point; #define BUFFER_POOL_DEFAULT_QUEUE_TIMEOUT (std::chrono::milliseconds(10000)) #define DEFAULT_NUM_FRAMES_BEFORE_COLLECTION_START (100) +#define NUMBER_OF_PLANES_NV12_NV21 (2) +#define NUMBER_OF_PLANES_I420 (3) + struct AdditionalData {}; struct IouPipelineData : AdditionalData @@ -88,7 +84,7 @@ public: Metadata(); ~Metadata() = default; Metadata(const Metadata &) = default; - Metadata &operator=(const Metadata &) = delete; + Metadata &operator=(const Metadata &) = default; Metadata(Metadata &&other) = default; Metadata &operator=(Metadata &&other) = default; @@ -116,11 +112,12 @@ public: // Creates an empty PipelineBuffer (with no buffer/memory view) PipelineBuffer(); PipelineBuffer(Type type); - PipelineBuffer(hailo_status status); + PipelineBuffer(hailo_status status, const TransferDoneCallbackAsyncInfer &exec_done = [](hailo_status){}); PipelineBuffer(MemoryView view, bool is_user_buffer = true, BufferPoolPtr pool = nullptr, bool should_measure = false, hailo_status status = HAILO_SUCCESS); PipelineBuffer(MemoryView view, const TransferDoneCallbackAsyncInfer &exec_done, bool is_user_buffer = true, BufferPoolPtr pool = nullptr, bool should_measure = false, hailo_status status = HAILO_SUCCESS); PipelineBuffer(hailo_pix_buffer_t buffer); + 
PipelineBuffer(hailo_pix_buffer_t buffer, const TransferDoneCallbackAsyncInfer &exec_done); ~PipelineBuffer(); PipelineBuffer(const PipelineBuffer &) = delete; @@ -137,7 +134,7 @@ public: Metadata get_metadata() const; void set_metadata(Metadata &&val); void set_additional_data(std::shared_ptr data) { m_metadata.set_additional_data(data);} - TransferDoneCallbackAsyncInfer get_exec_done_cb() const; + TransferDoneCallbackAsyncInfer get_exec_done_cb(); hailo_status action_status(); void set_action_status(hailo_status status); @@ -148,9 +145,11 @@ private: TransferDoneCallbackAsyncInfer m_exec_done; Metadata m_metadata; bool m_is_user_buffer; + bool m_should_call_exec_done; hailo_status m_action_status; static PipelineTimePoint add_timestamp(bool should_measure); + static void release_buffer(BufferPoolPtr buffer_pool_ptr, MemoryView mem_view, bool is_user_buffer); }; // The buffer pool has to be created as a shared pointer (via the create function) because we use shared_from_this(), @@ -169,16 +168,18 @@ public: size_t buffer_size(); hailo_status enqueue_buffer(MemoryView mem_view); hailo_status enqueue_buffer(MemoryView mem_view, const TransferDoneCallbackAsyncInfer &exec_done); - hailo_status allocate_buffers(bool is_dma_able); - Expected acquire_buffer(std::chrono::milliseconds timeout); + hailo_status allocate_buffers(bool is_dma_able, size_t num_of_buffers); + Expected acquire_buffer(std::chrono::milliseconds timeout, bool ignore_shutdown_event = false); Expected> acquire_buffer_ptr(std::chrono::milliseconds timeout); AccumulatorPtr get_queue_size_accumulator(); Expected get_available_buffer(PipelineBuffer &&optional, std::chrono::milliseconds timeout); bool is_full(); + size_t num_of_buffers_in_pool(); + bool is_holding_user_buffers(); private: - Expected acquire_free_mem_view(std::chrono::milliseconds timeout); - Expected acquire_on_done_cb(std::chrono::milliseconds timeout); + Expected acquire_free_mem_view(std::chrono::milliseconds timeout, bool 
ignore_shutdown_event = false); + Expected acquire_on_done_cb(std::chrono::milliseconds timeout, bool ignore_shutdown_event = false); hailo_status release_buffer(MemoryView mem_view); const size_t m_buffer_size; @@ -289,6 +290,8 @@ public: hailo_status clear(); hailo_status flush(); hailo_status abort(); + hailo_status terminate(hailo_status error_status); + hailo_status dequeue_user_buffers(hailo_status error_status); hailo_status wait_for_finish(); hailo_status clear_abort(); virtual hailo_status run_push(PipelineBuffer &&buffer); @@ -339,23 +342,46 @@ public: hailo_status clear(); hailo_status flush(); hailo_status abort(); + hailo_status terminate(hailo_status error_status); + hailo_status dequeue_user_buffers(hailo_status error_status); hailo_status clear_abort(); hailo_status wait_for_finish(); AccumulatorPtr get_fps_accumulator(); AccumulatorPtr get_latency_accumulator(); + bool is_terminating_element(); virtual std::vector get_queue_size_accumulators(); std::vector &sinks(); std::vector &sources(); const std::vector &sinks() const; const std::vector &sources() const; virtual std::string description() const; - virtual void set_on_cant_pull_callback(std::function callback); - virtual void set_on_can_pull_callback(std::function callback); + virtual hailo_status enqueue_execution_buffer(MemoryView mem_view, const TransferDoneCallbackAsyncInfer &exec_done, const std::string &source_name); hailo_status enqueue_execution_buffer(MemoryView mem_view, const TransferDoneCallbackAsyncInfer &exec_done); - virtual Expected are_buffer_pools_full(); - virtual hailo_status fill_buffer_pools(bool is_dma_able); - void handle_non_recoverable_async_error(hailo_status error_status); + hailo_status empty_buffer_pool(BufferPoolPtr pool, hailo_status error_status, std::chrono::milliseconds timeout); + virtual Expected can_push_buffer_upstream(const uint32_t source_index = UINT32_MAX); + virtual Expected can_push_buffer_downstream(const uint32_t source_index = UINT32_MAX); + virtual 
hailo_status fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const uint32_t source_index = UINT32_MAX); + virtual Expected can_push_buffer_upstream(const std::string &source_name = ""); + virtual Expected can_push_buffer_downstream(const std::string &source_name = ""); + virtual hailo_status fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const std::string &source_name = ""); + + virtual Expected get_source_index_from_source_name(const std::string &/*source_name*/) { + // This function is overriden in multi-srcs elements + return 0; + } + + virtual hailo_status set_nms_score_threshold(float32_t /*threshold*/) { + return HAILO_INVALID_OPERATION; + } + + virtual hailo_status set_nms_iou_threshold(float32_t /*threshold*/) { + return HAILO_INVALID_OPERATION; + } + + virtual hailo_status set_nms_max_proposals_per_class(uint32_t /*max_proposals_per_class*/) { + return HAILO_INVALID_OPERATION; + } protected: DurationCollector m_duration_collector; @@ -363,9 +389,8 @@ protected: std::vector m_sinks; std::vector m_sources; PipelineDirection m_pipeline_direction; - - std::function m_cant_pull_callback; - std::function m_can_pull_callback; + bool m_is_terminating_element; + bool m_is_terminated; virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) = 0; virtual void run_push_async(PipelineBuffer &&buffer, const PipelinePad &sink) = 0; @@ -377,6 +402,8 @@ protected: virtual hailo_status execute_clear(); virtual hailo_status execute_flush(); virtual hailo_status execute_abort(); + virtual hailo_status execute_terminate(hailo_status error_status); + virtual hailo_status execute_dequeue_user_buffers(hailo_status error_status); virtual hailo_status execute_clear_abort(); virtual hailo_status execute_wait_for_finish(); @@ -385,328 +412,6 @@ protected: friend class PipelinePad; }; -// An element with one source pad only (generates data) -class SourceElement : public PipelineElement -{ -public: - SourceElement(const std::string &name, 
DurationCollector &&duration_collector, - std::shared_ptr> &&pipeline_status, - PipelineDirection pipeline_direction); - PipelinePad &source(); - -protected: - virtual std::vector execution_pads() override; -}; - -// An element with one sink pad only (consumes data) -class SinkElement : public PipelineElement -{ -public: - SinkElement(const std::string &name, DurationCollector &&duration_collector, - std::shared_ptr> &&pipeline_status, - PipelineDirection pipeline_direction); - PipelinePad &sink(); - -protected: - virtual std::vector execution_pads() override; -}; - -// Transfers data from one pad to another pad. Has one sink pad and one source pad. -class IntermediateElement : public PipelineElement -{ -public: - IntermediateElement(const std::string &name, DurationCollector &&duration_collector, - std::shared_ptr> &&pipeline_status, - PipelineDirection pipeline_direction); - virtual PipelinePad &next_pad() = 0; - -protected: - virtual std::vector execution_pads() override; -}; - -class FilterElement : public IntermediateElement -{ -public: - FilterElement(const std::string &name, DurationCollector &&duration_collector, - std::shared_ptr> &&pipeline_status, - PipelineDirection pipeline_direction, BufferPoolPtr buffer_pool, std::chrono::milliseconds timeout); - virtual ~FilterElement() = default; - - virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; - virtual void run_push_async(PipelineBuffer &&buffer, const PipelinePad &sink) override; - virtual Expected run_pull(PipelineBuffer &&optional, const PipelinePad &source) override; - virtual hailo_status enqueue_execution_buffer(MemoryView mem_view, const TransferDoneCallbackAsyncInfer &exec_done, const std::string &source_name) override; - virtual Expected are_buffer_pools_full() override; - virtual hailo_status fill_buffer_pools(bool is_dma_able) override; - virtual std::vector get_queue_size_accumulators() override; - -protected: - // The optional buffer functions as an output 
buffer that the user can write to instead of acquiring a new buffer - virtual Expected action(PipelineBuffer &&input, PipelineBuffer &&optional) = 0; - BufferPoolPtr m_pool; - std::chrono::milliseconds m_timeout; -}; - -class BaseQueueElement : public IntermediateElement -{ -public: - virtual ~BaseQueueElement(); - - hailo_status set_timeout(std::chrono::milliseconds timeout); - virtual std::string description() const override; - - static constexpr auto INIFINITE_TIMEOUT() { return std::chrono::milliseconds(HAILO_INFINITE); } - -protected: - static Expected> create_queue(size_t queue_size, EventPtr shutdown_event); - BaseQueueElement(SpscQueue &&queue, EventPtr shutdown_event, const std::string &name, - std::chrono::milliseconds timeout, DurationCollector &&duration_collector, - AccumulatorPtr &&queue_size_accumulator, std::shared_ptr> &&pipeline_status, - Event &&activation_event, Event &&deactivation_event, - PipelineDirection pipeline_direction); - - hailo_status pipeline_status(); - - virtual hailo_status execute_activate() override; - virtual hailo_status execute_post_deactivate(bool should_clear_abort) override; - virtual hailo_status execute_clear() override; - virtual hailo_status execute_clear_abort() override; - virtual hailo_status execute_wait_for_finish() override; - - virtual hailo_status enqueue_execution_buffer(MemoryView mem_view, const TransferDoneCallbackAsyncInfer &exec_done, const std::string &source_name) override; - virtual Expected are_buffer_pools_full() override; - virtual hailo_status fill_buffer_pools(bool is_dma_able) override; - - /// Starts/stops the queue thread. This functions needs to be called on subclasses ctor and dtor - /// accordingly because otherwise, if we will start/stop thread in this class we will face pure-call - /// to `run_in_thread`. 
- /// This functions don't return status because they are meant to be called on ctor and dtor - virtual void start_thread(); - virtual void stop_thread(); - - virtual std::vector get_queue_size_accumulators() override; - - virtual hailo_status run_in_thread() = 0; - virtual std::string thread_name() = 0; - - SpscQueue m_queue; - EventPtr m_shutdown_event; - std::chrono::milliseconds m_timeout; - std::thread m_thread; - std::atomic_bool m_is_thread_running; - Event m_activation_event; - Event m_deactivation_event; - AccumulatorPtr m_queue_size_accumulator; - std::atomic_bool m_is_run_in_thread_running; - std::condition_variable m_cv; - std::mutex m_mutex; -}; - -class PushQueueElement : public BaseQueueElement -{ -public: - static Expected> create(const std::string &name, std::chrono::milliseconds timeout, - size_t queue_size, hailo_pipeline_elem_stats_flags_t flags, EventPtr shutdown_event, - std::shared_ptr> pipeline_status, PipelineDirection pipeline_direction = PipelineDirection::PUSH); - static Expected> create(const std::string &name, const hailo_vstream_params_t &vstream_params, - EventPtr shutdown_event, std::shared_ptr> pipeline_status, - PipelineDirection pipeline_direction = PipelineDirection::PUSH); - PushQueueElement(SpscQueue &&queue, EventPtr shutdown_event, const std::string &name, - std::chrono::milliseconds timeout, DurationCollector &&duration_collector, AccumulatorPtr &&queue_size_accumulator, - std::shared_ptr> &&pipeline_status, Event &&activation_event, Event &&deactivation_event, - PipelineDirection pipeline_direction, bool should_start_thread = true); - virtual ~PushQueueElement(); - - virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; - virtual void run_push_async(PipelineBuffer &&buffer, const PipelinePad &sink) override; - virtual Expected run_pull(PipelineBuffer &&optional, const PipelinePad &source) override; - virtual PipelinePad &next_pad() override; - -protected: - virtual hailo_status 
execute_deactivate() override; - virtual hailo_status run_in_thread() override; - virtual std::string thread_name() override { return "PUSH_QUEUE"; }; - virtual hailo_status execute_abort() override; -}; - -class AsyncPushQueueElement : public PushQueueElement -{ -public: - static Expected> create(const std::string &name, std::chrono::milliseconds timeout, - size_t queue_size, hailo_pipeline_elem_stats_flags_t flags, EventPtr shutdown_event, - std::shared_ptr> pipeline_status, PipelineDirection pipeline_direction = PipelineDirection::PUSH); - static Expected> create(const std::string &name, const ElementBuildParams &build_params, - PipelineDirection pipeline_direction); - AsyncPushQueueElement(SpscQueue &&queue, EventPtr shutdown_event, const std::string &name, - std::chrono::milliseconds timeout, DurationCollector &&duration_collector, AccumulatorPtr &&queue_size_accumulator, - std::shared_ptr> &&pipeline_status, Event &&activation_event, Event &&deactivation_event, - PipelineDirection pipeline_direction); - - virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; - virtual void run_push_async(PipelineBuffer &&buffer, const PipelinePad &sink) override; - -protected: - virtual hailo_status run_in_thread() override; - virtual std::string thread_name() override { return "ASYNC_PUSH_Q"; }; - virtual void start_thread() override; -}; - -class PullQueueElement : public BaseQueueElement -{ -public: - static Expected> create(const std::string &name, std::chrono::milliseconds timeout, - size_t queue_size, hailo_pipeline_elem_stats_flags_t flags, EventPtr shutdown_event, - std::shared_ptr> pipeline_status, PipelineDirection pipeline_direction = PipelineDirection::PULL); - static Expected> create(const std::string &name, const hailo_vstream_params_t &vstream_params, - EventPtr shutdown_event, std::shared_ptr> pipeline_status, - PipelineDirection pipeline_direction = PipelineDirection::PULL); - PullQueueElement(SpscQueue &&queue, EventPtr 
shutdown_event, const std::string &name, - std::chrono::milliseconds timeout, DurationCollector &&duration_collector, AccumulatorPtr &&queue_size_accumulator, - std::shared_ptr> &&pipeline_status, Event &&activation_event, Event &&deactivation_event, - PipelineDirection pipeline_direction); - virtual ~PullQueueElement(); - - virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; - virtual void run_push_async(PipelineBuffer &&buffer, const PipelinePad &sink) override; - virtual Expected run_pull(PipelineBuffer &&optional, const PipelinePad &source) override; - virtual PipelinePad &next_pad() override; - - virtual void set_on_cant_pull_callback(std::function callback) override - { - m_cant_pull_callback = callback; - m_queue.set_on_cant_enqueue_callback([this] () { - m_cant_pull_callback(); - }); - } - - virtual void set_on_can_pull_callback(std::function callback) override - { - m_can_pull_callback = callback; - m_queue.set_on_can_enqueue_callback([this] () { - m_can_pull_callback(); - }); - } - -protected: - virtual hailo_status execute_deactivate() override; - virtual hailo_status run_in_thread() override; - virtual std::string thread_name() override { return "PULL_QUEUE"; }; -}; - -class UserBufferQueueElement : public PullQueueElement -{ -public: - static Expected> create(const std::string &name, std::chrono::milliseconds timeout, - hailo_pipeline_elem_stats_flags_t flags, EventPtr shutdown_event, std::shared_ptr> pipeline_status, - PipelineDirection pipeline_direction = PipelineDirection::PULL); - static Expected> create(const std::string &name, const hailo_vstream_params_t &vstream_params, - EventPtr shutdown_event, std::shared_ptr> pipeline_status, - PipelineDirection pipeline_direction = PipelineDirection::PULL); - UserBufferQueueElement(SpscQueue &&queue, SpscQueue &&full_buffer_queue, EventPtr shutdown_event, - const std::string &name, std::chrono::milliseconds timeout, DurationCollector &&duration_collector, 
AccumulatorPtr &&queue_size_accumulator, - std::shared_ptr> &&pipeline_status, Event &&activation_event, Event &&deactivation_event, - PipelineDirection pipeline_direction); - - virtual Expected run_pull(PipelineBuffer &&optional, const PipelinePad &source) override; - - virtual void set_on_cant_pull_callback(std::function callback) override - { - m_cant_pull_callback = callback; - } - - virtual void set_on_can_pull_callback(std::function callback) override - { - m_can_pull_callback = callback; - } - -protected: - virtual hailo_status execute_clear() override; - virtual hailo_status run_in_thread() override; - -private: - SpscQueue m_full_buffer_queue; -}; - -class BaseMuxElement : public PipelineElement -{ -public: - BaseMuxElement(size_t sink_count, const std::string &name, std::chrono::milliseconds timeout, - DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, - BufferPoolPtr buffer_pool, PipelineDirection pipeline_direction = PipelineDirection::PULL); - virtual ~BaseMuxElement() = default; - - virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; - virtual void run_push_async(PipelineBuffer &&buffer, const PipelinePad &sink) override; - virtual Expected run_pull(PipelineBuffer &&optional, const PipelinePad &source) override; - virtual hailo_status enqueue_execution_buffer(MemoryView mem_view, const TransferDoneCallbackAsyncInfer &exec_done, const std::string &source_name) override; - virtual Expected are_buffer_pools_full() override; - virtual hailo_status fill_buffer_pools(bool is_dma_able) override; - -protected: - virtual Expected action(std::vector &&inputs, PipelineBuffer &&optional) = 0; - virtual std::vector execution_pads() override; - - std::chrono::milliseconds m_timeout; - BufferPoolPtr m_pool; - -private: - bool has_all_sinks_arrived(); - std::unordered_map m_sink_has_arrived; - std::mutex m_mutex; - std::unordered_map m_index_of_sink; - std::unordered_map m_input_buffers; - std::vector 
m_next_pads; - std::condition_variable m_cv; -}; - -class BaseDemuxElement : public PipelineElement -{ -public: - BaseDemuxElement(size_t source_count, const std::string &name, std::chrono::milliseconds timeout, - DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, - std::vector pools, PipelineDirection pipeline_direction); - virtual ~BaseDemuxElement() = default; - - virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; - virtual void run_push_async(PipelineBuffer &&buffer, const PipelinePad &sink) override; - virtual Expected run_pull(PipelineBuffer &&optional, const PipelinePad &source) override; - hailo_status set_timeout(std::chrono::milliseconds timeout); - virtual hailo_status enqueue_execution_buffer(MemoryView mem_view, const TransferDoneCallbackAsyncInfer &exec_done, const std::string &source_name) override; - virtual Expected are_buffer_pools_full() override; - virtual hailo_status fill_buffer_pools(bool is_dma_able) override; - hailo_status fill_buffer_pool(bool is_dma_able, size_t pool_id); - -protected: - virtual hailo_status execute_activate() override; - virtual hailo_status execute_deactivate() override; - virtual hailo_status execute_post_deactivate(bool should_clear_abort) override; - virtual hailo_status execute_abort() override; - virtual Expected> action(PipelineBuffer &&input) = 0; - virtual std::vector execution_pads() override; - - std::chrono::milliseconds m_timeout; - std::vector m_pools; - -private: - bool were_all_srcs_arrived(); - - std::atomic_bool m_is_activated; - std::atomic_bool m_was_stream_aborted; - std::unordered_map m_index_of_source; - std::vector m_was_source_called; - std::vector m_buffers_for_action; - std::mutex m_mutex; - std::condition_variable m_cv; - std::vector m_next_pads; -}; - -enum class AccumulatorType -{ - FPS, - LATENCY, - QUEUE_SIZE -}; - } /* namespace hailort */ #endif /* _HAILO_PIPELINE_HPP_ */ diff --git 
a/hailort/libhailort/src/net_flow/pipeline/pipeline_builder.cpp b/hailort/libhailort/src/net_flow/pipeline/pipeline_builder.cpp new file mode 100644 index 0000000..6a6615c --- /dev/null +++ b/hailort/libhailort/src/net_flow/pipeline/pipeline_builder.cpp @@ -0,0 +1,964 @@ +/** + * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) +**/ +/** + * @file pipeline_builder.cpp + * @brief Async pipeline builder impl + **/ + +#include "pipeline_builder.hpp" +#include "hailo/hailort.h" +#include "net_flow/ops/yolov5_seg_post_process.hpp" +#include "net_flow/ops/yolov8_post_process.hpp" +#include "net_flow/ops/argmax_post_process.hpp" +#include "net_flow/ops/softmax_post_process.hpp" +#include "net_flow/ops/yolox_post_process.hpp" +#include "net_flow/ops/ssd_post_process.hpp" + +#include + +namespace hailort +{ + +Expected> PipelineBuilder::expand_auto_input_formats(std::shared_ptrnet_group, + const std::unordered_map &inputs_formats, const std::unordered_map &named_stream_infos) +{ + std::unordered_map expanded_input_format; + for (auto &input_format : inputs_formats) { + auto input_streams_names = net_group->get_stream_names_from_vstream_name(input_format.first); + CHECK_EXPECTED(input_streams_names); + + auto is_multi_planar = (input_streams_names.value().size() > 1); + if(is_multi_planar) { + auto vstream_info_exp = net_group->get_input_vstream_infos(); + CHECK_EXPECTED(vstream_info_exp); + auto vstream_infos = vstream_info_exp.release(); + auto matching_vstream_info = std::find_if(vstream_infos.begin(), vstream_infos.end(), [&](const auto &item) + { return item.name == input_format.first; } ); + CHECK_AS_EXPECTED(vstream_infos.end() != matching_vstream_info, HAILO_NOT_FOUND, + "Could not find input layer with name '{}'", input_format.first); + expanded_input_format[input_format.first] = + VStreamsBuilderUtils::expand_user_buffer_format_autos_multi_planar(*matching_vstream_info, 
input_format.second); + } else { + const auto &stream_name = input_streams_names.value()[0]; + CHECK_AS_EXPECTED(contains(named_stream_infos, stream_name), HAILO_INTERNAL_FAILURE); + const auto &stream_info = named_stream_infos.at(stream_name); + + expanded_input_format[input_format.first] = HailoRTDefaults::expand_auto_format(input_format.second, + stream_info.format); + } + } + return expanded_input_format; +} + +Expected> PipelineBuilder::expand_auto_output_formats(std::shared_ptr net_group, + const std::unordered_map &outputs_formats, const std::unordered_map &named_stream_infos) +{ + std::unordered_map expanded_output_format; + for (auto &output_format : outputs_formats) { + auto output_streams_names = net_group->get_stream_names_from_vstream_name(output_format.first); + CHECK_EXPECTED(output_streams_names); + + // TODO: Taking data from the first ll stream will not work in multi-planar work + const auto &stream_name = output_streams_names.value()[0]; + CHECK_AS_EXPECTED(contains(named_stream_infos, stream_name), HAILO_INTERNAL_FAILURE); + const auto &stream_info = named_stream_infos.at(stream_name); + + expanded_output_format[output_format.first] = HailoRTDefaults::expand_auto_format(output_format.second, + stream_info.format); + } + return expanded_output_format; +} + +hailo_status PipelineBuilder::create_pre_async_hw_elements_per_input(std::shared_ptr net_group, + const std::vector &stream_names, const std::unordered_map &inputs_formats, + const std::unordered_map &named_stream_infos, std::shared_ptr async_pipeline) +{ + auto vstream_names = net_group->get_vstream_names_from_stream_name(*stream_names.begin()); + CHECK_EXPECTED_AS_STATUS(vstream_names); + CHECK(vstream_names.value().size() == 1, HAILO_NOT_SUPPORTED, "low level stream must have exactly 1 user input"); + const auto &vstream_name = vstream_names.value()[0]; + std::shared_ptr multi_plane_splitter = nullptr; + std::shared_ptr last_element_connected_to_pipeline = nullptr; + + auto 
entry_queue_elem_expected = add_push_queue_element(PipelineObject::create_element_name("EntryPushQueueElement", vstream_name, 0), + async_pipeline, nullptr, 0); + CHECK_EXPECTED_AS_STATUS(entry_queue_elem_expected); + auto entry_queue_elem = entry_queue_elem_expected.release(); + async_pipeline->add_entry_element(entry_queue_elem, vstream_name); + last_element_connected_to_pipeline = entry_queue_elem; + + bool is_multi_planar = (stream_names.size() > 1); + if (is_multi_planar) { + async_pipeline->set_as_multi_planar(); + const auto &vstream_order = inputs_formats.at(vstream_name).order; + + auto multi_plane_splitter_expected = create_multi_plane_splitter_element(vstream_name, vstream_order, + async_pipeline->get_build_params().pipeline_status, async_pipeline); + CHECK_EXPECTED_AS_STATUS(multi_plane_splitter_expected); + multi_plane_splitter = multi_plane_splitter_expected.release(); + + async_pipeline->add_element_to_pipeline(multi_plane_splitter); + CHECK_SUCCESS(PipelinePad::link_pads(entry_queue_elem, multi_plane_splitter)); + } + + uint8_t plane_index = 0; + for (const auto &stream_name : stream_names) { + CHECK(contains(named_stream_infos, stream_name), HAILO_INTERNAL_FAILURE); + const auto &input_stream_info = named_stream_infos.at(stream_name); + + auto src_format = inputs_formats.at(vstream_name); + auto sink_index_expected = async_pipeline->get_async_hw_element()->get_sink_index_from_input_stream_name(stream_name); + CHECK_EXPECTED_AS_STATUS(sink_index_expected); + auto sink_index = static_cast(sink_index_expected.release()); + + if(is_multi_planar) { + auto post_split_push_queue = add_push_queue_element( + PipelineObject::create_element_name("PostSplitPushQueue", stream_name, sink_index), + async_pipeline, nullptr); + CHECK_EXPECTED_AS_STATUS(post_split_push_queue); + CHECK_SUCCESS(PipelinePad::link_pads(multi_plane_splitter, post_split_push_queue.value(), plane_index++)); + + last_element_connected_to_pipeline = post_split_push_queue.value(); + + /* In 
multi-planar case, the format order of each plane (stream) is determined by the ll-stream's order. + Type and flags are determined by the vstream params */ + src_format.order = input_stream_info.format.order; + } + + auto should_transform = InputTransformContext::is_transformation_required(input_stream_info.shape, + src_format, input_stream_info.hw_shape, input_stream_info.format, + std::vector(1, input_stream_info.quant_info)); // Inputs always have single quant_info + CHECK_EXPECTED_AS_STATUS(should_transform); + + if (should_transform.value()) { + bool is_dma_able = true; + auto pre_infer_elem = PreInferElement::create(input_stream_info.shape, src_format, + input_stream_info.hw_shape, input_stream_info.format, { input_stream_info.quant_info }, + PipelineObject::create_element_name("PreInferElement", stream_name, input_stream_info.index), + async_pipeline->get_build_params(), PipelineDirection::PUSH, is_dma_able, async_pipeline); + CHECK_EXPECTED_AS_STATUS(pre_infer_elem); + async_pipeline->add_element_to_pipeline(pre_infer_elem.value()); + CHECK_SUCCESS(PipelinePad::link_pads(last_element_connected_to_pipeline, pre_infer_elem.value())); + + auto queue_elem = add_push_queue_element(PipelineObject::create_element_name("PushQueueElement", stream_name, input_stream_info.index), + async_pipeline, pre_infer_elem.value()); + CHECK_EXPECTED_AS_STATUS(queue_elem); + CHECK_SUCCESS(PipelinePad::link_pads(pre_infer_elem.value(), queue_elem.value())); + CHECK_SUCCESS(PipelinePad::link_pads(queue_elem.value(), async_pipeline->get_async_hw_element(), 0, sink_index)); + } else { + CHECK_SUCCESS(PipelinePad::link_pads(last_element_connected_to_pipeline, async_pipeline->get_async_hw_element(), 0, sink_index)); + } + } + + return HAILO_SUCCESS; +} + +hailo_status PipelineBuilder::create_pre_async_hw_elements(std::shared_ptr net_group, + const std::unordered_map &inputs_formats, const std::unordered_map &named_stream_infos, + std::shared_ptr async_pipeline) +{ + for(const auto 
&input : inputs_formats) { + auto stream_names_under_vstream = net_group->get_stream_names_from_vstream_name(input.first); + CHECK_EXPECTED_AS_STATUS(stream_names_under_vstream); + + auto status = create_pre_async_hw_elements_per_input(net_group, stream_names_under_vstream.release(), inputs_formats, + named_stream_infos, async_pipeline); + CHECK_SUCCESS(status); + } + return HAILO_SUCCESS; +} + +Expected> PipelineBuilder::add_post_infer_element(const hailo_format_t &output_format, + const hailo_nms_info_t &nms_info, std::shared_ptr async_pipeline, const hailo_3d_image_shape_t &src_image_shape, + const hailo_format_t &src_format, const hailo_3d_image_shape_t &dst_image_shape, const std::vector &dst_quant_infos, + bool is_last_copy_element, std::shared_ptr final_elem, const uint32_t final_elem_source_index) +{ + auto queue_elem = add_push_queue_element(PipelineObject::create_element_name("PushQueueElement", final_elem->name(), static_cast(final_elem_source_index)), + async_pipeline, final_elem, final_elem_source_index); + CHECK_EXPECTED(queue_elem); + + auto post_infer_elem = PostInferElement::create(src_image_shape, src_format, dst_image_shape, output_format, + dst_quant_infos, nms_info, PipelineObject::create_element_name("PostInferElement", + final_elem->name(), static_cast(final_elem_source_index)), async_pipeline->get_build_params(), + PipelineDirection::PUSH, is_last_copy_element, async_pipeline); + CHECK_EXPECTED(post_infer_elem); + + async_pipeline->add_element_to_pipeline(post_infer_elem.value()); + + CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(queue_elem.value(), post_infer_elem.value())); + return post_infer_elem.release(); +} + +Expected> PipelineBuilder::add_push_queue_element(const std::string &queue_name, std::shared_ptr async_pipeline, + std::shared_ptr final_elem, const uint32_t final_elem_source_index) +{ + auto push_queue_elem = AsyncPushQueueElement::create(queue_name, async_pipeline->get_build_params(), async_pipeline, 
PipelineDirection::PUSH); + CHECK_EXPECTED(push_queue_elem); + + async_pipeline->add_element_to_pipeline(push_queue_elem.value()); + + // final elem will be nullptr in case it's the first element in pipeline + if (final_elem) { + CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(final_elem, push_queue_elem.value(), final_elem_source_index, 0)); + } + + return push_queue_elem.release(); +} + +Expected> PipelineBuilder::add_nms_to_detections_convert_element(std::shared_ptr async_pipeline, + const std::string &output_stream_name, uint8_t stream_index, const std::string &element_name, const net_flow::PostProcessOpMetadataPtr &op_metadata, + const bool is_last_copy_element, std::shared_ptr final_elem, const uint32_t final_elem_index) +{ + auto metadata = std::dynamic_pointer_cast(op_metadata); + assert(nullptr != metadata); + + auto nms_to_detections_element = ConvertNmsToDetectionsElement::create(metadata->nms_info(), + PipelineObject::create_element_name(element_name, output_stream_name, stream_index), + async_pipeline->get_build_params(), PipelineDirection::PUSH, is_last_copy_element, async_pipeline); + CHECK_EXPECTED(nms_to_detections_element); + + async_pipeline->add_element_to_pipeline(nms_to_detections_element.value()); + + CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(final_elem, nms_to_detections_element.value(), final_elem_index, 0)); + return nms_to_detections_element.release(); +} + +Expected> PipelineBuilder::add_remove_overlapping_bboxes_element(std::shared_ptr async_pipeline, + const std::string &output_stream_name, uint8_t stream_index, const std::string &element_name, const net_flow::PostProcessOpMetadataPtr &op_metadata, + const bool is_last_copy_element, std::shared_ptr final_elem, const uint32_t final_elem_index) +{ + auto metadata = std::dynamic_pointer_cast(op_metadata); + assert(nullptr != metadata); + + auto remove_overlapping_bboxes_element = RemoveOverlappingBboxesElement::create(metadata->nms_config(), + 
PipelineObject::create_element_name(element_name, output_stream_name, stream_index), + async_pipeline->get_build_params(), PipelineDirection::PUSH, is_last_copy_element, async_pipeline); + CHECK_EXPECTED(remove_overlapping_bboxes_element); + + async_pipeline->add_element_to_pipeline(remove_overlapping_bboxes_element.value()); + + CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(final_elem, remove_overlapping_bboxes_element.value(), final_elem_index, 0)); + return remove_overlapping_bboxes_element; +} + +Expected> PipelineBuilder::add_fill_nms_format_element(std::shared_ptr async_pipeline, + const std::string &output_stream_name, uint8_t stream_index, const std::string &element_name, const net_flow::PostProcessOpMetadataPtr &op_metadata, + const hailo_format_t &output_format, const bool is_last_copy_element, std::shared_ptr final_elem, const uint32_t final_elem_index) +{ + auto metadata = std::dynamic_pointer_cast(op_metadata); + assert(nullptr != metadata); + + auto fill_nms_format_element = FillNmsFormatElement::create(metadata->nms_info(), output_format, metadata->nms_config(), + PipelineObject::create_element_name(element_name, output_stream_name, stream_index), + async_pipeline->get_build_params(), PipelineDirection::PUSH, is_last_copy_element, async_pipeline); + CHECK_EXPECTED(fill_nms_format_element); + + async_pipeline->add_element_to_pipeline(fill_nms_format_element.value()); + + CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(final_elem, fill_nms_format_element.value(), final_elem_index, 0)); + return fill_nms_format_element; +} + +Expected> PipelineBuilder::add_last_async_element(std::shared_ptr async_pipeline, + const std::string &output_format_name, std::shared_ptr final_elem, const uint32_t final_elem_source_index) +{ + auto last_async_element = LastAsyncElement::create(PipelineObject::create_element_name("LastAsyncElement", + final_elem->name(), static_cast(final_elem_source_index)), async_pipeline->get_build_params(), async_pipeline); + 
CHECK_EXPECTED(last_async_element); + + async_pipeline->add_element_to_pipeline(last_async_element.value()); + CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(final_elem, last_async_element.value(), final_elem_source_index, 0)); + + async_pipeline->add_last_element(last_async_element.value(), output_format_name); + + return last_async_element.release(); +} + +Expected> PipelineBuilder::get_output_format_from_edge_info_name(const std::string &edge_info_name, + const std::unordered_map &outputs_formats) +{ + for (auto &output_format : outputs_formats) { + if (output_format.first == edge_info_name) { + return std::pair(output_format); + } + } + return make_unexpected(HAILO_NOT_FOUND); +} + +hailo_status PipelineBuilder::add_output_demux_flow(const std::string &output_stream_name, std::shared_ptr async_pipeline, + const std::unordered_map &outputs_formats, std::shared_ptr net_group, + const std::unordered_map &named_stream_infos) +{ + auto source_index = async_pipeline->get_async_hw_element()->get_source_index_from_output_stream_name(output_stream_name); + CHECK_EXPECTED_AS_STATUS(source_index); + const bool is_dma_able_hw_async = true; + // async_hw element must be filled with buffers like an edge element in order to support large batch sizes + auto status = async_pipeline->get_async_hw_element()->fill_buffer_pool(is_dma_able_hw_async, async_pipeline->get_build_params().buffer_pool_size_edges, *source_index); + CHECK_SUCCESS(status); + + CHECK(contains(named_stream_infos, output_stream_name), HAILO_INTERNAL_FAILURE); + const auto &stream_info = named_stream_infos.at(output_stream_name); + + auto layer_info = net_group->get_layer_info(output_stream_name); + CHECK_EXPECTED_AS_STATUS(layer_info); + + auto expected_demuxer = OutputDemuxerBase::create(stream_info.hw_frame_size, *layer_info.value()); + CHECK_EXPECTED_AS_STATUS(expected_demuxer); + + auto demuxer_ptr = make_shared_nothrow(expected_demuxer.release()); + CHECK_ARG_NOT_NULL(demuxer_ptr); + + auto demux_elem = 
TransformDemuxElement::create(demuxer_ptr, + PipelineObject::create_element_name("TransformDemuxElement", output_stream_name, stream_info.index), + async_pipeline->get_build_params(), PipelineDirection::PUSH, true, async_pipeline); + CHECK_EXPECTED_AS_STATUS(demux_elem); + async_pipeline->add_element_to_pipeline(demux_elem.value()); + + CHECK_SUCCESS(PipelinePad::link_pads(async_pipeline->get_async_hw_element(), demux_elem.value(), *source_index, 0)); + + uint8_t i = 0; + for (auto &edge_info : demuxer_ptr->get_edges_stream_info()) { + auto output_format_expected = get_output_format_from_edge_info_name(edge_info.name, outputs_formats); + CHECK_EXPECTED_AS_STATUS(output_format_expected); + + auto demux_queue_elem = add_push_queue_element(PipelineObject::create_element_name("PushQueueElement_demux", edge_info.name, i), async_pipeline, + demux_elem.value(), i); + CHECK_EXPECTED_AS_STATUS(demux_queue_elem); + + auto should_transform = OutputTransformContext::is_transformation_required(edge_info.hw_shape, + edge_info.format, edge_info.shape, output_format_expected.value().second, std::vector{edge_info.quant_info}); // TODO: Get quant vector (HRT-11077) + CHECK_EXPECTED_AS_STATUS(should_transform); + + if (should_transform.value()) { + status = demux_elem.value()->fill_buffer_pool(false, async_pipeline->get_build_params().buffer_pool_size_internal, i); + CHECK_SUCCESS(status); + + auto post_infer_elem = add_post_infer_element(output_format_expected.value().second, edge_info.nms_info, + async_pipeline, edge_info.hw_shape, edge_info.format, edge_info.shape, {edge_info.quant_info}, true, demux_queue_elem.value()); + CHECK_EXPECTED_AS_STATUS(post_infer_elem); + + auto last_async_element = add_last_async_element(async_pipeline, output_format_expected.value().first, post_infer_elem.value()); + CHECK_EXPECTED_AS_STATUS(last_async_element); + } else { + auto last_async_element = add_last_async_element(async_pipeline, output_format_expected.value().first, 
demux_queue_elem.value()); + CHECK_EXPECTED_AS_STATUS(last_async_element); + } + i++; + } + return HAILO_SUCCESS; +} + +Expected PipelineBuilder::should_transform(const hailo_stream_info_t &stream_info, const std::vector &stream_quant_infos, + const hailo_format_t &output_format) +{ + auto should_transform = OutputTransformContext::is_transformation_required(stream_info.hw_shape, + stream_info.format, stream_info.shape, output_format, stream_quant_infos); + CHECK_EXPECTED(should_transform); + return should_transform.release(); +} + +hailo_status PipelineBuilder::add_nms_fuse_flow(const std::vector &output_streams_names, + const std::pair &output_format, std::shared_ptr async_pipeline, + const std::unordered_map &named_stream_infos) +{ + const bool is_dma_able_hw_async = true; + for (const auto &stream_name : output_streams_names) { + auto output_index = async_pipeline->get_async_hw_element()->get_source_index_from_output_stream_name(stream_name); + CHECK_EXPECTED_AS_STATUS(output_index); + // async_hw element must be filled with buffers like an edge element in order to support large batch sizes + auto status = async_pipeline->get_async_hw_element()->fill_buffer_pool(is_dma_able_hw_async, async_pipeline->get_build_params().buffer_pool_size_edges, *output_index); + CHECK_SUCCESS(status); + } + + std::vector nms_infos; + nms_infos.reserve(output_streams_names.size()); + hailo_stream_info_t first_defused_stream_info = {}; + for (const auto &stream_name : output_streams_names) { + CHECK(contains(named_stream_infos, stream_name), HAILO_INTERNAL_FAILURE); + const auto &curr_stream_info = named_stream_infos.at(stream_name); + + CHECK(curr_stream_info.nms_info.defuse_info.class_group_index <= output_streams_names.size(), + HAILO_INVALID_ARGUMENT, "Not all defused nms outputs were grouped correctly!"); + nms_infos.emplace_back(curr_stream_info.nms_info); + first_defused_stream_info = curr_stream_info; + } + + // To get the fused layer name and src stream format, we use the 
stream info of one of the defuses + auto fused_layer_name = first_defused_stream_info.nms_info.defuse_info.original_name; + + bool is_last_copy_element = true; + auto nms_elem = NmsMuxElement::create(nms_infos, PipelineObject::create_element_name("NmsMuxElement", fused_layer_name, 0), + async_pipeline->get_build_params(), PipelineDirection::PUSH, is_last_copy_element, async_pipeline); + CHECK_EXPECTED_AS_STATUS(nms_elem); + + async_pipeline->add_element_to_pipeline(nms_elem.value()); + + uint32_t i = 0; + for (const auto &stream_name : output_streams_names) { + CHECK(contains(named_stream_infos, stream_name), HAILO_INTERNAL_FAILURE); + const auto &curr_stream_info = named_stream_infos.at(stream_name); + + auto output_index = async_pipeline->get_async_hw_element()->get_source_index_from_output_stream_name(stream_name); + CHECK_EXPECTED_AS_STATUS(output_index); + + auto queue_elem = add_push_queue_element(PipelineObject::create_element_name("PushQueueElement_nms_source", curr_stream_info.name, curr_stream_info.index), + async_pipeline, async_pipeline->get_async_hw_element(), output_index.value()); + CHECK_EXPECTED_AS_STATUS(queue_elem); + + CHECK_SUCCESS(PipelinePad::link_pads(queue_elem.value(), nms_elem.value(), 0, i)); + i++; + } + + // TODO(HRT-11078): Fix multi qp for fused NMS + auto stream_quant_infos = std::vector(1, first_defused_stream_info.quant_info); + + auto should_transform_expected = should_transform(first_defused_stream_info, stream_quant_infos, output_format.second); + CHECK_EXPECTED_AS_STATUS(should_transform_expected); + + if (should_transform_expected.value()) { + auto fused_layer_nms_info = nms_elem.value()->get_fused_nms_info(); + + hailo_status status = nms_elem.value()->fill_buffer_pool(false, async_pipeline->get_build_params().buffer_pool_size_internal, first_defused_stream_info.name); + CHECK_SUCCESS(status); + + auto post_infer_elem = add_post_infer_element(output_format.second, fused_layer_nms_info, async_pipeline, + 
first_defused_stream_info.hw_shape, first_defused_stream_info.format, first_defused_stream_info.shape, stream_quant_infos, true, nms_elem.value()); + CHECK_EXPECTED_AS_STATUS(post_infer_elem); + + auto last_async_element = add_last_async_element(async_pipeline, output_format.first, post_infer_elem.value()); + CHECK_EXPECTED_AS_STATUS(last_async_element); + } else { + auto last_async_element = add_last_async_element(async_pipeline, output_format.first, nms_elem.value()); + CHECK_EXPECTED_AS_STATUS(last_async_element); + } + + return HAILO_SUCCESS; +} + +hailo_status PipelineBuilder::add_softmax_flow(std::shared_ptr async_pipeline, const std::vector &output_streams_names, + const std::pair &output_format, const net_flow::PostProcessOpMetadataPtr &softmax_op_metadata, + const std::unordered_map &named_stream_infos) +{ + assert(output_streams_names.size() == 1); + const auto &stream_name = *output_streams_names.begin(); + + CHECK(contains(named_stream_infos, stream_name), HAILO_INTERNAL_FAILURE); + const auto &stream_info = named_stream_infos.at(stream_name); + + auto updated_output_format = output_format; + + auto hw_async_elem_index = async_pipeline->get_async_hw_element()->get_source_index_from_output_stream_name(stream_name); + CHECK_EXPECTED_AS_STATUS(hw_async_elem_index); + + auto op_input_format = softmax_op_metadata->inputs_metadata().begin()->second.format; + auto output_format_expanded = net_flow::SoftmaxOpMetadata::expand_output_format_autos(updated_output_format.second, op_input_format); + + // TODO (HRT-11078): Fix multi qp for PP + auto stream_quant_infos = std::vector(1, stream_info.quant_info); + + auto post_infer_elem = add_post_infer_element(output_format_expanded, {}, async_pipeline, stream_info.hw_shape, stream_info.format, + stream_info.shape, stream_quant_infos, false, async_pipeline->get_async_hw_element(), hw_async_elem_index.value()); + CHECK_EXPECTED_AS_STATUS(post_infer_elem); + + auto queue_elem = 
add_push_queue_element(PipelineObject::create_element_name("PushQueueElement_softmax", async_pipeline->get_async_hw_element()->name(), + static_cast(hw_async_elem_index.value())), async_pipeline, post_infer_elem.value()); + CHECK_EXPECTED_AS_STATUS(queue_elem); + + // Updating metadata according to user request + // Currently softmax only supports inputs to be float32 and order NHWC or NC + auto updated_inputs_metadata = softmax_op_metadata.get()->inputs_metadata(); + updated_inputs_metadata.begin()->second.format = output_format_expanded; + auto updated_outputs_metadata = softmax_op_metadata.get()->outputs_metadata(); + updated_outputs_metadata.begin()->second.format = output_format_expanded; + auto metadata = std::dynamic_pointer_cast(softmax_op_metadata); + assert(nullptr != metadata); + metadata->set_outputs_metadata(updated_outputs_metadata); + metadata->set_inputs_metadata(updated_inputs_metadata); + CHECK_SUCCESS(metadata->validate_format_info()); + + auto op_expected = net_flow::SoftmaxPostProcessOp::create(metadata); + CHECK_EXPECTED_AS_STATUS(op_expected); + + auto softmax_op = op_expected.release(); + auto softmax_element = SoftmaxPostProcessElement::create(softmax_op, + PipelineObject::create_element_name("SoftmaxPostProcessElement", stream_name, stream_info.index), + async_pipeline->get_build_params(), PipelineDirection::PUSH, true, async_pipeline); + CHECK_EXPECTED_AS_STATUS(softmax_element); + + async_pipeline->add_element_to_pipeline(softmax_element.value()); + CHECK_SUCCESS(PipelinePad::link_pads(queue_elem.value(), softmax_element.value())); + + auto last_async_element = add_last_async_element(async_pipeline, updated_output_format.first, softmax_element.value()); + CHECK_EXPECTED_AS_STATUS(last_async_element); + + return HAILO_SUCCESS; +} + +hailo_status PipelineBuilder::add_argmax_flow(std::shared_ptr async_pipeline, const std::vector &output_streams_names, + const std::pair &output_format, const net_flow::PostProcessOpMetadataPtr 
&argmax_op_metadata, + const std::unordered_map &named_stream_infos) +{ + assert(output_streams_names.size() == 1); + const auto &stream_name = *output_streams_names.begin(); + + CHECK(contains(named_stream_infos, stream_name), HAILO_INTERNAL_FAILURE); + const auto &stream_info = named_stream_infos.at(stream_name); + + auto hw_async_elem_index = async_pipeline->get_async_hw_element()->get_source_index_from_output_stream_name(stream_name); + CHECK_EXPECTED_AS_STATUS(hw_async_elem_index); + + auto queue_elem = add_push_queue_element(PipelineObject::create_element_name("PushQueueElement_argmax", async_pipeline->get_async_hw_element()->name(), + static_cast(hw_async_elem_index.value())), async_pipeline, async_pipeline->get_async_hw_element()); + CHECK_EXPECTED_AS_STATUS(queue_elem); + + // Updating metadata according to user request + auto op_input_format = argmax_op_metadata->inputs_metadata().begin()->second.format; + auto updated_outputs_metadata = argmax_op_metadata.get()->outputs_metadata(); + updated_outputs_metadata.begin()->second.format = net_flow::ArgmaxOpMetadata::expand_output_format_autos(output_format.second, op_input_format);; + auto metadata = std::dynamic_pointer_cast(argmax_op_metadata); + assert(nullptr != metadata); + metadata->set_outputs_metadata(updated_outputs_metadata); + CHECK_SUCCESS(metadata->validate_format_info()); + + auto op_expected = net_flow::ArgmaxPostProcessOp::create(metadata); + CHECK_EXPECTED_AS_STATUS(op_expected); + auto argmax_op = op_expected.release(); + bool is_last_copy_element = true; + + auto argmax_element = ArgmaxPostProcessElement::create(argmax_op, + PipelineObject::create_element_name("ArgmaxPostProcessElement", stream_name, stream_info.index), + async_pipeline->get_build_params(), PipelineDirection::PUSH, is_last_copy_element, async_pipeline); + CHECK_EXPECTED_AS_STATUS(argmax_element); + + async_pipeline->add_element_to_pipeline(argmax_element.value()); + CHECK_SUCCESS(PipelinePad::link_pads(queue_elem.value(), 
argmax_element.value())); + + auto last_async_element = add_last_async_element(async_pipeline, output_format.first, argmax_element.value()); + CHECK_EXPECTED_AS_STATUS(last_async_element); + + return HAILO_SUCCESS; +} + +hailo_status PipelineBuilder::add_nms_flow(std::shared_ptr async_pipeline, const std::vector &output_streams_names, + const std::pair &output_format, const std::shared_ptr &nms_op, + const hailo_vstream_info_t &vstream_info, const std::unordered_map &named_stream_infos) +{ + const auto &first_stream_name = *output_streams_names.begin(); + CHECK(contains(named_stream_infos, first_stream_name), HAILO_INTERNAL_FAILURE); + const auto &first_stream_info = named_stream_infos.at(first_stream_name); + + CHECK(output_format.second.type == HAILO_FORMAT_TYPE_FLOAT32, HAILO_INVALID_ARGUMENT, + "NMS output format type must be HAILO_FORMAT_TYPE_FLOAT32"); + CHECK(HailoRTCommon::is_nms(output_format.second.order), HAILO_INVALID_ARGUMENT, + "NMS output format order must be HAILO_FORMAT_ORDER_HAILO_NMS or HAILO_FORMAT_ORDER_HAILO_NMS_WITH_BYTE_MASK"); + + std::unordered_map inputs_metadata; + std::unordered_map outputs_metadata; + for (uint32_t i = 0; i < output_streams_names.size(); ++i) { + const auto &curr_stream_name = output_streams_names[i]; + CHECK(contains(named_stream_infos, curr_stream_name), HAILO_INTERNAL_FAILURE); + const auto &curr_stream_info = named_stream_infos.at(curr_stream_name); + + net_flow::BufferMetaData input_metadata = { + curr_stream_info.shape, + curr_stream_info.hw_shape, + curr_stream_info.format, + curr_stream_info.quant_info + }; + inputs_metadata.insert({curr_stream_info.name, input_metadata}); + } + + assert(nms_op->outputs_metadata().size() == 1); + + net_flow::BufferMetaData output_metadata = { + vstream_info.shape, + vstream_info.shape, + vstream_info.format, + vstream_info.quant_info + }; + outputs_metadata.insert({nms_op->outputs_metadata().begin()->first, output_metadata}); + + auto nms_elem = 
NmsPostProcessMuxElement::create(nms_op, PipelineObject::create_element_name("NmsPostProcessMuxElement", nms_op->get_name(), 0), + async_pipeline->get_build_params(), PipelineDirection::PUSH, true, async_pipeline); + CHECK_EXPECTED_AS_STATUS(nms_elem); + + async_pipeline->add_element_to_pipeline(nms_elem.value()); + + hailo_format_t nms_src_format = {}; + nms_src_format.flags = HAILO_FORMAT_FLAGS_NONE; + nms_src_format.order = HAILO_FORMAT_ORDER_NHCW; + nms_src_format.type = first_stream_info.format.type; + + for (uint32_t i = 0; i < output_streams_names.size(); ++i) { + const auto &curr_stream_name = output_streams_names[i]; + CHECK(contains(named_stream_infos, curr_stream_name), HAILO_INTERNAL_FAILURE); + const auto &curr_stream_info = named_stream_infos.at(curr_stream_name); + + // TODO (HRT-11052): Fix multi qp for NMS + auto stream_quant_infos = std::vector(1, curr_stream_info.quant_info); //output_stream_base->get_quant_infos(); + + auto should_transform = OutputTransformContext::is_transformation_required(curr_stream_info.hw_shape, curr_stream_info.format, + curr_stream_info.hw_shape, nms_src_format, stream_quant_infos); + CHECK_EXPECTED_AS_STATUS(should_transform); + + CHECK(!(should_transform.value()), HAILO_INVALID_ARGUMENT, "Unexpected transformation required for {}", curr_stream_name); + + auto source_id = async_pipeline->get_async_hw_element()->get_source_index_from_output_stream_name(curr_stream_name); + CHECK_EXPECTED_AS_STATUS(source_id); + + auto nms_source_queue_elem = add_push_queue_element(PipelineObject::create_element_name("PushQueueElement_nms_source", curr_stream_info.name, curr_stream_info.index), + async_pipeline, async_pipeline->get_async_hw_element(), source_id.value()); + CHECK_EXPECTED_AS_STATUS(nms_source_queue_elem); + + CHECK_SUCCESS(PipelinePad::link_pads(nms_source_queue_elem.value(), nms_elem.value(), 0, i)); + nms_elem.value()->add_sink_name(curr_stream_name); + } + auto last_async_element = 
add_last_async_element(async_pipeline, output_format.first, nms_elem.value()); + CHECK_EXPECTED_AS_STATUS(last_async_element); + + return HAILO_SUCCESS; +} + +hailo_status PipelineBuilder::add_iou_flow( std::shared_ptr async_pipeline, const std::vector &output_streams_names, + const std::pair &output_format, const net_flow::PostProcessOpMetadataPtr &iou_op_metadata, + const std::unordered_map &named_stream_infos) +{ + assert(output_streams_names.size() == 1); + auto output_stream_name = output_streams_names[0]; + CHECK(contains(named_stream_infos, output_stream_name), HAILO_INTERNAL_FAILURE); + const auto &output_stream_info = named_stream_infos.at(output_stream_name); + + auto output_index = async_pipeline->get_async_hw_element()->get_source_index_from_output_stream_name(output_stream_name); + CHECK_EXPECTED_AS_STATUS(output_index); + + auto hw_read_queue_element = add_push_queue_element(PipelineObject::create_element_name("PushQueueElement_hw_read", output_stream_name, output_stream_info.index), + async_pipeline, async_pipeline->get_async_hw_element() , output_index.value()); + CHECK_EXPECTED_AS_STATUS(hw_read_queue_element); + + // TODO (HRT-11078): Fix multi qp for PP + auto stream_quant_infos = std::vector(1, output_stream_info.quant_info); //output_stream_base->get_quant_infos(); + + auto post_infer_element = add_post_infer_element(output_format.second, output_stream_info.nms_info, + async_pipeline, output_stream_info.hw_shape, output_stream_info.format, output_stream_info.shape, stream_quant_infos, false, hw_read_queue_element.value()); + CHECK_EXPECTED_AS_STATUS(post_infer_element); + + auto pre_nms_convert_queue_element = add_push_queue_element(PipelineObject::create_element_name("PushQueueElement_pre_nms_convert", output_stream_name, + output_stream_info.index), async_pipeline, post_infer_element.value()); + CHECK_EXPECTED_AS_STATUS(pre_nms_convert_queue_element); + + auto nms_to_detections_element = add_nms_to_detections_convert_element(async_pipeline, 
output_stream_name, output_stream_info.index, + "NmsFormatToDetectionsElement", iou_op_metadata, false, pre_nms_convert_queue_element.value()); + CHECK_EXPECTED_AS_STATUS(nms_to_detections_element); + + auto pre_remove_overlapping_bboxes_element_queue_element = add_push_queue_element(PipelineObject::create_element_name("PushQueueElement_pre_bboxes_removing", + output_stream_name, output_stream_info.index), async_pipeline, nms_to_detections_element.value()); + CHECK_EXPECTED_AS_STATUS(pre_remove_overlapping_bboxes_element_queue_element); + + auto remove_overlapping_bboxes_element = add_remove_overlapping_bboxes_element(async_pipeline, output_stream_name, output_stream_info.index, + "RemoveOverlappingBboxesElement", iou_op_metadata, false, pre_remove_overlapping_bboxes_element_queue_element.value()); + CHECK_EXPECTED_AS_STATUS(remove_overlapping_bboxes_element); + + auto pre_fill_nms_format_element_queue_element = add_push_queue_element(PipelineObject::create_element_name("PushQueueElement_pre_fill_nms_format", output_stream_name, + output_stream_info.index), async_pipeline, remove_overlapping_bboxes_element.value()); + CHECK_EXPECTED_AS_STATUS(pre_fill_nms_format_element_queue_element); + + auto fill_nms_format_element = add_fill_nms_format_element(async_pipeline, output_stream_name, output_stream_info.index, + "FillNmsFormatElement", iou_op_metadata, output_format.second, true, pre_fill_nms_format_element_queue_element.value()); + CHECK_EXPECTED_AS_STATUS(fill_nms_format_element); + + auto last_async_element = add_last_async_element(async_pipeline, output_format.first, fill_nms_format_element.value()); + CHECK_EXPECTED_AS_STATUS(last_async_element); + + return HAILO_SUCCESS; +} + +hailo_status PipelineBuilder::add_nms_flows(std::shared_ptr async_pipeline, const std::vector &output_streams_names, + const std::pair &output_format, const net_flow::PostProcessOpMetadataPtr &op_metadata, + const std::vector &vstreams_infos, const std::unordered_map &named_stream_infos) 
+{ + assert(1 <= op_metadata->outputs_metadata().size()); + auto updated_outputs_metadata = op_metadata->outputs_metadata(); + std::pair expanded_output_format = {output_format.first, + net_flow::NmsOpMetadata::expand_output_format_autos_by_op_type(output_format.second, op_metadata->type())}; + updated_outputs_metadata.begin()->second.format = expanded_output_format.second; + + op_metadata->set_outputs_metadata(updated_outputs_metadata); + CHECK_SUCCESS(op_metadata->validate_format_info()); + std::shared_ptr op; + + switch (op_metadata->type()) { + case net_flow::OperationType::IOU: + return add_iou_flow(async_pipeline, output_streams_names, expanded_output_format, op_metadata, named_stream_infos); + + case net_flow::OperationType::YOLOX: + { + auto metadata = std::dynamic_pointer_cast(op_metadata); + assert(nullptr != metadata); + auto op_expected = net_flow::YOLOXPostProcessOp::create(metadata); + CHECK_EXPECTED_AS_STATUS(op_expected); + op = op_expected.release(); + break; + } + case net_flow::OperationType::YOLOV8: + { + auto metadata = std::dynamic_pointer_cast(op_metadata); + assert(nullptr != metadata); + auto op_expected = net_flow::YOLOV8PostProcessOp::create(metadata); + CHECK_EXPECTED_AS_STATUS(op_expected); + op = op_expected.release(); + break; + } + case net_flow::OperationType::YOLOV5: + { + auto metadata = std::dynamic_pointer_cast(op_metadata); + assert(nullptr != metadata); + auto op_expected = net_flow::YOLOv5PostProcessOp::create(metadata); + CHECK_EXPECTED_AS_STATUS(op_expected); + op = op_expected.release(); + break; + } + case (net_flow::OperationType::YOLOV5SEG): + { + auto metadata = std::dynamic_pointer_cast(op_metadata); + assert(nullptr != metadata); + auto op_expected = net_flow::Yolov5SegPostProcess::create(metadata); + CHECK_EXPECTED_AS_STATUS(op_expected); + op = op_expected.release(); + break; + } + case net_flow::OperationType::SSD: + { + auto metadata = std::dynamic_pointer_cast(op_metadata); + assert(nullptr != metadata); + auto 
op_expected = net_flow::SSDPostProcessOp::create(metadata); + CHECK_EXPECTED_AS_STATUS(op_expected); + op = op_expected.release(); + break; + } + default: + break; + } + hailo_vstream_info_t output_vstream_info; + for (auto ¤t_output_vstream_info : vstreams_infos) { + if (current_output_vstream_info.name == op->outputs_metadata().begin()->first) { + output_vstream_info = current_output_vstream_info; + } + } + return add_nms_flow(async_pipeline, output_streams_names, expanded_output_format, op, output_vstream_info, named_stream_infos); +} + +hailo_status PipelineBuilder::add_ops_flows(std::shared_ptr async_pipeline, + const std::pair &output_format, net_flow::PostProcessOpMetadataPtr &op_metadata, + const std::vector &output_streams_names, const std::vector &vstreams_infos, + const std::unordered_map &named_stream_infos) +{ + const bool is_dma_able_hw_async = true; + for (const auto &stream_name : output_streams_names) { + auto source_index = async_pipeline->get_async_hw_element()->get_source_index_from_output_stream_name(stream_name); + CHECK_EXPECTED_AS_STATUS(source_index); + // async_hw element must be filled with buffers like an edge element in order to support large batch sizes + auto status = async_pipeline->get_async_hw_element()->fill_buffer_pool(is_dma_able_hw_async, async_pipeline->get_build_params().buffer_pool_size_edges, *source_index); + CHECK_SUCCESS(status); + } + + switch (op_metadata->type()) { + case net_flow::OperationType::YOLOX: + case net_flow::OperationType::YOLOV8: + case net_flow::OperationType::SSD: + case net_flow::OperationType::YOLOV5: + case net_flow::OperationType::YOLOV5SEG: + case net_flow::OperationType::IOU: + return add_nms_flows(async_pipeline, output_streams_names, output_format, op_metadata, vstreams_infos, named_stream_infos); + + case net_flow::OperationType::ARGMAX: + return add_argmax_flow(async_pipeline, output_streams_names, output_format, op_metadata, named_stream_infos); + + case net_flow::OperationType::SOFTMAX: + 
return add_softmax_flow(async_pipeline, output_streams_names, output_format, op_metadata, named_stream_infos); + + default: + LOGGER__ERROR("op type {} of op {} is not in any of the supported post process OP types", net_flow::OpMetadata::get_operation_type_str(op_metadata->type()), op_metadata->get_name()); + return HAILO_INVALID_OPERATION; + } +} + +hailo_status PipelineBuilder::create_post_async_hw_elements(std::shared_ptr net_group, + const std::unordered_map &expanded_outputs_formats, std::unordered_map &original_outputs_formats, + const std::unordered_map &named_stream_infos, std::shared_ptr async_pipeline) +{ + // streams_added is a vector which holds all stream names which vstreams connected to them were already added (for demux cases) + std::vector streams_added; + + // Building DBs that connect output_vstreams, output_streams and ops. + // Note: Assuming each post process op has a unique output streams. + // In other words, not possible for an output stream to be connected to more than one op + std::unordered_map post_process_metadata; + std::unordered_map op_inputs_to_op_name; + for (auto &metadata : net_group->get_ops_metadata().release()) { + post_process_metadata.insert({metadata->get_name(), metadata}); + for (auto &input_name : metadata->get_input_names()) { + op_inputs_to_op_name.insert({input_name, metadata->get_name()}); + } + } + + for (auto &output_format : expanded_outputs_formats) { + auto stream_names = net_group->get_stream_names_from_vstream_name(output_format.first); + CHECK_EXPECTED_AS_STATUS(stream_names); + + if (contains(streams_added, *stream_names->begin())) { + continue; + } + for (auto &output_name : stream_names.value()) { + streams_added.push_back(output_name); + } + + CHECK(contains(named_stream_infos, *stream_names->begin()), HAILO_INTERNAL_FAILURE); + const auto &first_stream_info = named_stream_infos.at(*stream_names->begin()); + + if (contains(op_inputs_to_op_name, *stream_names->begin())) { + auto &op_name = 
op_inputs_to_op_name.at(*stream_names->begin()); + auto &op_metadata = post_process_metadata.at(op_name); + + auto output_vstreams_infos = net_group->get_output_vstream_infos(); + CHECK_EXPECTED_AS_STATUS(output_vstreams_infos); + + std::pair original_output_format = {output_format.first, original_outputs_formats.at(output_format.first)}; + + hailo_status status = add_ops_flows(async_pipeline, original_output_format, + op_metadata, stream_names.value(), output_vstreams_infos.value(), named_stream_infos); + CHECK_SUCCESS(status); + + } else if ((HAILO_FORMAT_ORDER_HAILO_NMS == first_stream_info.format.order) && + (first_stream_info.nms_info.is_defused)) { + // Case defuse NMS + hailo_status status = add_nms_fuse_flow(stream_names.value(), output_format, async_pipeline, named_stream_infos); + CHECK_SUCCESS(status); + } else if (first_stream_info.is_mux) { + // case demux in output from NN core (only one output stream is currently supported) + hailo_status status = add_output_demux_flow(*stream_names->begin(), async_pipeline, expanded_outputs_formats, net_group, named_stream_infos); + CHECK_SUCCESS(status); + } else { + // case simple and single output from NN core to user (and transformation at best) + auto final_elem_source_index = async_pipeline->get_async_hw_element()->get_source_index_from_output_stream_name(*stream_names->begin()); + CHECK_EXPECTED_AS_STATUS(final_elem_source_index); + + auto layer_info = net_group->get_layer_info(first_stream_info.name); + CHECK_EXPECTED_AS_STATUS(layer_info); + auto stream_quant_infos = layer_info.value()->quant_infos; + + auto should_transform_expected = should_transform(first_stream_info, stream_quant_infos, output_format.second); + CHECK_EXPECTED_AS_STATUS(should_transform_expected); + + if (should_transform_expected.value()) { + // async_hw element must be filled with buffers like an edge element in order to support large batch sizes + hailo_status status = async_pipeline->get_async_hw_element()->fill_buffer_pool(true, 
async_pipeline->get_build_params().buffer_pool_size_edges, *final_elem_source_index); + CHECK_SUCCESS(status); + + auto post_infer_elem = add_post_infer_element(output_format.second, first_stream_info.nms_info, async_pipeline, first_stream_info.hw_shape, + first_stream_info.format, first_stream_info.shape, stream_quant_infos, true, async_pipeline->get_async_hw_element(), final_elem_source_index.value()); + CHECK_EXPECTED_AS_STATUS(post_infer_elem); + + auto last_async_element = add_last_async_element(async_pipeline, output_format.first, post_infer_elem.value()); + CHECK_EXPECTED_AS_STATUS(last_async_element); + } else { + auto last_async_element = add_last_async_element(async_pipeline, output_format.first, async_pipeline->get_async_hw_element(), + final_elem_source_index.value()); + CHECK_EXPECTED_AS_STATUS(last_async_element); + } + } + } + return HAILO_SUCCESS; +} + +Expected> PipelineBuilder::create_pipeline(std::shared_ptr net_group, + const std::unordered_map &inputs_formats, + const std::unordered_map &outputs_formats, + const uint32_t timeout, std::shared_ptr> pipeline_status) +{ + std::unordered_map> entry_elements; + std::unordered_map> last_elements; + + ElementBuildParams build_params; + + // Buffer pool sizes for pipeline elements should be: + // * The minimum of the maximum queue size of all LL streams (input and output) - for edge elements + // * HAILO_DEFAULT_ASYNC_INFER_QUEUE_SIZE - for internal elements + auto buffer_pool_size_expected = net_group->get_min_buffer_pool_size(); + CHECK_EXPECTED(buffer_pool_size_expected); + build_params.buffer_pool_size_edges = buffer_pool_size_expected.release(); + build_params.buffer_pool_size_internal = std::min(static_cast(build_params.buffer_pool_size_edges), static_cast(HAILO_DEFAULT_ASYNC_INFER_QUEUE_SIZE)); + build_params.elem_stats_flags = HAILO_PIPELINE_ELEM_STATS_NONE; + build_params.vstream_stats_flags = HAILO_VSTREAM_STATS_NONE; + + auto async_pipeline_expected = AsyncPipeline::create_shared(); + 
CHECK_EXPECTED(async_pipeline_expected); + auto async_pipeline = async_pipeline_expected.release(); + + auto all_stream_infos = net_group->get_all_stream_infos(); + CHECK_EXPECTED(all_stream_infos); + + std::unordered_map named_stream_infos; + for (const auto &info : all_stream_infos.value()) { + named_stream_infos.emplace(info.name, info); + } + + auto input_expanded_format = expand_auto_input_formats(net_group, inputs_formats, named_stream_infos); + CHECK_EXPECTED(input_expanded_format); + + auto output_expanded_format = expand_auto_output_formats(net_group, outputs_formats, named_stream_infos); + CHECK_EXPECTED(output_expanded_format); + + auto outputs_original_formats = outputs_formats; // The original formats is needed for specific format expanding (required for PP OPs, like argmax) + + auto shutdown_event_expected = Event::create_shared(Event::State::not_signalled); + CHECK_EXPECTED(shutdown_event_expected); + + build_params.shutdown_event = shutdown_event_expected.release(); + build_params.pipeline_status = pipeline_status; + build_params.timeout = std::chrono::milliseconds(timeout); + + async_pipeline->set_build_params(build_params); + + // all elements in async pipeline start as last elements, and in the end of this func all non-last-copy elements will be added buffers + bool is_last_copy_element = true; + + auto async_hw_elem = AsyncHwElement::create(named_stream_infos, build_params.timeout, + build_params.buffer_pool_size_edges, build_params.elem_stats_flags, build_params.vstream_stats_flags, + build_params.shutdown_event, "AsyncHwElement", build_params.pipeline_status, net_group, + PipelineDirection::PUSH, is_last_copy_element, async_pipeline); + CHECK_EXPECTED(async_hw_elem); + async_pipeline->add_element_to_pipeline(async_hw_elem.value()); + async_pipeline->set_async_hw_element(async_hw_elem.release()); + + hailo_status status = create_pre_async_hw_elements(net_group, input_expanded_format.value(), named_stream_infos, + async_pipeline); + 
CHECK_SUCCESS_AS_EXPECTED(status); + + status = create_post_async_hw_elements(net_group, output_expanded_format.value(), outputs_original_formats, named_stream_infos, + async_pipeline); + CHECK_SUCCESS_AS_EXPECTED(status); + + return async_pipeline; +} + +Expected> PipelineBuilder::create_multi_plane_splitter_element(const std::string &input_name, + hailo_format_order_t order, std::shared_ptr> pipeline_status, std::shared_ptr async_pipeline) +{ + CHECK_AS_EXPECTED((HAILO_FORMAT_ORDER_NV12 == order) || (HAILO_FORMAT_ORDER_NV21 == order) || (HAILO_FORMAT_ORDER_I420 == order), + HAILO_INVALID_ARGUMENT, "The given order ({}) is not a multi-planar order", HailoRTCommon::get_format_order_str(order)); + + // TODO: Support fps/latency collection for queue elems (HRT-7711) + auto duration_collector_expected = DurationCollector::create(HAILO_PIPELINE_ELEM_STATS_NONE); + CHECK_EXPECTED(duration_collector_expected); + + auto planes_splitter = PixBufferElement::create(PipelineObject::create_element_name("PixBufferElement", + input_name, 0), std::chrono::milliseconds(HAILO_INFINITE), duration_collector_expected.release(), pipeline_status, order, + async_pipeline); + CHECK_EXPECTED(planes_splitter); + + return planes_splitter.release(); +} + +} /* namespace hailort */ diff --git a/hailort/libhailort/src/net_flow/pipeline/pipeline_builder.hpp b/hailort/libhailort/src/net_flow/pipeline/pipeline_builder.hpp new file mode 100644 index 0000000..1ddf71c --- /dev/null +++ b/hailort/libhailort/src/net_flow/pipeline/pipeline_builder.hpp @@ -0,0 +1,101 @@ +/** + * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. 
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT) +**/ +/** + * @file pipeline_builder.hpp + * @brief Async Pipeline Builder + **/ + +#ifndef _HAILO_PIPELINE_BUILDER_HPP_ +#define _HAILO_PIPELINE_BUILDER_HPP_ + +#include "hailo/hailort.h" +#include "network_group/network_group_internal.hpp" +#include "net_flow/pipeline/vstream_internal.hpp" +#include "net_flow/pipeline/async_infer_runner.hpp" +#include "net_flow/ops/op.hpp" + +namespace hailort +{ + + +class PipelineBuilder final +{ +public: + PipelineBuilder() = delete; + + static Expected> create_pipeline(std::shared_ptr net_group, + const std::unordered_map &inputs_formats, + const std::unordered_map &outputs_formats, const uint32_t timeout, + std::shared_ptr> pipeline_status); + + static Expected> expand_auto_input_formats(std::shared_ptr net_group, + const std::unordered_map &inputs_formats, const std::unordered_map &named_stream_infos); + static Expected> expand_auto_output_formats(std::shared_ptr net_group, + const std::unordered_map &outputs_formats, const std::unordered_map &named_stream_infos); + static Expected> get_output_format_from_edge_info_name(const std::string &edge_info_name, + const std::unordered_map &outputs_formats); + + static hailo_status create_pre_async_hw_elements(std::shared_ptr net_group, + const std::unordered_map &inputs_formats, const std::unordered_map &named_stream_infos, + std::shared_ptr async_pipeline); + static hailo_status create_pre_async_hw_elements_per_input(std::shared_ptr net_group, + const std::vector &stream_names, const std::unordered_map &inputs_formats, + const std::unordered_map &named_stream_infos, std::shared_ptr async_pipeline); + static hailo_status create_post_async_hw_elements(std::shared_ptr net_group, + const std::unordered_map &expanded_outputs_formats, std::unordered_map &original_outputs_formats, + const std::unordered_map &named_stream_infos, std::shared_ptr async_pipeline); + + static hailo_status 
add_argmax_flow(std::shared_ptr async_pipeline, const std::vector &output_streams_names, + const std::pair &output_format, const net_flow::PostProcessOpMetadataPtr &argmax_op_metadata, + const std::unordered_map &named_stream_infos); + static hailo_status add_softmax_flow(std::shared_ptr async_pipeline, const std::vector &output_streams_names, + const std::pair &output_format, const net_flow::PostProcessOpMetadataPtr &softmax_op_metadata, + const std::unordered_map &named_stream_infos); + static hailo_status add_ops_flows(std::shared_ptr async_pipeline, const std::pair &output_format, + net_flow::PostProcessOpMetadataPtr &op_metadata, const std::vector &output_streams_names, + const std::vector &vstreams_infos, const std::unordered_map &named_stream_infos); + static hailo_status add_output_demux_flow(const std::string &output_stream_name, + std::shared_ptr async_pipeline, const std::unordered_map &outputs_formats, + std::shared_ptr net_group, const std::unordered_map &named_stream_infos); + static hailo_status add_nms_fuse_flow(const std::vector &output_streams_names, const std::pair &output_format, + std::shared_ptr async_pipeline, const std::unordered_map &named_stream_infos); + static hailo_status add_nms_flow(std::shared_ptr async_pipeline, const std::vector &output_streams_names, + const std::pair &output_format, const std::shared_ptr &nms_op, + const hailo_vstream_info_t &vstream_info, const std::unordered_map &named_stream_infos); + static hailo_status add_iou_flow(std::shared_ptr async_pipeline, const std::vector &output_streams_names, + const std::pair &output_format, const net_flow::PostProcessOpMetadataPtr &iou_op_metadata, + const std::unordered_map &named_stream_infos); + static hailo_status add_nms_flows(std::shared_ptr async_pipeline, const std::vector &output_streams_names, + const std::pair &output_format, const net_flow::PostProcessOpMetadataPtr &op_metadata, + const std::vector &vstreams_infos, const std::unordered_map &named_stream_infos); + + + 
static Expected> add_post_infer_element(const hailo_format_t &output_format, const hailo_nms_info_t &nms_info, + std::shared_ptr async_pipeline, const hailo_3d_image_shape_t &src_image_shape, const hailo_format_t &src_format, + const hailo_3d_image_shape_t &dst_image_shape, const std::vector &dst_quant_infos, bool is_last_copy_element, + std::shared_ptr final_elem, const uint32_t final_elem_source_index = 0); + static Expected> add_last_async_element(std::shared_ptr async_pipeline, + const std::string &output_format_name, std::shared_ptr final_elem, const uint32_t final_elem_source_index = 0); + static Expected> add_push_queue_element(const std::string &queue_name, std::shared_ptr async_pipeline, + std::shared_ptr final_elem, const uint32_t final_elem_source_index = 0); + static Expected> add_nms_to_detections_convert_element(std::shared_ptr async_pipeline, + const std::string &output_stream_name, uint8_t stream_index, const std::string &element_name, const net_flow::PostProcessOpMetadataPtr &op_metadata, + const bool is_last_copy_element, std::shared_ptr final_elem, const uint32_t final_elem_source_index = 0); + static Expected> add_remove_overlapping_bboxes_element(std::shared_ptr async_pipeline, + const std::string &output_stream_name, uint8_t stream_index, const std::string &element_name, const net_flow::PostProcessOpMetadataPtr &op_metadata, + const bool is_last_copy_element, std::shared_ptr final_elem, const uint32_t final_elem_source_index = 0); + static Expected> add_fill_nms_format_element(std::shared_ptr async_pipeline, + const std::string &output_stream_name, uint8_t stream_index, const std::string &element_name, const net_flow::PostProcessOpMetadataPtr &op_metadata, + const hailo_format_t &output_format, const bool is_last_copy_element, std::shared_ptr final_elem, const uint32_t final_elem_source_index = 0); + static Expected> create_multi_plane_splitter_element(const std::string &input_name, + hailo_format_order_t order, std::shared_ptr> 
pipeline_status, std::shared_ptr async_pipeline); + + static Expected should_transform(const hailo_stream_info_t &stream_info, const std::vector &stream_quant_infos, + const hailo_format_t &output_format); +}; + +} /* namespace hailort */ + +#endif /* _HAILO_PIPELINE_BUILDER_HPP_ */ diff --git a/hailort/libhailort/src/net_flow/pipeline/pipeline_internal.cpp b/hailort/libhailort/src/net_flow/pipeline/pipeline_internal.cpp new file mode 100644 index 0000000..084ea3a --- /dev/null +++ b/hailort/libhailort/src/net_flow/pipeline/pipeline_internal.cpp @@ -0,0 +1,1569 @@ +/** + * Copyright (c) 2020-2023 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file pipeline_internal.cpp + * @brief Implemention of the pipeline elements + **/ +#include "net_flow/pipeline/pipeline_internal.hpp" +#include "net_flow/pipeline/async_infer_runner.hpp" +#include "common/os_utils.hpp" +#include "common/runtime_statistics_internal.hpp" + +namespace hailort +{ + +PipelineElementInternal::PipelineElementInternal(const std::string &name, DurationCollector &&duration_collector, + std::shared_ptr> &&pipeline_status, + PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) : + PipelineElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction), + m_async_pipeline(async_pipeline) +{} + +void PipelineElementInternal::handle_non_recoverable_async_error(hailo_status error_status) +{ + hailo_status pipeline_status = m_pipeline_status->load(); + if ((HAILO_SUCCESS == pipeline_status) && (error_status != HAILO_SHUTDOWN_EVENT_SIGNALED)) { + LOGGER__ERROR("Non-recoverable Async Infer Pipeline error. 
status error code: {}", error_status); + m_is_terminating_element = true; + if (auto async_pipeline = m_async_pipeline.lock()) { + async_pipeline->shutdown(error_status); + } + } +} + +SourceElement::SourceElement(const std::string &name, DurationCollector &&duration_collector, + std::shared_ptr> &&pipeline_status, + PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) : + PipelineElementInternal(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, async_pipeline) +{ + m_sources.emplace_back(*this, name, PipelinePad::Type::SOURCE); +} + +PipelinePad &SourceElement::source() +{ + return m_sources[0]; +} + +std::vector SourceElement::execution_pads() +{ + std::vector result{&source()}; + return result; +} + +SinkElement::SinkElement(const std::string &name, DurationCollector &&duration_collector, + std::shared_ptr> &&pipeline_status, + PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) : + PipelineElementInternal(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, async_pipeline) +{ + m_sinks.emplace_back(*this, name, PipelinePad::Type::SINK); +} + +PipelinePad &SinkElement::sink() +{ + return m_sinks[0]; +} + +std::vector SinkElement::execution_pads() +{ + std::vector result{&sink()}; + return result; +} + +hailo_status SinkElement::execute_terminate(hailo_status /*error_status*/) +{ + return HAILO_SUCCESS; +} + +hailo_status SinkElement::execute_dequeue_user_buffers(hailo_status /*error_status*/) +{ + return HAILO_SUCCESS; +} + +hailo_status AsyncPushQueueElement::execute_dequeue_user_buffers(hailo_status error_status) +{ + auto status = m_queue.clear(); + CHECK_SUCCESS(PipelineElement::execute_dequeue_user_buffers(error_status)); + return status; +} + +IntermediateElement::IntermediateElement(const std::string &name, DurationCollector &&duration_collector, + std::shared_ptr> &&pipeline_status, + PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) 
: + PipelineElementInternal(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, async_pipeline) +{ + m_sinks.emplace_back(*this, name, PipelinePad::Type::SINK); + m_sources.emplace_back(*this, name, PipelinePad::Type::SOURCE); +} + +std::vector IntermediateElement::execution_pads() +{ + std::vector result{&next_pad()}; + return result; +} + +FilterElement::FilterElement(const std::string &name, DurationCollector &&duration_collector, + std::shared_ptr> &&pipeline_status, + PipelineDirection pipeline_direction, BufferPoolPtr buffer_pool, + std::chrono::milliseconds timeout, std::shared_ptr async_pipeline) : + IntermediateElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, async_pipeline), + m_pool(buffer_pool), + m_timeout(timeout) +{} + +hailo_status FilterElement::run_push(PipelineBuffer &&buffer, const PipelinePad &/*sink*/) +{ + auto output = action(std::move(buffer), PipelineBuffer()); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == output.status()) { + return output.status(); + } + CHECK_EXPECTED_AS_STATUS(output); + + hailo_status status = next_pad().run_push(output.release()); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == status) { + LOGGER__INFO("run_push of {} was shutdown!", name()); + return status; + } + if (HAILO_STREAM_ABORTED_BY_USER == status) { + LOGGER__INFO("run_push of {} was aborted!", name()); + return status; + } + CHECK_SUCCESS(status); + + return HAILO_SUCCESS; +} + +void FilterElement::run_push_async(PipelineBuffer &&buffer, const PipelinePad &/*sink*/) +{ + assert(m_pipeline_direction == PipelineDirection::PUSH); + if (HAILO_SUCCESS != buffer.action_status()) { + auto buffer_from_pool = m_pool->get_available_buffer(PipelineBuffer(), m_timeout); + if (HAILO_SUCCESS != buffer_from_pool.status()) { + handle_non_recoverable_async_error(buffer_from_pool.status()); + } else { + buffer_from_pool->set_action_status(buffer.action_status()); + + auto exec_done_cb = buffer.get_exec_done_cb(); 
+ exec_done_cb(buffer.action_status()); + + next_pad().run_push_async(buffer_from_pool.release()); + } + return; + } + + auto output = action(std::move(buffer), PipelineBuffer()); + if (HAILO_SUCCESS == output.status()) { + next_pad().run_push_async(output.release()); + } else { + next_pad().run_push_async(PipelineBuffer(output.status())); + } + return; +} + +Expected FilterElement::run_pull(PipelineBuffer &&optional, const PipelinePad &/*source*/) +{ + auto buffer = next_pad().run_pull(); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == buffer.status()) { + LOGGER__INFO("run_pull in FilterElement was shutdown!"); + return make_unexpected(buffer.status()); + } + CHECK_EXPECTED(buffer); + return action(buffer.release(), std::move(optional)); +} + +std::vector FilterElement::get_queue_size_accumulators() +{ + if (nullptr == m_pool || nullptr == m_pool->get_queue_size_accumulator()) { + return std::vector(); + } + return {m_pool->get_queue_size_accumulator()}; +} + +hailo_status FilterElement::enqueue_execution_buffer(MemoryView mem_view, const TransferDoneCallbackAsyncInfer &exec_done, const std::string &source_name) +{ + (void)source_name; + + auto status = m_pool->enqueue_buffer(mem_view, exec_done); + CHECK_SUCCESS(status); + + return HAILO_SUCCESS; +} + +hailo_status FilterElement::execute_dequeue_user_buffers(hailo_status error_status) +{ + auto status = empty_buffer_pool(m_pool, error_status, m_timeout); + CHECK_SUCCESS(status); + return PipelineElement::execute_dequeue_user_buffers(error_status); +} + +Expected FilterElement::can_push_buffer_upstream(const uint32_t /*source_index*/) +{ + return !m_pool->is_full(); +} + +hailo_status FilterElement::fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const uint32_t /*source_index*/) +{ + auto status = m_pool->allocate_buffers(is_dma_able, num_of_buffers); + CHECK_SUCCESS(status); + + return HAILO_SUCCESS; +} + +Expected FilterElement::can_push_buffer_upstream(const std::string &source_name) +{ + auto source_index = 
get_source_index_from_source_name(source_name); + CHECK_EXPECTED(source_index); + return can_push_buffer_upstream(*source_index); +} + +hailo_status FilterElement::fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const std::string &source_name) +{ + auto source_index = get_source_index_from_source_name(source_name); + CHECK_EXPECTED_AS_STATUS(source_index); + return fill_buffer_pool(is_dma_able, num_of_buffers, *source_index); +} + +Expected> BaseQueueElement::create_queue(size_t queue_size, EventPtr shutdown_event) +{ + auto queue = SpscQueue::create(queue_size, shutdown_event); + CHECK_EXPECTED(queue); + + return queue.release(); +} + +BaseQueueElement::BaseQueueElement(SpscQueue &&queue, EventPtr shutdown_event, const std::string &name, + std::chrono::milliseconds timeout, DurationCollector &&duration_collector, + AccumulatorPtr &&queue_size_accumulator, std::shared_ptr> &&pipeline_status, + Event &&activation_event, Event &&deactivation_event, PipelineDirection pipeline_direction, + std::shared_ptr async_pipeline) : + IntermediateElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, async_pipeline), + m_queue(std::move(queue)), + m_shutdown_event(shutdown_event), + m_timeout(timeout), + m_is_thread_running(true), + m_activation_event(std::move(activation_event)), + m_deactivation_event(std::move(deactivation_event)), + m_queue_size_accumulator(std::move(queue_size_accumulator)), + m_is_run_in_thread_running(false) +{} + +BaseQueueElement::~BaseQueueElement() +{ + LOGGER__INFO("Queue element {} has {} frames in his Queue on destruction", name(), m_queue.size_approx()); +} + +void BaseQueueElement::start_thread() +{ + m_thread = std::thread([this] () { + OsUtils::set_current_thread_name(thread_name()); + while (m_is_thread_running.load()) { + auto status = m_activation_event.wait(INIFINITE_TIMEOUT()); + + if (!m_is_thread_running) { + LOGGER__INFO("Thread in element {} is not running anymore, exiting..", 
this->name()); + break; + } + if (HAILO_SUCCESS == status) { + { + std::unique_lock lock(m_mutex); + m_is_run_in_thread_running = true; + } + m_cv.notify_all(); + + status = run_in_thread(); + + { + std::unique_lock lock(m_mutex); + m_is_run_in_thread_running = false; + } + m_cv.notify_all(); + } + + if (HAILO_SUCCESS != status) { + if (HAILO_SHUTDOWN_EVENT_SIGNALED != status) { + // We do not want to log error for HAILO_STREAM_ABORTED_BY_USER + if (HAILO_STREAM_ABORTED_BY_USER != status) { + LOGGER__ERROR("Queue element {} run in thread function failed! status = {}", this->name(), status); + } + + // Store the real error in pipeline_status + m_pipeline_status->store(status); + + // Signal other threads to stop + hailo_status shutdown_status = m_shutdown_event->signal(); + if (HAILO_SUCCESS != shutdown_status) { + LOGGER__CRITICAL("Failed shutting down queue with status {}", shutdown_status); + } + } + //Thread has done its execution. Mark to the thread to wait for activation again + hailo_status event_status = m_activation_event.reset(); + if (HAILO_SUCCESS != event_status) { + LOGGER__CRITICAL("Failed reset activation event of element {}, with status {}", this->name(), event_status); + } + + // Mark to deactivation function that the thread is done + event_status = m_deactivation_event.signal(); + if (HAILO_SUCCESS != event_status) { + LOGGER__CRITICAL("Failed signaling deactivation event of element {}, with status {}", this->name(), event_status); + } + } + } + }); +} + +void BaseQueueElement::stop_thread() +{ + m_shutdown_event->signal(); + + // Mark thread as not running, then wake it in case it is waiting on m_activation_event + m_is_thread_running = false; + m_activation_event.signal(); + + if (m_thread.joinable()) { + m_thread.join(); + } +} + +std::vector BaseQueueElement::get_queue_size_accumulators() +{ + if (nullptr == m_queue_size_accumulator) { + return std::vector(); + } + return {m_queue_size_accumulator}; +} + +hailo_status 
BaseQueueElement::execute_activate() +{ + hailo_status status = PipelineElementInternal::execute_activate(); + CHECK_SUCCESS(status); + + status = m_activation_event.signal(); + CHECK_SUCCESS(status); + + return HAILO_SUCCESS; +} + +hailo_status BaseQueueElement::execute_post_deactivate(bool should_clear_abort) +{ + hailo_status status = m_deactivation_event.wait(INIFINITE_TIMEOUT()); + if (HAILO_SUCCESS != status) { + LOGGER__ERROR("Failed to post_deactivate() in {} with status {}", name(), status); + } + + status = m_deactivation_event.reset(); + if (HAILO_SUCCESS != status) { + LOGGER__ERROR("Failed to reset of deactivation event in {} with status {}", name(), status); + } + + return PipelineElementInternal::execute_post_deactivate(should_clear_abort); +} + +hailo_status BaseQueueElement::execute_clear() +{ + auto status = PipelineElementInternal::execute_clear(); + if (HAILO_SUCCESS != status) { + LOGGER__ERROR("Failed to clear() in {} with status {}", name(), status); + } + + auto queue_status = m_queue.clear(); + CHECK_SUCCESS(queue_status, "Failed to clear() queue in {} with status {}", name(), status); + + return status; +} + +hailo_status BaseQueueElement::execute_wait_for_finish() +{ + std::unique_lock lock(m_mutex); + m_cv.wait(lock, [this] () { + return !m_is_run_in_thread_running; + }); + return HAILO_SUCCESS; +} + +hailo_status BaseQueueElement::enqueue_execution_buffer(MemoryView mem_view, const TransferDoneCallbackAsyncInfer &exec_done, const std::string &source_name) +{ + (void)source_name; + return m_sinks[0].prev()->element().enqueue_execution_buffer(mem_view, exec_done, m_sinks[0].prev()->name()); +} + +Expected BaseQueueElement::can_push_buffer_upstream(const uint32_t source_index) +{ + return m_sinks[0].prev()->element().can_push_buffer_upstream(source_index); +} + +Expected BaseQueueElement::can_push_buffer_downstream(const uint32_t /*source_index*/) +{ + return !m_queue.is_queue_full(); +} + +hailo_status 
BaseQueueElement::fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const uint32_t source_index) +{ + return m_sinks[0].prev()->element().fill_buffer_pool(is_dma_able, num_of_buffers, source_index); +} + +Expected BaseQueueElement::can_push_buffer_upstream(const std::string &source_name) +{ + return m_sinks[0].prev()->element().can_push_buffer_upstream(source_name); +} + +Expected BaseQueueElement::can_push_buffer_downstream(const std::string &/*source_name*/) +{ + return !m_queue.is_queue_full(); +} + +hailo_status BaseQueueElement::fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const std::string &source_name) +{ + return m_sinks[0].prev()->element().fill_buffer_pool(is_dma_able, num_of_buffers, source_name); +} + +hailo_status PushQueueElement::execute_abort() +{ + auto status = m_shutdown_event->reset(); + CHECK_SUCCESS(status); + + m_pipeline_status->store(HAILO_STREAM_ABORTED_BY_USER); + + status = PipelineElementInternal::execute_abort(); + CHECK_SUCCESS(status); + + status = m_activation_event.signal(); + CHECK_SUCCESS(status); + + return HAILO_SUCCESS; +} + +hailo_status BaseQueueElement::execute_clear_abort() +{ + auto status = m_shutdown_event->reset(); + CHECK_SUCCESS(status); + + m_pipeline_status->store(HAILO_SUCCESS); + return PipelineElementInternal::execute_clear_abort(); +} + +hailo_status BaseQueueElement::set_timeout(std::chrono::milliseconds timeout) +{ + m_timeout = timeout; + return HAILO_SUCCESS; +} + +std::string BaseQueueElement::description() const +{ + std::stringstream element_description; + + element_description << "(" << this->name(); + if (HAILO_INFINITE != this->m_timeout.count()) { + element_description << " | timeout: " << std::chrono::duration_cast(this->m_timeout).count() << "s"; + } + element_description << ")"; + + return element_description.str(); +} + +hailo_status BaseQueueElement::pipeline_status() +{ + auto status = m_pipeline_status->load(); + + // We treat HAILO_STREAM_ABORTED_BY_USER as success 
because it is caused by user action (aborting streams) + if (HAILO_STREAM_ABORTED_BY_USER == status) { + return HAILO_SUCCESS; + } + return status; +} + +Expected> PushQueueElement::create(const std::string &name, std::chrono::milliseconds timeout, + size_t queue_size, hailo_pipeline_elem_stats_flags_t flags, EventPtr shutdown_event, + std::shared_ptr> pipeline_status, PipelineDirection pipeline_direction, + std::shared_ptr async_pipeline) +{ + auto queue = BaseQueueElement::create_queue(queue_size, shutdown_event); + CHECK_EXPECTED(queue); + + auto activation_event = Event::create(Event::State::not_signalled); + CHECK_EXPECTED(activation_event); + + auto deactivation_event = Event::create(Event::State::not_signalled); + CHECK_EXPECTED(deactivation_event); + + // TODO: Support fps/latency collection for queue elems (HRT-7711) + auto duration_collector = DurationCollector::create(HAILO_PIPELINE_ELEM_STATS_NONE); + CHECK_EXPECTED(duration_collector); + + AccumulatorPtr queue_size_accumulator = nullptr; + if ((flags & HAILO_PIPELINE_ELEM_STATS_MEASURE_QUEUE_SIZE) != 0) { + queue_size_accumulator = make_shared_nothrow>("queue_size"); + CHECK_AS_EXPECTED(nullptr != queue_size_accumulator, HAILO_OUT_OF_HOST_MEMORY); + } + + auto queue_ptr = make_shared_nothrow(queue.release(), shutdown_event, name, timeout, + duration_collector.release(), std::move(queue_size_accumulator), std::move(pipeline_status), + activation_event.release(), deactivation_event.release(), pipeline_direction, async_pipeline); + CHECK_AS_EXPECTED(nullptr != queue_ptr, HAILO_OUT_OF_HOST_MEMORY, "Creating PushQueueElement {} failed!", name); + + LOGGER__INFO("Created {}", queue_ptr->name()); + + return queue_ptr; +} + +Expected> PushQueueElement::create(const std::string &name, const hailo_vstream_params_t &vstream_params, + EventPtr shutdown_event, std::shared_ptr> pipeline_status, PipelineDirection pipeline_direction, + std::shared_ptr async_pipeline) +{ + return PushQueueElement::create(name, 
std::chrono::milliseconds(vstream_params.timeout_ms), vstream_params.queue_size, + vstream_params.pipeline_elements_stats_flags, shutdown_event, pipeline_status, pipeline_direction, async_pipeline); +} + +PushQueueElement::PushQueueElement(SpscQueue &&queue, EventPtr shutdown_event, const std::string &name, + std::chrono::milliseconds timeout, DurationCollector &&duration_collector, + AccumulatorPtr &&queue_size_accumulator, std::shared_ptr> &&pipeline_status, + Event &&activation_event, Event &&deactivation_event, PipelineDirection pipeline_direction, + std::shared_ptr async_pipeline, bool should_start_thread) : + BaseQueueElement(std::move(queue), shutdown_event, name, timeout, std::move(duration_collector), std::move(queue_size_accumulator), + std::move(pipeline_status), std::move(activation_event), std::move(deactivation_event), pipeline_direction, async_pipeline) +{ + if (should_start_thread) { + start_thread(); + } +} + +PushQueueElement::~PushQueueElement() +{ + stop_thread(); +} + +hailo_status PushQueueElement::run_push(PipelineBuffer &&buffer, const PipelinePad &/*sink*/) +{ + // TODO: Support fps/latency collection for queue elems (HRT-7711) + if (nullptr != m_queue_size_accumulator) { + m_queue_size_accumulator->add_data_point(static_cast(m_queue.size_approx())); + } + auto status = m_pipeline_status->load(); + if (HAILO_STREAM_ABORTED_BY_USER == status) { + LOGGER__INFO("run_push of {} was aborted!", name()); + return status; + } + CHECK_SUCCESS(m_pipeline_status->load()); + status = m_queue.enqueue(std::move(buffer), m_timeout); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == status) { + auto queue_thread_status = pipeline_status(); + CHECK_SUCCESS(queue_thread_status, + "Shutdown event was signaled in enqueue of queue element {} because thread has failed with status={}!", name(), + queue_thread_status); + LOGGER__INFO("Shutdown event was signaled in enqueue of queue element {}!", name()); + return HAILO_SHUTDOWN_EVENT_SIGNALED; + } + CHECK_SUCCESS(status); + 
return HAILO_SUCCESS; +} + +void PushQueueElement::run_push_async(PipelineBuffer &&/*buffer*/, const PipelinePad &/*sink*/) { + LOGGER__ERROR("run_push_async is not supported for {}", name()); + assert(false); +} + +Expected PushQueueElement::run_pull(PipelineBuffer &&/*optional*/, const PipelinePad &/*source*/) +{ + return make_unexpected(HAILO_INVALID_OPERATION); +} + +hailo_status PushQueueElement::execute_deactivate() +{ + // Mark to the threads that deactivate() was called. + hailo_status status = m_queue.enqueue(PipelineBuffer(PipelineBuffer::Type::DEACTIVATE)); + if (HAILO_SUCCESS != status) { + // We want to deactivate source even if enqueue failed + auto deactivation_status = PipelineElementInternal::execute_deactivate(); + CHECK_SUCCESS(deactivation_status); + if ((HAILO_STREAM_ABORTED_BY_USER == status) || (HAILO_SHUTDOWN_EVENT_SIGNALED == status)) { + LOGGER__INFO("enqueue() in element {} was aborted, got status = {}", name(), status); + } + else { + LOGGER__ERROR("enqueue() in element {} failed, got status = {}", name(), status); + return status; + } + } + + return HAILO_SUCCESS; +} + +PipelinePad &PushQueueElement::next_pad() +{ + // Note: The next elem to be run is downstream from this elem (i.e. 
buffers are pushed) + return *m_sources[0].next(); +} + +hailo_status PushQueueElement::run_in_thread() +{ + auto buffer = m_queue.dequeue(INIFINITE_TIMEOUT()); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == buffer.status()) { + LOGGER__INFO("Shutdown event was signaled in dequeue of queue element {}!", name()); + return HAILO_SHUTDOWN_EVENT_SIGNALED; + } + CHECK_EXPECTED_AS_STATUS(buffer); + + // Return if deactivated + if (PipelineBuffer::Type::DEACTIVATE == buffer->get_type()) { + hailo_status status = m_shutdown_event->signal(); + CHECK_SUCCESS(status); + + status = next_pad().deactivate(); + if (HAILO_SUCCESS != status) { + LOGGER__ERROR("Deactivate of source in {} has failed with status {}", name(), status); + } + + return HAILO_SHUTDOWN_EVENT_SIGNALED; + } + + hailo_status status = next_pad().run_push(buffer.release()); + if (HAILO_STREAM_ABORTED_BY_USER == status) { + LOGGER__INFO("run_push of {} was aborted!", name()); + return status; + } + CHECK_SUCCESS(status); + + return HAILO_SUCCESS; +} + +Expected> AsyncPushQueueElement::create(const std::string &name, std::chrono::milliseconds timeout, + size_t queue_size, hailo_pipeline_elem_stats_flags_t flags, EventPtr shutdown_event, std::shared_ptr> pipeline_status, + std::shared_ptr async_pipeline, PipelineDirection pipeline_direction) +{ + auto queue = BaseQueueElement::create_queue(queue_size, shutdown_event); + CHECK_EXPECTED(queue); + + auto activation_event = Event::create(Event::State::not_signalled); + CHECK_EXPECTED(activation_event); + + auto deactivation_event = Event::create(Event::State::not_signalled); + CHECK_EXPECTED(deactivation_event); + + // TODO: Support fps/latency collection for queue elems (HRT-7711) + auto duration_collector = DurationCollector::create(HAILO_PIPELINE_ELEM_STATS_NONE); + CHECK_EXPECTED(duration_collector); + + AccumulatorPtr queue_size_accumulator = nullptr; + if ((flags & HAILO_PIPELINE_ELEM_STATS_MEASURE_QUEUE_SIZE) != 0) { + queue_size_accumulator = 
make_shared_nothrow>("queue_size"); + CHECK_AS_EXPECTED(nullptr != queue_size_accumulator, HAILO_OUT_OF_HOST_MEMORY); + } + + auto queue_ptr = make_shared_nothrow(queue.release(), shutdown_event, name, timeout, + duration_collector.release(), std::move(queue_size_accumulator), std::move(pipeline_status), + activation_event.release(), deactivation_event.release(), pipeline_direction, async_pipeline); + CHECK_AS_EXPECTED(nullptr != queue_ptr, HAILO_OUT_OF_HOST_MEMORY, "Creating PushQueueElement {} failed!", name); + + LOGGER__INFO("Created {}", queue_ptr->name()); + + return queue_ptr; +} + +Expected> AsyncPushQueueElement::create(const std::string &name, const ElementBuildParams &build_params, + std::shared_ptr async_pipeline, PipelineDirection pipeline_direction) +{ + return AsyncPushQueueElement::create(name, build_params.timeout, build_params.buffer_pool_size_edges, + build_params.elem_stats_flags, build_params.shutdown_event, build_params.pipeline_status, async_pipeline, pipeline_direction); +} + +AsyncPushQueueElement::AsyncPushQueueElement(SpscQueue &&queue, EventPtr shutdown_event, const std::string &name, + std::chrono::milliseconds timeout, DurationCollector &&duration_collector, + AccumulatorPtr &&queue_size_accumulator, std::shared_ptr> &&pipeline_status, + Event &&activation_event, Event &&deactivation_event, PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) : + PushQueueElement(std::move(queue), shutdown_event, name, timeout, std::move(duration_collector), std::move(queue_size_accumulator), + std::move(pipeline_status), std::move(activation_event), std::move(deactivation_event), pipeline_direction, async_pipeline, false) +{ + start_thread(); +} + +void AsyncPushQueueElement::run_push_async(PipelineBuffer &&buffer, const PipelinePad &/*sink*/) +{ + // TODO: Support fps/latency collection for queue elems (HRT-7711) + if (nullptr != m_queue_size_accumulator) { + 
m_queue_size_accumulator->add_data_point(static_cast(m_queue.size_approx())); + } + + auto status = m_queue.enqueue(std::move(buffer), m_timeout); + if (HAILO_SUCCESS != status && HAILO_SHUTDOWN_EVENT_SIGNALED != status) { + handle_non_recoverable_async_error(status); + stop_thread(); + } +} + +void AsyncPushQueueElement::start_thread() +{ + m_thread = std::thread([this] () { + OsUtils::set_current_thread_name(thread_name()); + while (m_is_thread_running.load()) { + auto status = m_pipeline_status->load(); + if (HAILO_SUCCESS != status) { + LOGGER__INFO("Thread in element {} is not running anymore, exiting..", name()); + m_is_thread_running = false; + break; + } + + status = run_in_thread(); + if (HAILO_SUCCESS != status) { + handle_non_recoverable_async_error(status); + m_is_thread_running = false; + break; + } + } + }); +} + +hailo_status AsyncPushQueueElement::run_push(PipelineBuffer &&/*buffer*/, const PipelinePad &/*sink*/) +{ + return HAILO_INVALID_OPERATION; +} + +hailo_status AsyncPushQueueElement::run_in_thread() +{ + auto buffer = m_queue.dequeue(INIFINITE_TIMEOUT()); + auto buffer_status = buffer.status(); + switch (buffer_status) { + case HAILO_SHUTDOWN_EVENT_SIGNALED: + break; + + case HAILO_SUCCESS: + // Return if deactivated + if (PipelineBuffer::Type::DEACTIVATE == buffer->get_type()) { + hailo_status status = m_shutdown_event->signal(); + CHECK_SUCCESS(status); + + status = next_pad().deactivate(); + if (HAILO_SUCCESS != status) { + LOGGER__ERROR("Deactivate of source in {} has failed with status {}", name(), status); + } + + return HAILO_SHUTDOWN_EVENT_SIGNALED; + } + + next_pad().run_push_async(buffer.release()); + break; + + default: + next_pad().run_push_async(PipelineBuffer(buffer_status)); + } + + return buffer_status; +} + +hailo_status AsyncPushQueueElement::execute_deactivate() +{ + // Mark to the threads that deactivate() was called. 
+ hailo_status status = m_queue.enqueue(PipelineBuffer(PipelineBuffer::Type::DEACTIVATE)); + if (HAILO_SUCCESS != status) { + // We want to deactivate source even if enqueue failed + auto deactivation_status = PipelineElementInternal::execute_deactivate(); + CHECK_SUCCESS(deactivation_status); + if ((HAILO_STREAM_ABORTED_BY_USER == status) || (HAILO_SHUTDOWN_EVENT_SIGNALED == status)) { + LOGGER__INFO("enqueue() in element {} was aborted, got status = {}", name(), status); + } else { + LOGGER__ERROR("enqueue() in element {} failed, got status = {}", name(), status); + return status; + } + } + + return HAILO_SUCCESS; +} + +hailo_status AsyncPushQueueElement::execute_post_deactivate(bool should_clear_abort) +{ + // We marked thread to stop with PipelineBuffer::Type::DEACTIVATE, now we wait for it to finish + stop_thread(); + return PipelineElementInternal::execute_post_deactivate(should_clear_abort); +} + +hailo_status AsyncPushQueueElement::execute_terminate(hailo_status error_status) +{ + if (m_is_terminated) { + return HAILO_SUCCESS; + } + + auto terminate_status = PipelineElement::execute_terminate(error_status); + + if ((!next_pad().element().is_terminating_element())) { + stop_thread(); + } + + CHECK_SUCCESS(terminate_status); + + return HAILO_SUCCESS; +} + +Expected> PullQueueElement::create(const std::string &name, std::chrono::milliseconds timeout, + size_t queue_size, hailo_pipeline_elem_stats_flags_t flags, EventPtr shutdown_event, + std::shared_ptr> pipeline_status, PipelineDirection pipeline_direction) +{ + auto queue = BaseQueueElement::create_queue(queue_size, shutdown_event); + CHECK_EXPECTED(queue); + + auto activation_event = Event::create(Event::State::not_signalled); + CHECK_EXPECTED(activation_event); + + auto deactivation_event = Event::create(Event::State::not_signalled); + CHECK_EXPECTED(deactivation_event); + + // TODO: Support fps/latency collection for queue elems (HRT-7711) + auto duration_collector = 
DurationCollector::create(HAILO_PIPELINE_ELEM_STATS_NONE); + CHECK_EXPECTED(duration_collector); + + AccumulatorPtr queue_size_accumulator = nullptr; + if ((flags & HAILO_PIPELINE_ELEM_STATS_MEASURE_QUEUE_SIZE) != 0) { + queue_size_accumulator = make_shared_nothrow>("queue_size"); + CHECK_AS_EXPECTED(nullptr != queue_size_accumulator, HAILO_OUT_OF_HOST_MEMORY); + } + + auto queue_ptr = make_shared_nothrow(queue.release(), shutdown_event, name, timeout, + duration_collector.release(), std::move(queue_size_accumulator), std::move(pipeline_status), + activation_event.release(), deactivation_event.release(), pipeline_direction); + CHECK_AS_EXPECTED(nullptr != queue_ptr, HAILO_OUT_OF_HOST_MEMORY, "Creating PullQueueElement {} failed!", name); + + LOGGER__INFO("Created {}", queue_ptr->name()); + + return queue_ptr; +} +Expected> PullQueueElement::create(const std::string &name, const hailo_vstream_params_t &vstream_params, + EventPtr shutdown_event, std::shared_ptr> pipeline_status, PipelineDirection pipeline_direction) +{ + return PullQueueElement::create(name, std::chrono::milliseconds(vstream_params.timeout_ms), + vstream_params.queue_size, vstream_params.pipeline_elements_stats_flags, shutdown_event, pipeline_status, pipeline_direction); +} + +PullQueueElement::PullQueueElement(SpscQueue &&queue, EventPtr shutdown_event, const std::string &name, + std::chrono::milliseconds timeout, DurationCollector &&duration_collector, + AccumulatorPtr &&queue_size_accumulator, std::shared_ptr> &&pipeline_status, + Event &&activation_event, Event &&deactivation_event, PipelineDirection pipeline_direction) : + BaseQueueElement(std::move(queue), shutdown_event, name, timeout, std::move(duration_collector), std::move(queue_size_accumulator), + std::move(pipeline_status), std::move(activation_event), std::move(deactivation_event), pipeline_direction, nullptr) +{ + start_thread(); +} + +PullQueueElement::~PullQueueElement() +{ + stop_thread(); +} + +hailo_status 
PullQueueElement::run_push(PipelineBuffer &&/*buffer*/, const PipelinePad &/*sink*/) +{ + return HAILO_INVALID_OPERATION; +} + +void PullQueueElement::run_push_async(PipelineBuffer &&/*buffer*/, const PipelinePad &/*sink*/) +{ + LOGGER__ERROR("run_push_async is not supported for {}", name()); + assert(false); +} + +Expected PullQueueElement::run_pull(PipelineBuffer &&optional, const PipelinePad &/*sink*/) +{ + // TODO: Support fps/latency collection for queue elems (HRT-7711) + CHECK_AS_EXPECTED(!optional, HAILO_INVALID_ARGUMENT, "Optional buffer is not allowed in queue element!"); + + if (nullptr != m_queue_size_accumulator) { + m_queue_size_accumulator->add_data_point(static_cast(m_queue.size_approx())); + } + auto output = m_queue.dequeue(m_timeout); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == output.status()) { + auto queue_thread_status = pipeline_status(); + CHECK_SUCCESS_AS_EXPECTED(queue_thread_status, + "Shutdown event was signaled in dequeue of queue element {} because thread has failed with status={}!", name(), + queue_thread_status); + LOGGER__INFO("Shutdown event was signaled in dequeue of queue element {}!", name()); + return make_unexpected(HAILO_SHUTDOWN_EVENT_SIGNALED); + } + CHECK_EXPECTED(output); + + return output; +} + +hailo_status PullQueueElement::execute_deactivate() +{ + hailo_status status = PipelineElementInternal::execute_deactivate(); + auto shutdown_event_status = m_shutdown_event->signal(); + CHECK_SUCCESS(status); + CHECK_SUCCESS(shutdown_event_status); + + return HAILO_SUCCESS; +} + +PipelinePad &PullQueueElement::next_pad() +{ + // Note: The next elem to be run is upstream from this elem (i.e. 
buffers are pulled) + return *m_sinks[0].prev(); +} + +hailo_status PullQueueElement::run_in_thread() +{ + auto buffer = next_pad().run_pull(); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == buffer.status()) { + LOGGER__INFO("Shutdown event was signaled in run_pull of queue element {}!", name()); + return HAILO_SHUTDOWN_EVENT_SIGNALED; + } + if (HAILO_STREAM_ABORTED_BY_USER == buffer.status()) { + LOGGER__INFO("run_pull of queue element {} was aborted!", name()); + return HAILO_STREAM_ABORTED_BY_USER; + } + if (HAILO_NETWORK_GROUP_NOT_ACTIVATED == buffer.status()) { + LOGGER__INFO("run_pull of queue element {} was called before network_group is activated!", name()); + return HAILO_NETWORK_GROUP_NOT_ACTIVATED; + } + CHECK_EXPECTED_AS_STATUS(buffer); + + hailo_status status = m_queue.enqueue(buffer.release(), INIFINITE_TIMEOUT()); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == status) { + LOGGER__INFO("Shutdown event was signaled in enqueue of queue element {}!", name()); + return HAILO_SHUTDOWN_EVENT_SIGNALED; + } + CHECK_SUCCESS(status); + + return HAILO_SUCCESS; +} + +Expected> UserBufferQueueElement::create(const std::string &name, std::chrono::milliseconds timeout, + hailo_pipeline_elem_stats_flags_t flags, EventPtr shutdown_event, std::shared_ptr> pipeline_status, + PipelineDirection pipeline_direction) +{ + auto pending_buffer_queue = BaseQueueElement::create_queue(1, shutdown_event); + CHECK_EXPECTED(pending_buffer_queue); + + auto full_buffer_queue = BaseQueueElement::create_queue(1, shutdown_event); + CHECK_EXPECTED(full_buffer_queue); + + auto activation_event = Event::create(Event::State::not_signalled); + CHECK_EXPECTED(activation_event); + + auto deactivation_event = Event::create(Event::State::not_signalled); + CHECK_EXPECTED(deactivation_event); + + // TODO: Support fps/latency collection for queue elems (HRT-7711) + auto duration_collector = DurationCollector::create(HAILO_PIPELINE_ELEM_STATS_NONE); + CHECK_EXPECTED(duration_collector); + + AccumulatorPtr 
queue_size_accumulator = nullptr; + if ((flags & HAILO_PIPELINE_ELEM_STATS_MEASURE_QUEUE_SIZE) != 0) { + queue_size_accumulator = make_shared_nothrow>("queue_size"); + CHECK_AS_EXPECTED(nullptr != queue_size_accumulator, HAILO_OUT_OF_HOST_MEMORY); + } + + auto queue_ptr = make_shared_nothrow(pending_buffer_queue.release(), + full_buffer_queue.release(), shutdown_event, name, timeout, duration_collector.release(), + std::move(queue_size_accumulator), std::move(pipeline_status), activation_event.release(), + deactivation_event.release(), pipeline_direction); + CHECK_AS_EXPECTED(nullptr != queue_ptr, HAILO_OUT_OF_HOST_MEMORY, "Creating UserBufferQueueElement {} failed!", name); + + LOGGER__INFO("Created {}", queue_ptr->name()); + + return queue_ptr; +} + +Expected> UserBufferQueueElement::create(const std::string &name, const hailo_vstream_params_t &vstream_params, + EventPtr shutdown_event, std::shared_ptr> pipeline_status, PipelineDirection pipeline_direction) +{ + return UserBufferQueueElement::create(name, std::chrono::milliseconds(vstream_params.timeout_ms), + vstream_params.pipeline_elements_stats_flags, shutdown_event, pipeline_status, pipeline_direction); +} + +UserBufferQueueElement::UserBufferQueueElement(SpscQueue &&queue, SpscQueue &&full_buffer_queue, + EventPtr shutdown_event, const std::string &name, std::chrono::milliseconds timeout, + DurationCollector &&duration_collector, AccumulatorPtr &&queue_size_accumulator, + std::shared_ptr> &&pipeline_status, + Event &&activation_event, Event &&deactivation_event, + PipelineDirection pipeline_direction) : + PullQueueElement(std::move(queue), shutdown_event, name, timeout, std::move(duration_collector), + std::move(queue_size_accumulator), std::move(pipeline_status), std::move(activation_event), + std::move(deactivation_event), + pipeline_direction), + m_full_buffer_queue(std::move(full_buffer_queue)) +{} + +Expected UserBufferQueueElement::run_pull(PipelineBuffer &&optional, const PipelinePad &/*source*/) +{ 
+ // TODO: Support fps/latency collection for queue elems (HRT-7711) + CHECK_AS_EXPECTED(optional, HAILO_INVALID_ARGUMENT, "Optional buffer must be valid in {}!", name()); + + hailo_status status = m_queue.enqueue(std::move(optional), m_timeout); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == status) { + LOGGER__INFO("Shutdown event was signaled in enqueue of queue element {}!", name()); + return make_unexpected(HAILO_SHUTDOWN_EVENT_SIGNALED); + } + CHECK_SUCCESS_AS_EXPECTED(status); + + if (nullptr != m_queue_size_accumulator) { + m_queue_size_accumulator->add_data_point(static_cast(m_full_buffer_queue.size_approx())); + } + auto output = m_full_buffer_queue.dequeue(m_timeout); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == output.status()) { + LOGGER__INFO("Shutdown event was signaled in dequeue of queue element {}!", name()); + return make_unexpected(HAILO_SHUTDOWN_EVENT_SIGNALED); + } + CHECK_AS_EXPECTED(HAILO_TIMEOUT != output.status(), HAILO_TIMEOUT, "{} (D2H) failed with status={} (timeout={}ms)", + name(), HAILO_TIMEOUT, m_timeout.count()); + CHECK_EXPECTED(output); + + CHECK_AS_EXPECTED(output->data() == optional.data(), HAILO_INTERNAL_FAILURE, "The buffer received in {} was not the same as the user buffer!", name()); + return output; +} + +hailo_status UserBufferQueueElement::execute_clear() +{ + auto status = PipelineElementInternal::execute_clear(); + if (HAILO_SUCCESS != status) { + LOGGER__ERROR("Failed to clear() in {} with status {}", name(), status); + } + + auto queue_clear_status = m_full_buffer_queue.clear(); + if (HAILO_SUCCESS != queue_clear_status) { + LOGGER__ERROR("Failed to clear() in {} with status {}", name(), queue_clear_status); + status = queue_clear_status; + } + + queue_clear_status = m_queue.clear(); + if (HAILO_SUCCESS != queue_clear_status) { + LOGGER__ERROR("Failed to clear() in {} with status {}", name(), queue_clear_status); + status = queue_clear_status; + } + + return status; +} + +hailo_status UserBufferQueueElement::run_in_thread() +{ + 
auto optional = m_queue.dequeue(INIFINITE_TIMEOUT()); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == optional.status()) { + LOGGER__INFO("Shutdown event was signaled in dequeue of {}!", name()); + return HAILO_SHUTDOWN_EVENT_SIGNALED; + } + CHECK_EXPECTED_AS_STATUS(optional); + + auto buffer = next_pad().run_pull(optional.release()); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == buffer.status()) { + LOGGER__INFO("Shutdown event was signaled in run_pull of {}!", name()); + return HAILO_SHUTDOWN_EVENT_SIGNALED; + } + if (HAILO_STREAM_ABORTED_BY_USER == buffer.status()) { + LOGGER__INFO("run_pull of {} was aborted!", name()); + return HAILO_STREAM_ABORTED_BY_USER; + } + CHECK_EXPECTED_AS_STATUS(buffer); + + hailo_status status = m_full_buffer_queue.enqueue(buffer.release(), INIFINITE_TIMEOUT()); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == status) { + LOGGER__INFO("Shutdown event was signaled in enqueue of {}!", name()); + return HAILO_SHUTDOWN_EVENT_SIGNALED; + } + CHECK_SUCCESS(status); + + return HAILO_SUCCESS; +} + +BaseMuxElement::BaseMuxElement(size_t sink_count, const std::string &name, std::chrono::milliseconds timeout, + DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, + BufferPoolPtr buffer_pool, PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) : + PipelineElementInternal(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, async_pipeline), + m_timeout(timeout), + m_pool(buffer_pool) +{ + m_sources.emplace_back(*this, name, PipelinePad::Type::SOURCE); + m_sinks.reserve(sink_count); + m_sink_has_arrived.reserve(sink_count); + for (uint32_t i = 0; i < sink_count; ++i) { + m_sinks.emplace_back(*this, name, PipelinePad::Type::SINK); + m_index_of_sink[m_sinks[i].name()] = i; + m_sink_has_arrived[m_sinks[i].name()] = false; + } +} + +std::vector BaseMuxElement::execution_pads() +{ + if (m_next_pads.size() == 0) { + if (PipelineDirection::PUSH == m_pipeline_direction) { + 
m_next_pads.reserve(m_sources.size()); + for (auto &source : m_sources ) { + m_next_pads.push_back(source.next()); + } + } else { + m_next_pads.reserve(m_sinks.size()); + for (auto &sink : m_sinks ) { + m_next_pads.push_back(sink.prev()); + } + } + } + return m_next_pads; +} + +hailo_status BaseMuxElement::execute_terminate(hailo_status error_status) +{ + if (m_is_terminated) { + return HAILO_SUCCESS; + } + + auto terminate_status = PipelineElement::execute_terminate(error_status); + + if (!m_is_terminating_element) { + { + // There is a case where the other thread is halted (via context switch) before the wait_for() function, + // then we call notify_all() here, and then the wait_for() is called - resulting in a timeout. + // notify_all() only works on threads which are already waiting, so that's why we acquire the lock here. + std::unique_lock lock(m_mutex); + } + m_cv.notify_all(); + } + + CHECK_SUCCESS(terminate_status); + + return HAILO_SUCCESS; +} + + +hailo_status BaseMuxElement::run_push(PipelineBuffer &&/*buffer*/, const PipelinePad &/*sink*/) +{ + return HAILO_INVALID_OPERATION; +} + +void BaseMuxElement::run_push_async(PipelineBuffer &&buffer, const PipelinePad &sink) +{ + assert(PipelineDirection::PUSH == m_pipeline_direction); + assert(m_next_pads.size() == 1); + + std::unique_lock lock(m_mutex); + + m_sink_has_arrived[sink.name()] = true; + m_input_buffers[sink.name()] = std::move(buffer); + if (has_all_sinks_arrived()) { + hailo_status all_buffers_status = HAILO_SUCCESS; + for (auto &input_buffer : m_input_buffers) { + if (HAILO_SUCCESS != input_buffer.second.action_status()) { + all_buffers_status = input_buffer.second.action_status(); + break; // error from one buffer is enough + } + } + + if (HAILO_SUCCESS != all_buffers_status) { + auto acquired_buffer = m_pool->get_available_buffer(PipelineBuffer(), m_timeout); + if (HAILO_SUCCESS == acquired_buffer.status()) { + acquired_buffer->set_action_status(all_buffers_status); + + auto exec_done_cb = 
m_input_buffers[sink.name()].get_exec_done_cb(); + exec_done_cb(m_input_buffers[sink.name()].action_status()); + + m_next_pads[0]->run_push_async(acquired_buffer.release()); + } else { + handle_non_recoverable_async_error(acquired_buffer.status()); + } + } else { + std::vector input_buffers; + input_buffers.resize(m_input_buffers.size()); + for (auto &input_buffer : m_input_buffers) { + input_buffers[m_index_of_sink[input_buffer.first]] = std::move(input_buffer.second); + } + + auto output = action(std::move(input_buffers), PipelineBuffer()); + if (HAILO_SUCCESS == output.status()) { + m_next_pads[0]->run_push_async(output.release()); + } else { + m_next_pads[0]->run_push_async(PipelineBuffer(output.status())); + } + } + + for (const auto &curr_sink : m_sinks) { + m_sink_has_arrived[curr_sink.name()] = false; + } + m_input_buffers.clear(); + + // Manual unlocking is done before notifying, to avoid waking up the waiting thread only to block again + lock.unlock(); + m_cv.notify_all(); + } else { + auto done = m_cv.wait_for(lock, m_timeout, [&](){ + if (m_pipeline_status->load() != HAILO_SUCCESS) { + return true; // so we can exit this flow + } + return !m_sink_has_arrived[sink.name()]; + }); + + if (!done) { + LOGGER__ERROR("Waiting for other threads in AsyncHwElement {} has reached a timeout (timeout={}ms)", name(), m_timeout.count()); + handle_non_recoverable_async_error(HAILO_TIMEOUT); + } + + if (m_pipeline_status->load() == HAILO_STREAM_ABORTED_BY_USER) { + lock.unlock(); + m_cv.notify_all(); + } + } +} + +bool BaseMuxElement::has_all_sinks_arrived() +{ + for (const auto ¤t_sink : m_sink_has_arrived) { + if (!current_sink.second) { + return false; + } + } + return true; +} +Expected BaseMuxElement::run_pull(PipelineBuffer &&optional, const PipelinePad &/*source*/) +{ + CHECK_AS_EXPECTED(m_pipeline_direction == PipelineDirection::PULL, HAILO_INVALID_OPERATION, + "PostInferElement {} does not support run_pull operation", name()); + std::vector inputs; + 
inputs.reserve(m_sinks.size()); + for (auto &sink : m_sinks) { + auto buffer = sink.prev()->run_pull(); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == buffer.status()) { + return make_unexpected(buffer.status()); + } + CHECK_EXPECTED(buffer); + + inputs.push_back(buffer.release()); + } + + auto output = action(std::move(inputs), std::move(optional)); + CHECK_EXPECTED(output); + + return output; +} + +hailo_status BaseMuxElement::enqueue_execution_buffer(MemoryView mem_view, const TransferDoneCallbackAsyncInfer &exec_done, const std::string &source_name) +{ + (void)source_name; + auto status = m_pool->enqueue_buffer(mem_view, exec_done); + CHECK_SUCCESS(status); + + return HAILO_SUCCESS; +} + +hailo_status BaseMuxElement::execute_dequeue_user_buffers(hailo_status error_status) +{ + auto status = empty_buffer_pool(m_pool, error_status, m_timeout); + CHECK_SUCCESS(status); + return PipelineElement::execute_dequeue_user_buffers(error_status); +} + +Expected BaseMuxElement::can_push_buffer_upstream(const uint32_t /*source_index*/) +{ + return !m_pool->is_full(); +} + +hailo_status BaseMuxElement::fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const uint32_t /*source_index*/) +{ + auto status = m_pool->allocate_buffers(is_dma_able, num_of_buffers); + CHECK_SUCCESS(status); + + return HAILO_SUCCESS; +} + +Expected BaseMuxElement::can_push_buffer_upstream(const std::string &source_name) +{ + auto source_index = get_source_index_from_source_name(source_name); + CHECK_EXPECTED(source_index); + return can_push_buffer_upstream(*source_index); +} + +hailo_status BaseMuxElement::fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const std::string &source_name) +{ + auto source_index = get_source_index_from_source_name(source_name); + CHECK_EXPECTED_AS_STATUS(source_index); + return fill_buffer_pool(is_dma_able, num_of_buffers, *source_index); +} + +BaseDemuxElement::BaseDemuxElement(size_t source_count, const std::string &name, std::chrono::milliseconds timeout, + 
DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, + std::vector pools, PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) : + PipelineElementInternal(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, async_pipeline), + m_timeout(timeout), + m_pools(pools), + m_is_activated(false), + m_was_stream_aborted(false), + m_source_name_to_index(), + m_was_source_called(source_count, false), + m_buffers_for_action() +{ + m_sinks.emplace_back(*this, name, PipelinePad::Type::SINK); + m_sources.reserve(source_count); + for (uint32_t i = 0; i < source_count; i++) { + m_sources.emplace_back(*this, name, PipelinePad::Type::SOURCE); + m_source_name_to_index[m_sources[i].name()] = i; + } +} + +hailo_status BaseDemuxElement::run_push(PipelineBuffer &&buffer, const PipelinePad &/*sink*/) +{ + CHECK(PipelineDirection::PUSH == m_pipeline_direction, HAILO_INVALID_OPERATION, + "BaseDemuxElement {} does not support run_push operation", name()); + + auto outputs = action(std::move(buffer)); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == outputs.status()) { + return outputs.status(); + } + CHECK_EXPECTED_AS_STATUS(outputs); + + for (const auto &pad : execution_pads()) { + assert(m_source_name_to_index.count(pad->prev()->name()) > 0); + auto source_index = m_source_name_to_index[pad->prev()->name()]; + auto status = pad->run_push(std::move(outputs.value()[source_index])); + + if (HAILO_SHUTDOWN_EVENT_SIGNALED == status) { + LOGGER__INFO("run_push of {} was shutdown!", name()); + return status; + } + if (HAILO_STREAM_ABORTED_BY_USER == status) { + LOGGER__INFO("run_push of {} was aborted!", name()); + return status; + } + CHECK_SUCCESS(status); + } + + return HAILO_SUCCESS; +} + +void BaseDemuxElement::run_push_async(PipelineBuffer &&buffer, const PipelinePad &/*sink*/) +{ + assert(PipelineDirection::PUSH == m_pipeline_direction); + if (HAILO_SUCCESS != buffer.action_status()) { + for (const auto &pad : execution_pads()) { 
+ auto source_index = m_source_name_to_index[pad->prev()->name()]; + auto acquired_buffer = m_pools[source_index]->acquire_buffer(m_timeout); + if (HAILO_SUCCESS == acquired_buffer.status()) { + acquired_buffer->set_action_status(buffer.action_status()); + + auto exec_done_cb = buffer.get_exec_done_cb(); + exec_done_cb(buffer.action_status()); + + pad->run_push_async(acquired_buffer.release()); + } else { + handle_non_recoverable_async_error(acquired_buffer.status()); + } + } + return; + } + + auto outputs = action(std::move(buffer)); + + for (const auto &pad : execution_pads()) { + assert(m_source_name_to_index.count(pad->prev()->name()) > 0); + auto source_index = m_source_name_to_index[pad->prev()->name()]; + if (HAILO_SUCCESS == outputs.status()) { + pad->run_push_async(std::move(outputs.value()[source_index])); + } else { + pad->run_push_async(PipelineBuffer(outputs.status())); + } + } +} + +Expected BaseDemuxElement::run_pull(PipelineBuffer &&optional, const PipelinePad &source) +{ + CHECK_AS_EXPECTED(m_pipeline_direction == PipelineDirection::PULL, HAILO_INVALID_OPERATION, + "BaseDemuxElement {} does not support run_pull operation", name()); + + CHECK_AS_EXPECTED(!optional, HAILO_INVALID_ARGUMENT, "Optional buffer is not allowed in demux element!"); + + std::unique_lock lock(m_mutex); + if (!m_is_activated) { + return make_unexpected(HAILO_SHUTDOWN_EVENT_SIGNALED); + } + + if (m_was_stream_aborted) { + return make_unexpected(HAILO_STREAM_ABORTED_BY_USER); + } + + m_was_source_called[m_source_name_to_index[source.name()]] = true; + + if (were_all_srcs_arrived()) { + // If all srcs arrived, execute the demux + auto input = execution_pads()[0]->run_pull(); + if (HAILO_STREAM_ABORTED_BY_USER == input.status()) { + LOGGER__INFO("run_pull of demux element was aborted!"); + m_was_stream_aborted = true; + lock.unlock(); + m_cv.notify_all(); + return make_unexpected(input.status()); + } + if (HAILO_SHUTDOWN_EVENT_SIGNALED == input.status()) { + return 
make_unexpected(input.status()); + } + CHECK_EXPECTED(input); + + auto outputs = action(input.release()); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == outputs.status()) { + return make_unexpected(outputs.status()); + } + CHECK_EXPECTED(outputs); + + m_buffers_for_action = outputs.release(); + + for (uint32_t i = 0; i < m_was_source_called.size(); i++) { + m_was_source_called[i] = false; + } + + // Manual unlocking is done before notifying, to avoid waking up the waiting thread only to block again + lock.unlock(); + m_cv.notify_all(); + } else { + // If not all srcs arrived, wait until m_was_source_called is false (set to false after the demux execution) + auto wait_successful = m_cv.wait_for(lock, m_timeout, [&](){ + return !m_was_source_called[m_source_name_to_index[source.name()]] || m_was_stream_aborted || !m_is_activated; + }); + CHECK_AS_EXPECTED(wait_successful, HAILO_TIMEOUT, "Waiting for other threads in demux {} has reached a timeout (timeout={}ms)", name(), m_timeout.count()); + + if (m_was_stream_aborted) { + lock.unlock(); + m_cv.notify_all(); + return make_unexpected(HAILO_STREAM_ABORTED_BY_USER); + } + + // We check if the element is not activated in case notify_all() was called from deactivate() + if (!m_is_activated) { + lock.unlock(); + m_cv.notify_all(); + return make_unexpected(HAILO_SHUTDOWN_EVENT_SIGNALED); + } + } + + assert(m_source_name_to_index[source.name()] < m_buffers_for_action.size()); + return std::move(m_buffers_for_action[m_source_name_to_index[source.name()]]); +} + +bool BaseDemuxElement::were_all_srcs_arrived() +{ + return std::all_of(m_was_source_called.begin(), m_was_source_called.end(), [](bool v) { return v; }); +} + +hailo_status BaseDemuxElement::execute_activate() +{ + if (m_is_activated) { + return HAILO_SUCCESS; + } + m_is_activated = true;// TODO Should this always be true, no matter the status of source().activate()? 
+ m_was_stream_aborted = false; + + return PipelineElementInternal::execute_activate(); +} + +hailo_status BaseDemuxElement::execute_deactivate() +{ + if (!m_is_activated) { + return HAILO_SUCCESS; + } + m_is_activated = false; + + // deactivate should be called before mutex acquire and notify_all because it is possible that all queues are waiting on + // the run_pull of the source (HwRead) and the mutex is already acquired so this would prevent a timeout error + hailo_status status = PipelineElementInternal::execute_deactivate(); + + { + // There is a case where the other thread is halted (via context switch) before the wait_for() function, + // then we call notify_all() here, and then the wait_for() is called - resulting in a timeout. + // notify_all() only works on threads which are already waiting, so that's why we acquire the lock here. + std::unique_lock lock(m_mutex); + } + m_cv.notify_all(); + + CHECK_SUCCESS(status); + + return HAILO_SUCCESS; +} + +hailo_status BaseDemuxElement::execute_post_deactivate(bool should_clear_abort) +{ + for (uint32_t i = 0; i < m_was_source_called.size(); i++) { + m_was_source_called[i] = false; + } + return PipelineElementInternal::execute_post_deactivate(should_clear_abort); +} + +hailo_status BaseDemuxElement::execute_abort() +{ + auto status = PipelineElementInternal::execute_abort(); + CHECK_SUCCESS(status); + { + // There is a case where the other thread is halted (via context switch) before the wait_for() function, + // then we call notify_all() here, and then the wait_for() is called - resulting in a timeout. + // notify_all() only works on threads which are already waiting, so that's why we acquire the lock here. 
+ std::unique_lock lock(m_mutex); + } + m_cv.notify_all(); + + return HAILO_SUCCESS; +} + +hailo_status BaseDemuxElement::set_timeout(std::chrono::milliseconds timeout) +{ + m_timeout = timeout; + return HAILO_SUCCESS; +} + +hailo_status BaseDemuxElement::enqueue_execution_buffer(MemoryView mem_view, const TransferDoneCallbackAsyncInfer &exec_done, const std::string &source_name) +{ + auto pool_id = m_source_name_to_index.at(source_name); + auto status = m_pools[pool_id]->enqueue_buffer(mem_view, exec_done); + CHECK_SUCCESS(status); + + return HAILO_SUCCESS; +} + +hailo_status BaseDemuxElement::execute_dequeue_user_buffers(hailo_status error_status) +{ + for (auto &pool : m_pools) { + auto status = empty_buffer_pool(pool, error_status, m_timeout); + CHECK_SUCCESS(status); + } + return PipelineElement::execute_dequeue_user_buffers(error_status);; +} + +Expected BaseDemuxElement::can_push_buffer_upstream(const uint32_t source_index) +{ + CHECK_AS_EXPECTED(source_index < m_pools.size(), HAILO_INTERNAL_FAILURE); + return !m_pools[source_index]->is_full(); +} + +hailo_status BaseDemuxElement::fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const uint32_t source_index) +{ + CHECK(source_index < m_pools.size(), HAILO_INTERNAL_FAILURE); + CHECK_SUCCESS(m_pools[source_index]->allocate_buffers(is_dma_able, num_of_buffers)); + return HAILO_SUCCESS; +} + +Expected BaseDemuxElement::can_push_buffer_upstream(const std::string &source_name) +{ + auto source_index = get_source_index_from_source_name(source_name); + CHECK_EXPECTED(source_index); + return can_push_buffer_upstream(*source_index); +} + +hailo_status BaseDemuxElement::fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const std::string &source_name) +{ + auto source_index = get_source_index_from_source_name(source_name); + CHECK_EXPECTED_AS_STATUS(source_index); + return fill_buffer_pool(is_dma_able, num_of_buffers, *source_index); +} + +Expected 
BaseDemuxElement::get_source_index_from_source_name(const std::string &source_name) +{ + CHECK_AS_EXPECTED(contains(m_source_name_to_index, source_name), HAILO_NOT_FOUND); + auto ret_val = m_source_name_to_index.at(source_name); + return ret_val; +} + +std::vector BaseDemuxElement::execution_pads() +{ + if (m_next_pads.size() == 0) + { + if (PipelineDirection::PUSH == m_pipeline_direction) { + m_next_pads.reserve(m_sources.size()); + for (auto &source : m_sources ) { + m_next_pads.push_back(source.next()); + } + } else { + m_next_pads.reserve(m_sinks.size()); + for (auto &sink : m_sinks ) { + m_next_pads.push_back(sink.prev()); + } + } + } + return m_next_pads; +} + +} /* namespace hailort */ diff --git a/hailort/libhailort/src/net_flow/pipeline/pipeline_internal.hpp b/hailort/libhailort/src/net_flow/pipeline/pipeline_internal.hpp new file mode 100644 index 0000000..c699d74 --- /dev/null +++ b/hailort/libhailort/src/net_flow/pipeline/pipeline_internal.hpp @@ -0,0 +1,374 @@ +/** + * Copyright (c) 2020-2023 Hailo Technologies Ltd. All rights reserved. 
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file pipeline_internal.hpp + * @brief Hailo Infer Pipeline elements + **/ + +#ifndef _HAILO_PIPELINE_ELEMENTS_HPP_ +#define _HAILO_PIPELINE_ELEMENTS_HPP_ + +#include "net_flow/pipeline/pipeline.hpp" + +namespace hailort +{ + +class AsyncPipeline; // Forward declaration + +struct ElementBuildParams +{ + std::shared_ptr> pipeline_status; + std::chrono::milliseconds timeout; + EventPtr shutdown_event; + size_t buffer_pool_size_internal; + size_t buffer_pool_size_edges; + hailo_pipeline_elem_stats_flags_t elem_stats_flags; + hailo_vstream_stats_flags_t vstream_stats_flags; +}; + +class PipelineElementInternal : public PipelineElement +{ +public: + PipelineElementInternal(const std::string &name, DurationCollector &&duration_collector, + std::shared_ptr> &&pipeline_status, + PipelineDirection pipeline_direction, std::shared_ptr async_pipeline = nullptr); + PipelineElementInternal &operator=(PipelineElementInternal &&other) = delete; + +protected: + void handle_non_recoverable_async_error(hailo_status error_status); + std::weak_ptr m_async_pipeline; + + friend class PipelinePad; +}; + + +// An element with one source pad only (generates data) +class SourceElement : public PipelineElementInternal +{ +public: + SourceElement(const std::string &name, DurationCollector &&duration_collector, + std::shared_ptr> &&pipeline_status, + PipelineDirection pipeline_direction, std::shared_ptr async_pipeline); + PipelinePad &source(); + +protected: + virtual std::vector execution_pads() override; +}; + +// An element with one sink pad only (consumes data) +class SinkElement : public PipelineElementInternal +{ +public: + SinkElement(const std::string &name, DurationCollector &&duration_collector, + std::shared_ptr> &&pipeline_status, + PipelineDirection pipeline_direction, std::shared_ptr async_pipeline); + PipelinePad &sink(); + +protected: + virtual std::vector execution_pads() override; + 
virtual hailo_status execute_terminate(hailo_status error_status) override; + virtual hailo_status execute_dequeue_user_buffers(hailo_status error_status) override; +}; + +// Transfers data from one pad to another pad. Has one sink pad and one source pad. +class IntermediateElement : public PipelineElementInternal +{ +public: + IntermediateElement(const std::string &name, DurationCollector &&duration_collector, + std::shared_ptr> &&pipeline_status, + PipelineDirection pipeline_direction, std::shared_ptr async_pipeline); + virtual PipelinePad &next_pad() = 0; + +protected: + virtual std::vector execution_pads() override; +}; + +class FilterElement : public IntermediateElement +{ +public: + FilterElement(const std::string &name, DurationCollector &&duration_collector, + std::shared_ptr> &&pipeline_status, + PipelineDirection pipeline_direction, BufferPoolPtr buffer_pool, std::chrono::milliseconds timeout, + std::shared_ptr async_pipeline); + virtual ~FilterElement() = default; + + virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; + virtual void run_push_async(PipelineBuffer &&buffer, const PipelinePad &sink) override; + virtual Expected run_pull(PipelineBuffer &&optional, const PipelinePad &source) override; + + virtual hailo_status enqueue_execution_buffer(MemoryView mem_view, const TransferDoneCallbackAsyncInfer &exec_done, const std::string &source_name) override; + virtual Expected can_push_buffer_upstream(const uint32_t source_index) override; + virtual hailo_status fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const uint32_t source_index) override; + virtual Expected can_push_buffer_upstream(const std::string &source_name) override; + virtual hailo_status fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const std::string &source_name) override; + + virtual std::vector get_queue_size_accumulators() override; + +protected: + // The optional buffer functions as an output buffer that the user can write to 
instead of acquiring a new buffer + virtual Expected action(PipelineBuffer &&input, PipelineBuffer &&optional) = 0; + virtual hailo_status execute_dequeue_user_buffers(hailo_status error_status) override; + + BufferPoolPtr m_pool; + std::chrono::milliseconds m_timeout; +}; + +class BaseQueueElement : public IntermediateElement +{ +public: + virtual ~BaseQueueElement(); + + hailo_status set_timeout(std::chrono::milliseconds timeout); + virtual std::string description() const override; + + static constexpr auto INIFINITE_TIMEOUT() { return std::chrono::milliseconds(HAILO_INFINITE); } + +protected: + static Expected> create_queue(size_t queue_size, EventPtr shutdown_event); + BaseQueueElement(SpscQueue &&queue, EventPtr shutdown_event, const std::string &name, + std::chrono::milliseconds timeout, DurationCollector &&duration_collector, + AccumulatorPtr &&queue_size_accumulator, std::shared_ptr> &&pipeline_status, + Event &&activation_event, Event &&deactivation_event, + PipelineDirection pipeline_direction, std::shared_ptr async_pipeline); + + hailo_status pipeline_status(); + + virtual hailo_status execute_activate() override; + virtual hailo_status execute_post_deactivate(bool should_clear_abort) override; + virtual hailo_status execute_clear() override; + virtual hailo_status execute_clear_abort() override; + virtual hailo_status execute_wait_for_finish() override; + + virtual hailo_status enqueue_execution_buffer(MemoryView mem_view, const TransferDoneCallbackAsyncInfer &exec_done, const std::string &source_name) override; + virtual Expected can_push_buffer_upstream(const uint32_t source_index) override; + virtual Expected can_push_buffer_downstream(const uint32_t source_index) override; + virtual hailo_status fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const uint32_t source_index) override; + virtual Expected can_push_buffer_upstream(const std::string &source_name) override; + virtual Expected can_push_buffer_downstream(const std::string 
&source_name) override; + virtual hailo_status fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const std::string &source_name) override; + + /// Starts/stops the queue thread. This functions needs to be called on subclasses ctor and dtor + /// accordingly because otherwise, if we will start/stop thread in this class we will face pure-call + /// to `run_in_thread`. + /// This functions don't return status because they are meant to be called on ctor and dtor + virtual void start_thread(); + virtual void stop_thread(); + + virtual std::vector get_queue_size_accumulators() override; + + virtual hailo_status run_in_thread() = 0; + virtual std::string thread_name() = 0; + + SpscQueue m_queue; + EventPtr m_shutdown_event; + std::chrono::milliseconds m_timeout; + std::thread m_thread; + std::atomic_bool m_is_thread_running; + Event m_activation_event; + Event m_deactivation_event; + AccumulatorPtr m_queue_size_accumulator; + std::atomic_bool m_is_run_in_thread_running; + std::condition_variable m_cv; + std::mutex m_mutex; +}; + +class PushQueueElement : public BaseQueueElement +{ +public: + static Expected> create(const std::string &name, std::chrono::milliseconds timeout, + size_t queue_size, hailo_pipeline_elem_stats_flags_t flags, EventPtr shutdown_event, + std::shared_ptr> pipeline_status, PipelineDirection pipeline_direction = PipelineDirection::PUSH, + std::shared_ptr async_pipeline = nullptr); + static Expected> create(const std::string &name, const hailo_vstream_params_t &vstream_params, + EventPtr shutdown_event, std::shared_ptr> pipeline_status, + PipelineDirection pipeline_direction = PipelineDirection::PUSH, std::shared_ptr async_pipeline = nullptr); + PushQueueElement(SpscQueue &&queue, EventPtr shutdown_event, const std::string &name, + std::chrono::milliseconds timeout, DurationCollector &&duration_collector, AccumulatorPtr &&queue_size_accumulator, + std::shared_ptr> &&pipeline_status, Event &&activation_event, Event &&deactivation_event, + 
PipelineDirection pipeline_direction, std::shared_ptr async_pipeline, bool should_start_thread = true); + virtual ~PushQueueElement(); + + virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; + virtual void run_push_async(PipelineBuffer &&buffer, const PipelinePad &sink) override; + virtual Expected run_pull(PipelineBuffer &&optional, const PipelinePad &source) override; + virtual PipelinePad &next_pad() override; + +protected: + virtual hailo_status execute_deactivate() override; + virtual hailo_status run_in_thread() override; + virtual std::string thread_name() override { return "PUSH_QUEUE"; }; + virtual hailo_status execute_abort() override; +}; + +class AsyncPushQueueElement : public PushQueueElement +{ +public: + static Expected> create(const std::string &name, std::chrono::milliseconds timeout, + size_t queue_size, hailo_pipeline_elem_stats_flags_t flags, EventPtr shutdown_event, + std::shared_ptr> pipeline_status, std::shared_ptr async_pipeline, + PipelineDirection pipeline_direction = PipelineDirection::PUSH); + static Expected> create(const std::string &name, const ElementBuildParams &build_params, + std::shared_ptr async_pipeline, PipelineDirection pipeline_direction); + AsyncPushQueueElement(SpscQueue &&queue, EventPtr shutdown_event, const std::string &name, + std::chrono::milliseconds timeout, DurationCollector &&duration_collector, AccumulatorPtr &&queue_size_accumulator, + std::shared_ptr> &&pipeline_status, Event &&activation_event, Event &&deactivation_event, + PipelineDirection pipeline_direction, std::shared_ptr async_pipeline); + + virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; + virtual void run_push_async(PipelineBuffer &&buffer, const PipelinePad &sink) override; + virtual hailo_status execute_dequeue_user_buffers(hailo_status error_status) override; + +protected: + virtual hailo_status run_in_thread() override; + virtual std::string thread_name() override { 
return "ASYNC_PUSH_Q"; }; + virtual void start_thread() override; + virtual hailo_status execute_terminate(hailo_status error_status); + virtual hailo_status execute_post_deactivate(bool should_clear_abort) override; + virtual hailo_status execute_deactivate() override; +}; + +class PullQueueElement : public BaseQueueElement +{ +public: + static Expected> create(const std::string &name, std::chrono::milliseconds timeout, + size_t queue_size, hailo_pipeline_elem_stats_flags_t flags, EventPtr shutdown_event, + std::shared_ptr> pipeline_status, PipelineDirection pipeline_direction = PipelineDirection::PULL); + static Expected> create(const std::string &name, const hailo_vstream_params_t &vstream_params, + EventPtr shutdown_event, std::shared_ptr> pipeline_status, + PipelineDirection pipeline_direction = PipelineDirection::PULL); + PullQueueElement(SpscQueue &&queue, EventPtr shutdown_event, const std::string &name, + std::chrono::milliseconds timeout, DurationCollector &&duration_collector, AccumulatorPtr &&queue_size_accumulator, + std::shared_ptr> &&pipeline_status, Event &&activation_event, Event &&deactivation_event, + PipelineDirection pipeline_direction); + virtual ~PullQueueElement(); + + virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; + virtual void run_push_async(PipelineBuffer &&buffer, const PipelinePad &sink) override; + virtual Expected run_pull(PipelineBuffer &&optional, const PipelinePad &source) override; + virtual PipelinePad &next_pad() override; + +protected: + virtual hailo_status execute_deactivate() override; + virtual hailo_status run_in_thread() override; + virtual std::string thread_name() override { return "PULL_QUEUE"; }; +}; + +class UserBufferQueueElement : public PullQueueElement +{ +public: + static Expected> create(const std::string &name, std::chrono::milliseconds timeout, + hailo_pipeline_elem_stats_flags_t flags, EventPtr shutdown_event, std::shared_ptr> pipeline_status, + PipelineDirection 
pipeline_direction = PipelineDirection::PULL); + static Expected> create(const std::string &name, const hailo_vstream_params_t &vstream_params, + EventPtr shutdown_event, std::shared_ptr> pipeline_status, + PipelineDirection pipeline_direction = PipelineDirection::PULL); + UserBufferQueueElement(SpscQueue &&queue, SpscQueue &&full_buffer_queue, EventPtr shutdown_event, + const std::string &name, std::chrono::milliseconds timeout, DurationCollector &&duration_collector, AccumulatorPtr &&queue_size_accumulator, + std::shared_ptr> &&pipeline_status, Event &&activation_event, Event &&deactivation_event, + PipelineDirection pipeline_direction); + + virtual Expected run_pull(PipelineBuffer &&optional, const PipelinePad &source) override; + +protected: + virtual hailo_status execute_clear() override; + virtual hailo_status run_in_thread() override; + +private: + SpscQueue m_full_buffer_queue; +}; + +class BaseMuxElement : public PipelineElementInternal +{ +public: + virtual ~BaseMuxElement() = default; + + virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; + virtual void run_push_async(PipelineBuffer &&buffer, const PipelinePad &sink) override; + virtual Expected run_pull(PipelineBuffer &&optional, const PipelinePad &source) override; + + virtual hailo_status enqueue_execution_buffer(MemoryView mem_view, const TransferDoneCallbackAsyncInfer &exec_done, const std::string &source_name) override; + virtual Expected can_push_buffer_upstream(const uint32_t source_index) override; + virtual hailo_status fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const uint32_t source_index) override; + virtual Expected can_push_buffer_upstream(const std::string &source_name) override; + virtual hailo_status fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const std::string &source_name) override; + +protected: + BaseMuxElement(size_t sink_count, const std::string &name, std::chrono::milliseconds timeout, + DurationCollector 
&&duration_collector, std::shared_ptr> &&pipeline_status, + BufferPoolPtr buffer_pool, PipelineDirection pipeline_direction, std::shared_ptr async_pipeline); + virtual hailo_status execute_terminate(hailo_status error_status) override; + virtual Expected action(std::vector &&inputs, PipelineBuffer &&optional) = 0; + virtual std::vector execution_pads() override; + virtual hailo_status execute_dequeue_user_buffers(hailo_status error_status) override; + + std::chrono::milliseconds m_timeout; + BufferPoolPtr m_pool; + +private: + bool has_all_sinks_arrived(); + std::unordered_map m_sink_has_arrived; + std::mutex m_mutex; + std::unordered_map m_index_of_sink; + std::unordered_map m_input_buffers; + std::vector m_next_pads; + std::condition_variable m_cv; +}; + +class BaseDemuxElement : public PipelineElementInternal +{ +public: + virtual ~BaseDemuxElement() = default; + + virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; + virtual void run_push_async(PipelineBuffer &&buffer, const PipelinePad &sink) override; + virtual Expected run_pull(PipelineBuffer &&optional, const PipelinePad &source) override; + hailo_status set_timeout(std::chrono::milliseconds timeout); + + virtual hailo_status enqueue_execution_buffer(MemoryView mem_view, const TransferDoneCallbackAsyncInfer &exec_done, const std::string &source_name) override; + virtual Expected can_push_buffer_upstream(const uint32_t source_index) override; + virtual hailo_status fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const uint32_t source_index) override; + virtual Expected can_push_buffer_upstream(const std::string &source_name) override; + virtual hailo_status fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const std::string &source_name) override; + + virtual Expected get_source_index_from_source_name(const std::string &source_name) override; + +protected: + BaseDemuxElement(size_t source_count, const std::string &name, std::chrono::milliseconds 
timeout, + DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, + std::vector pools, PipelineDirection pipeline_direction, std::shared_ptr async_pipeline); + virtual hailo_status execute_activate() override; + virtual hailo_status execute_deactivate() override; + virtual hailo_status execute_post_deactivate(bool should_clear_abort) override; + virtual hailo_status execute_abort() override; + virtual Expected> action(PipelineBuffer &&input) = 0; + virtual std::vector execution_pads() override; + virtual hailo_status execute_dequeue_user_buffers(hailo_status error_status) override; + + std::chrono::milliseconds m_timeout; + std::vector m_pools; + +private: + bool were_all_srcs_arrived(); + + std::atomic_bool m_is_activated; + std::atomic_bool m_was_stream_aborted; + std::unordered_map m_source_name_to_index; + std::vector m_was_source_called; + std::vector m_buffers_for_action; + std::mutex m_mutex; + std::condition_variable m_cv; + std::vector m_next_pads; +}; + +enum class AccumulatorType +{ + FPS, + LATENCY, + QUEUE_SIZE +}; + +} /* namespace hailort */ + +#endif /* _HAILO_PIPELINE_ELEMENTS_HPP_ */ diff --git a/hailort/libhailort/src/net_flow/pipeline/vstream.cpp b/hailort/libhailort/src/net_flow/pipeline/vstream.cpp index dc2115e..8530e13 100644 --- a/hailort/libhailort/src/net_flow/pipeline/vstream.cpp +++ b/hailort/libhailort/src/net_flow/pipeline/vstream.cpp @@ -17,11 +17,12 @@ #include "hailo/vdevice.hpp" #include "hailo/hailort_defaults.hpp" #include "hailo/hailort_common.hpp" -#include "net_flow/pipeline/pipeline.hpp" +#include "net_flow/pipeline/pipeline_internal.hpp" #include "stream_common/stream_internal.hpp" #include "net_flow/ops/nms_post_process.hpp" #include "net_flow/ops/ssd_post_process.hpp" #include "net_flow/ops/yolox_post_process.hpp" +#include "net_flow/ops/yolov8_post_process.hpp" #include "net_flow/ops/yolov5_post_process.hpp" #include "net_flow/ops/argmax_post_process.hpp" #include 
"net_flow/ops/softmax_post_process.hpp" @@ -55,7 +56,7 @@ Expected> PreInferElement::create(const hailo_3 const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, const std::vector &dst_quant_infos, const std::string &name, std::chrono::milliseconds timeout, size_t buffer_pool_size, hailo_pipeline_elem_stats_flags_t elem_flags, hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event, std::shared_ptr> pipeline_status, - PipelineDirection pipeline_direction, bool is_dma_able) + PipelineDirection pipeline_direction, bool is_dma_able, std::shared_ptr async_pipeline) { auto transform_context = InputTransformContext::create(src_image_shape, src_format, dst_image_shape, dst_format, dst_quant_infos); @@ -70,7 +71,8 @@ Expected> PreInferElement::create(const hailo_3 CHECK_EXPECTED(duration_collector); auto pre_infer_elem_ptr = make_shared_nothrow(transform_context.release(), - buffer_pool.release(), name, timeout, duration_collector.release(), std::move(pipeline_status), pipeline_direction); + buffer_pool.release(), name, timeout, duration_collector.release(), std::move(pipeline_status), pipeline_direction, + async_pipeline); CHECK_AS_EXPECTED(nullptr != pre_infer_elem_ptr, HAILO_OUT_OF_HOST_MEMORY); LOGGER__INFO("Created {}", pre_infer_elem_ptr->name()); @@ -81,26 +83,28 @@ Expected> PreInferElement::create(const hailo_3 Expected> PreInferElement::create(const hailo_3d_image_shape_t &src_image_shape, const hailo_format_t &src_format, const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, const std::vector &dst_quant_infos, const std::string &name, const hailo_vstream_params_t &vstream_params, EventPtr shutdown_event, std::shared_ptr> pipeline_status, - PipelineDirection pipeline_direction, bool is_dma_able) + PipelineDirection pipeline_direction, bool is_dma_able, std::shared_ptr async_pipeline) { return PreInferElement::create(src_image_shape, src_format, dst_image_shape, dst_format, dst_quant_infos, name, 
std::chrono::milliseconds(vstream_params.timeout_ms), vstream_params.queue_size, vstream_params.pipeline_elements_stats_flags, - vstream_params.vstream_stats_flags, shutdown_event, pipeline_status, pipeline_direction, is_dma_able); + vstream_params.vstream_stats_flags, shutdown_event, pipeline_status, pipeline_direction, is_dma_able, async_pipeline); } Expected> PreInferElement::create(const hailo_3d_image_shape_t &src_image_shape, const hailo_format_t &src_format, const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, const std::vector &dst_quant_infos, - const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction, bool is_dma_able) + const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction, bool is_dma_able, + std::shared_ptr async_pipeline) { return PreInferElement::create(src_image_shape, src_format, dst_image_shape, dst_format, dst_quant_infos, name, - build_params.timeout, build_params.buffer_pool_size, build_params.elem_stats_flags, build_params.vstream_stats_flags, - build_params.shutdown_event, build_params.pipeline_status, pipeline_direction, is_dma_able); + build_params.timeout, build_params.buffer_pool_size_internal, build_params.elem_stats_flags, build_params.vstream_stats_flags, + build_params.shutdown_event, build_params.pipeline_status, pipeline_direction, is_dma_able, async_pipeline); } PreInferElement::PreInferElement(std::unique_ptr &&transform_context, BufferPoolPtr buffer_pool, const std::string &name, std::chrono::milliseconds timeout, DurationCollector &&duration_collector, - std::shared_ptr> &&pipeline_status, PipelineDirection pipeline_direction) : - FilterElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, buffer_pool, timeout), + std::shared_ptr> &&pipeline_status, PipelineDirection pipeline_direction, + std::shared_ptr async_pipeline) : + FilterElement(name, 
std::move(duration_collector), std::move(pipeline_status), pipeline_direction, buffer_pool, timeout, async_pipeline), m_transform_context(std::move(transform_context)) {} @@ -133,6 +137,10 @@ Expected PreInferElement::action(PipelineBuffer &&input, Pipelin if (HAILO_SHUTDOWN_EVENT_SIGNALED == transformed_buffer.status()) { return make_unexpected(transformed_buffer.status()); } + + if (!transformed_buffer) { + input.get_exec_done_cb()(transformed_buffer.status()); + } CHECK_AS_EXPECTED(HAILO_TIMEOUT != transformed_buffer.status(), HAILO_TIMEOUT, "{} (H2D) failed with status={} (timeout={}ms)", name(), HAILO_TIMEOUT, m_timeout.count()); CHECK_EXPECTED(transformed_buffer); @@ -141,13 +149,17 @@ Expected PreInferElement::action(PipelineBuffer &&input, Pipelin m_duration_collector.start_measurement(); const auto status = m_transform_context->transform(input.as_view(), dst); m_duration_collector.complete_measurement(); + auto exec_done_cb = input.get_exec_done_cb(); - CompletionInfoAsyncInferInternal completion_info {status}; - exec_done_cb(completion_info); + exec_done_cb(status); + transformed_buffer->set_action_status(status); + + auto metadata = input.get_metadata(); + CHECK_SUCCESS_AS_EXPECTED(status); // Note: The latency to be measured starts as the input buffer is sent to the InputVStream (via write()) - transformed_buffer->set_metadata(input.get_metadata()); + transformed_buffer->set_metadata(std::move(metadata)); return transformed_buffer.release(); } @@ -156,7 +168,7 @@ Expected> ConvertNmsToDetectionsE const hailo_nms_info_t &nms_info, const std::string &name, hailo_pipeline_elem_stats_flags_t elem_flags, std::shared_ptr> pipeline_status, std::chrono::milliseconds timeout, hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event, size_t buffer_pool_size, - PipelineDirection pipeline_direction, bool is_last_copy_element) + PipelineDirection pipeline_direction, bool is_last_copy_element, std::shared_ptr async_pipeline) { // The actual data will be in 
the metadata auto frame_size = 0; @@ -168,7 +180,7 @@ Expected> ConvertNmsToDetectionsE CHECK_EXPECTED(duration_collector); auto convert_nms_to_detections_elem_ptr = make_shared_nothrow(std::move(nms_info), - name, duration_collector.release(), std::move(pipeline_status), buffer_pool, timeout, pipeline_direction); + name, duration_collector.release(), std::move(pipeline_status), buffer_pool, timeout, pipeline_direction, async_pipeline); CHECK_AS_EXPECTED(nullptr != convert_nms_to_detections_elem_ptr, HAILO_OUT_OF_HOST_MEMORY); LOGGER__INFO("Created {}", convert_nms_to_detections_elem_ptr->name()); @@ -178,18 +190,17 @@ Expected> ConvertNmsToDetectionsE Expected> ConvertNmsToDetectionsElement::create( const hailo_nms_info_t &nms_info, const std::string &name, const ElementBuildParams &build_params, - PipelineDirection pipeline_direction, bool is_last_copy_element) + PipelineDirection pipeline_direction, bool is_last_copy_element, std::shared_ptr async_pipeline) { return ConvertNmsToDetectionsElement::create(nms_info, name, build_params.elem_stats_flags, build_params.pipeline_status, - build_params.timeout, build_params.vstream_stats_flags, build_params.shutdown_event, build_params.buffer_pool_size, - pipeline_direction, is_last_copy_element); + build_params.timeout, build_params.vstream_stats_flags, build_params.shutdown_event, build_params.buffer_pool_size_edges, + pipeline_direction, is_last_copy_element, async_pipeline); } ConvertNmsToDetectionsElement::ConvertNmsToDetectionsElement(const hailo_nms_info_t &&nms_info, const std::string &name, - DurationCollector &&duration_collector, - std::shared_ptr> &&pipeline_status, - BufferPoolPtr buffer_pool, std::chrono::milliseconds timeout, PipelineDirection pipeline_direction) : - FilterElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, buffer_pool, timeout), + DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, BufferPoolPtr buffer_pool, + 
std::chrono::milliseconds timeout, PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) : + FilterElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, buffer_pool, timeout, async_pipeline), m_nms_info(std::move(nms_info)) {} @@ -221,6 +232,10 @@ Expected ConvertNmsToDetectionsElement::action(PipelineBuffer && if (HAILO_SHUTDOWN_EVENT_SIGNALED == buffer.status()) { return make_unexpected(buffer.status()); } + + if (!buffer) { + input.get_exec_done_cb()(buffer.status()); + } CHECK_EXPECTED(buffer, "{} (D2H) failed with status={}", name(), buffer.status()); buffer->set_metadata(input.get_metadata()); @@ -234,6 +249,9 @@ Expected ConvertNmsToDetectionsElement::action(PipelineBuffer && m_duration_collector.complete_measurement(); + auto exec_done_cb = input.get_exec_done_cb(); + exec_done_cb(HAILO_SUCCESS); + return buffer.release(); } @@ -241,7 +259,8 @@ Expected> FillNmsFormatElement::create(con const hailo_format_t &dst_format, const net_flow::NmsPostProcessConfig nms_config, const std::string &name, hailo_pipeline_elem_stats_flags_t elem_flags, std::shared_ptr> pipeline_status, std::chrono::milliseconds timeout, hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event, - size_t buffer_pool_size, PipelineDirection pipeline_direction, bool is_last_copy_element) + size_t buffer_pool_size, PipelineDirection pipeline_direction, bool is_last_copy_element, + std::shared_ptr async_pipeline) { auto frame_size = HailoRTCommon::get_nms_host_frame_size(nms_info, dst_format); auto buffer_pool_expected = BufferPool::create(frame_size, buffer_pool_size, shutdown_event, elem_flags, vstream_flags, is_last_copy_element); @@ -252,7 +271,7 @@ Expected> FillNmsFormatElement::create(con CHECK_EXPECTED(duration_collector); auto fill_nms_format_element = make_shared_nothrow(std::move(nms_config), - name, duration_collector.release(), std::move(pipeline_status), buffer_pool, timeout, pipeline_direction); + name, 
duration_collector.release(), std::move(pipeline_status), buffer_pool, timeout, pipeline_direction, async_pipeline); CHECK_AS_EXPECTED(nullptr != fill_nms_format_element, HAILO_OUT_OF_HOST_MEMORY); LOGGER__INFO("Created {}", fill_nms_format_element->name()); @@ -262,18 +281,21 @@ Expected> FillNmsFormatElement::create(con Expected> FillNmsFormatElement::create(const hailo_nms_info_t nms_info, const hailo_format_t &dst_format, const net_flow::NmsPostProcessConfig nms_config, const std::string &name, - const ElementBuildParams &build_params, PipelineDirection pipeline_direction, bool is_last_copy_element) + const ElementBuildParams &build_params, PipelineDirection pipeline_direction, bool is_last_copy_element, + std::shared_ptr async_pipeline) { return FillNmsFormatElement::create(nms_info, dst_format, nms_config, name, build_params.elem_stats_flags, build_params.pipeline_status, build_params.timeout, build_params.vstream_stats_flags, - build_params.shutdown_event, build_params.buffer_pool_size, pipeline_direction, is_last_copy_element); + build_params.shutdown_event, build_params.buffer_pool_size_edges, pipeline_direction, is_last_copy_element, + async_pipeline); } FillNmsFormatElement::FillNmsFormatElement(const net_flow::NmsPostProcessConfig &&nms_config, const std::string &name, DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, - BufferPoolPtr buffer_pool, std::chrono::milliseconds timeout, PipelineDirection pipeline_direction) : - FilterElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, buffer_pool, timeout), + BufferPoolPtr buffer_pool, std::chrono::milliseconds timeout, PipelineDirection pipeline_direction, + std::shared_ptr async_pipeline) : + FilterElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, buffer_pool, timeout, async_pipeline), m_nms_config(std::move(nms_config)) {} @@ -305,6 +327,10 @@ Expected FillNmsFormatElement::action(PipelineBuffer 
&&input, Pi if (HAILO_SHUTDOWN_EVENT_SIGNALED == buffer_expected.status()) { return make_unexpected(buffer_expected.status()); } + + if (!buffer_expected) { + input.get_exec_done_cb()(buffer_expected.status()); + } CHECK_EXPECTED(buffer_expected, "{} (D2H) failed with status={}", name(), buffer_expected.status()); auto buffer = buffer_expected.release(); @@ -319,6 +345,9 @@ Expected FillNmsFormatElement::action(PipelineBuffer &&input, Pi m_duration_collector.complete_measurement(); + auto exec_done_cb = input.get_exec_done_cb(); + exec_done_cb(HAILO_SUCCESS); + return buffer; } @@ -326,8 +355,8 @@ Expected> PostInferElement::create(const hailo const hailo_format_t &src_format, const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, const std::vector &dst_quant_infos, const hailo_nms_info_t &nms_info, const std::string &name, hailo_pipeline_elem_stats_flags_t elem_flags, std::shared_ptr> pipeline_status, - std::chrono::milliseconds timeout, hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event, - size_t buffer_pool_size, PipelineDirection pipeline_direction, bool is_last_copy_element) + std::chrono::milliseconds timeout, hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event, size_t buffer_pool_size, + PipelineDirection pipeline_direction, bool is_last_copy_element, std::shared_ptr async_pipeline) { auto frame_size = (dst_format.order == HAILO_FORMAT_ORDER_HAILO_NMS) ? 
HailoRTCommon::get_nms_host_frame_size(nms_info, dst_format) : HailoRTCommon::get_frame_size(dst_image_shape, dst_format); auto buffer_pool_expected = BufferPool::create(frame_size, buffer_pool_size, shutdown_event, elem_flags, vstream_flags, is_last_copy_element); @@ -340,8 +369,8 @@ Expected> PostInferElement::create(const hailo auto duration_collector = DurationCollector::create(elem_flags); CHECK_EXPECTED(duration_collector); - auto post_infer_elem_ptr = make_shared_nothrow(transform_context.release(), - name, duration_collector.release(), std::move(pipeline_status), buffer_pool_expected.release(), timeout, pipeline_direction); + auto post_infer_elem_ptr = make_shared_nothrow(transform_context.release(), name, + duration_collector.release(), std::move(pipeline_status), buffer_pool_expected.release(), timeout, pipeline_direction, async_pipeline); CHECK_AS_EXPECTED(nullptr != post_infer_elem_ptr, HAILO_OUT_OF_HOST_MEMORY); LOGGER__INFO("Created {}", post_infer_elem_ptr->name()); @@ -352,30 +381,31 @@ Expected> PostInferElement::create(const hailo Expected> PostInferElement::create(const hailo_3d_image_shape_t &src_image_shape, const hailo_format_t &src_format, const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, const std::vector &dst_quant_infos, const hailo_nms_info_t &nms_info, const std::string &name, const hailo_vstream_params_t &vstream_params, std::shared_ptr> pipeline_status, - EventPtr shutdown_event, PipelineDirection pipeline_direction, bool is_last_copy_element) + EventPtr shutdown_event, PipelineDirection pipeline_direction, bool is_last_copy_element, std::shared_ptr async_pipeline) { return PostInferElement::create(src_image_shape, src_format, dst_image_shape, dst_format, dst_quant_infos, nms_info, name, vstream_params.pipeline_elements_stats_flags, pipeline_status, std::chrono::milliseconds(vstream_params.timeout_ms), - vstream_params.vstream_stats_flags, shutdown_event, vstream_params.queue_size, pipeline_direction, 
is_last_copy_element); + vstream_params.vstream_stats_flags, shutdown_event, vstream_params.queue_size, pipeline_direction, is_last_copy_element, async_pipeline); } Expected> PostInferElement::create(const hailo_3d_image_shape_t &src_image_shape, const hailo_format_t &src_format, const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, const std::vector &dst_quant_infos, const hailo_nms_info_t &nms_info, const std::string &name, - const ElementBuildParams &build_params, PipelineDirection pipeline_direction, bool is_last_copy_element) + const ElementBuildParams &build_params, PipelineDirection pipeline_direction, bool is_last_copy_element, + std::shared_ptr async_pipeline) { return PostInferElement::create(src_image_shape, src_format, dst_image_shape, dst_format, dst_quant_infos, nms_info, name, build_params.elem_stats_flags, build_params.pipeline_status, - build_params.timeout, build_params.vstream_stats_flags, build_params.shutdown_event, build_params.buffer_pool_size, - pipeline_direction, is_last_copy_element); + build_params.timeout, build_params.vstream_stats_flags, build_params.shutdown_event, build_params.buffer_pool_size_edges, + pipeline_direction, is_last_copy_element, async_pipeline); } PostInferElement::PostInferElement(std::unique_ptr &&transform_context, const std::string &name, DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, BufferPoolPtr buffer_pool, std::chrono::milliseconds timeout, - PipelineDirection pipeline_direction) : - FilterElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, buffer_pool, timeout), + PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) : + FilterElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, buffer_pool, timeout, async_pipeline), m_transform_context(std::move(transform_context)) {} @@ -415,6 +445,10 @@ Expected PostInferElement::action(PipelineBuffer &&input, Pipeli if 
(HAILO_SHUTDOWN_EVENT_SIGNALED == buffer.status()) { return make_unexpected(buffer.status()); } + + if (!buffer) { + input.get_exec_done_cb()(buffer.status()); + } CHECK_EXPECTED(buffer, "{} (D2H) failed with status={}", name(), buffer.status()); // Note: The latency to be measured starts as the buffer is read from the HW (it's 'input' in this case) @@ -424,6 +458,11 @@ Expected PostInferElement::action(PipelineBuffer &&input, Pipeli m_duration_collector.start_measurement(); const auto status = m_transform_context->transform(input.as_view(), dst); m_duration_collector.complete_measurement(); + + auto exec_done_cb = input.get_exec_done_cb(); + exec_done_cb(status); + buffer->set_action_status(status); + CHECK_SUCCESS_AS_EXPECTED(status); return buffer.release(); @@ -448,7 +487,8 @@ static hailo_nms_info_t fuse_nms_info(const std::vector &nms_i Expected> RemoveOverlappingBboxesElement::create( const net_flow::NmsPostProcessConfig nms_config, const std::string &name, hailo_pipeline_elem_stats_flags_t elem_flags, std::shared_ptr> pipeline_status, std::chrono::milliseconds timeout, hailo_vstream_stats_flags_t vstream_flags, - EventPtr shutdown_event, size_t buffer_pool_size, PipelineDirection pipeline_direction, bool is_last_copy_element) + EventPtr shutdown_event, size_t buffer_pool_size, PipelineDirection pipeline_direction, bool is_last_copy_element, + std::shared_ptr async_pipeline) { // The actual data will be in the metadata auto frame_size = 0; @@ -460,7 +500,7 @@ Expected> RemoveOverlappingBboxe CHECK_EXPECTED(duration_collector); auto convert_nms_removed_overlapping_elem_ptr = make_shared_nothrow(std::move(nms_config), - name, duration_collector.release(), std::move(pipeline_status), buffer_pool, timeout, pipeline_direction); + name, duration_collector.release(), std::move(pipeline_status), buffer_pool, timeout, pipeline_direction, async_pipeline); CHECK_AS_EXPECTED(nullptr != convert_nms_removed_overlapping_elem_ptr, HAILO_OUT_OF_HOST_MEMORY); 
LOGGER__INFO("Created {}", convert_nms_removed_overlapping_elem_ptr->name()); @@ -469,19 +509,20 @@ Expected> RemoveOverlappingBboxe } Expected> RemoveOverlappingBboxesElement::create(const net_flow::NmsPostProcessConfig nms_config, - const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction, bool is_last_copy_element) + const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction, bool is_last_copy_element, + std::shared_ptr async_pipeline) { return RemoveOverlappingBboxesElement::create(nms_config, name, build_params.elem_stats_flags, build_params.pipeline_status, build_params.timeout, build_params.vstream_stats_flags, - build_params.shutdown_event, build_params.buffer_pool_size, pipeline_direction, is_last_copy_element); + build_params.shutdown_event, build_params.buffer_pool_size_edges, pipeline_direction, is_last_copy_element, async_pipeline); } RemoveOverlappingBboxesElement::RemoveOverlappingBboxesElement(const net_flow::NmsPostProcessConfig &&nms_config, const std::string &name, DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, BufferPoolPtr buffer_pool, std::chrono::milliseconds timeout, - PipelineDirection pipeline_direction) : - FilterElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, buffer_pool, timeout), + PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) : + FilterElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, buffer_pool, timeout, async_pipeline), m_nms_config(std::move(nms_config)) {} @@ -513,6 +554,10 @@ Expected RemoveOverlappingBboxesElement::action(PipelineBuffer & if (HAILO_SHUTDOWN_EVENT_SIGNALED == buffer.status()) { return make_unexpected(buffer.status()); } + + if (!buffer) { + input.get_exec_done_cb()(buffer.status()); + } CHECK_EXPECTED(buffer, "{} (D2H) failed with status={}", name(), buffer.status()); 
buffer->set_metadata(input.get_metadata()); @@ -524,13 +569,17 @@ Expected RemoveOverlappingBboxesElement::action(PipelineBuffer & detections_pipeline_data->m_detections_classes_count, m_nms_config.nms_iou_th); m_duration_collector.complete_measurement(); + auto exec_done_cb = input.get_exec_done_cb(); + exec_done_cb(HAILO_SUCCESS); + return buffer.release(); } Expected> NmsPostProcessMuxElement::create(std::shared_ptr nms_op, const std::string &name, std::chrono::milliseconds timeout, size_t buffer_pool_size, hailo_pipeline_elem_stats_flags_t elem_flags, hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event, - std::shared_ptr> pipeline_status, PipelineDirection pipeline_direction, bool is_last_copy_element) + std::shared_ptr> pipeline_status, PipelineDirection pipeline_direction, bool is_last_copy_element, + std::shared_ptr async_pipeline) { assert(nms_op->outputs_metadata().size() == 1); auto vstream_info = nms_op->metadata()->get_output_vstream_info(); @@ -546,7 +595,7 @@ Expected> NmsPostProcessMuxElement::cr CHECK_EXPECTED(duration_collector); auto nms_elem_ptr = make_shared_nothrow(nms_op, buffer_pool.release(), - name, timeout, duration_collector.release(), std::move(pipeline_status), pipeline_direction); + name, timeout, duration_collector.release(), std::move(pipeline_status), pipeline_direction, async_pipeline); CHECK_AS_EXPECTED(nullptr != nms_elem_ptr, HAILO_OUT_OF_HOST_MEMORY); LOGGER__INFO("Created {}", nms_elem_ptr->name()); @@ -554,29 +603,31 @@ Expected> NmsPostProcessMuxElement::cr } Expected> NmsPostProcessMuxElement::create(std::shared_ptr nms_op, - const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction, bool is_last_copy_element) + const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction, bool is_last_copy_element, + std::shared_ptr async_pipeline) { return NmsPostProcessMuxElement::create(nms_op, name, build_params.timeout, - 
build_params.buffer_pool_size, build_params.elem_stats_flags, build_params.vstream_stats_flags, - build_params.shutdown_event, build_params.pipeline_status, pipeline_direction, is_last_copy_element); + build_params.buffer_pool_size_edges, build_params.elem_stats_flags, build_params.vstream_stats_flags, + build_params.shutdown_event, build_params.pipeline_status, pipeline_direction, is_last_copy_element, async_pipeline); } Expected> NmsPostProcessMuxElement::create(std::shared_ptr nms_op, - const std::string &name, const hailo_vstream_params_t &vstream_params, - EventPtr shutdown_event, std::shared_ptr> pipeline_status, PipelineDirection pipeline_direction, bool is_last_copy_element) + const std::string &name, const hailo_vstream_params_t &vstream_params, EventPtr shutdown_event, + std::shared_ptr> pipeline_status, PipelineDirection pipeline_direction, bool is_last_copy_element, + std::shared_ptr async_pipeline) { return NmsPostProcessMuxElement::create(nms_op, name, std::chrono::milliseconds(vstream_params.timeout_ms), vstream_params.queue_size, vstream_params.pipeline_elements_stats_flags, vstream_params.vstream_stats_flags, shutdown_event, - pipeline_status, pipeline_direction, is_last_copy_element); + pipeline_status, pipeline_direction, is_last_copy_element, async_pipeline); } NmsPostProcessMuxElement::NmsPostProcessMuxElement(std::shared_ptr nms_op, BufferPoolPtr &&pool, const std::string &name, std::chrono::milliseconds timeout, DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, - PipelineDirection pipeline_direction) : + PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) : BaseMuxElement(nms_op->inputs_metadata().size(), name, timeout, std::move(duration_collector), std::move(pipeline_status), - std::move(pool), pipeline_direction), + std::move(pool), pipeline_direction, async_pipeline), m_nms_op(nms_op) {} @@ -599,12 +650,26 @@ Expected NmsPostProcessMuxElement::action(std::vectoras_view()}); // TODO: fill with 
correct name m_duration_collector.start_measurement(); auto post_process_result = m_nms_op->execute(inputs, outputs); m_duration_collector.complete_measurement(); + + for (auto &input : input_buffers) { + auto exec_done_cb = input.get_exec_done_cb(); + exec_done_cb(post_process_result); + } + acquired_buffer->set_action_status(post_process_result); + CHECK_SUCCESS_AS_EXPECTED(post_process_result); return acquired_buffer; } @@ -612,7 +677,8 @@ Expected NmsPostProcessMuxElement::action(std::vector> NmsMuxElement::create(const std::vector &nms_infos, const std::string &name, std::chrono::milliseconds timeout, size_t buffer_pool_size, hailo_pipeline_elem_stats_flags_t elem_flags, hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event, - std::shared_ptr> pipeline_status, PipelineDirection pipeline_direction, bool is_last_copy_element) + std::shared_ptr> pipeline_status, PipelineDirection pipeline_direction, bool is_last_copy_element, + std::shared_ptr async_pipeline) { const auto &fused_info = fuse_nms_info(nms_infos); auto buffer_pool = BufferPool::create(HailoRTCommon::get_nms_hw_frame_size(fused_info), @@ -623,7 +689,7 @@ Expected> NmsMuxElement::create(const std::vector CHECK_EXPECTED(duration_collector); auto nms_elem_ptr = make_shared_nothrow(nms_infos, fused_info, buffer_pool.release(), - name, timeout, duration_collector.release(), std::move(pipeline_status), pipeline_direction); + name, timeout, duration_collector.release(), std::move(pipeline_status), pipeline_direction, async_pipeline); CHECK_AS_EXPECTED(nullptr != nms_elem_ptr, HAILO_OUT_OF_HOST_MEMORY); LOGGER__INFO("Created {}", nms_elem_ptr->name()); @@ -633,24 +699,27 @@ Expected> NmsMuxElement::create(const std::vector Expected> NmsMuxElement::create(const std::vector &nms_infos, const std::string &name, const hailo_vstream_params_t &vstream_params, EventPtr shutdown_event, std::shared_ptr> pipeline_status, - PipelineDirection pipeline_direction, bool is_last_copy_element) + 
PipelineDirection pipeline_direction, bool is_last_copy_element, std::shared_ptr async_pipeline) { return NmsMuxElement::create(nms_infos, name, std::chrono::milliseconds(vstream_params.timeout_ms), vstream_params.queue_size, vstream_params.pipeline_elements_stats_flags, vstream_params.vstream_stats_flags, shutdown_event, pipeline_status, pipeline_direction, - is_last_copy_element); + is_last_copy_element, async_pipeline); } Expected> NmsMuxElement::create(const std::vector &nms_infos, - const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction, bool is_last_copy_element) + const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction, bool is_last_copy_element, + std::shared_ptr async_pipeline) { - return NmsMuxElement::create(nms_infos, name, build_params.timeout, build_params.buffer_pool_size, build_params.elem_stats_flags, - build_params.vstream_stats_flags, build_params.shutdown_event, build_params.pipeline_status, pipeline_direction, is_last_copy_element); + return NmsMuxElement::create(nms_infos, name, build_params.timeout, build_params.buffer_pool_size_edges, build_params.elem_stats_flags, + build_params.vstream_stats_flags, build_params.shutdown_event, build_params.pipeline_status, pipeline_direction, is_last_copy_element, + async_pipeline); } NmsMuxElement::NmsMuxElement(const std::vector &nms_infos, const hailo_nms_info_t &fused_nms_info, BufferPoolPtr &&pool, const std::string &name, std::chrono::milliseconds timeout, DurationCollector &&duration_collector, - std::shared_ptr> &&pipeline_status, PipelineDirection pipeline_direction) : - BaseMuxElement(nms_infos.size(), name, timeout, std::move(duration_collector), std::move(pipeline_status), std::move(pool), pipeline_direction), + std::shared_ptr> &&pipeline_status, PipelineDirection pipeline_direction, + std::shared_ptr async_pipeline) : + BaseMuxElement(nms_infos.size(), name, timeout, std::move(duration_collector), 
std::move(pipeline_status), std::move(pool), pipeline_direction, async_pipeline), m_nms_infos(nms_infos), m_fused_nms_info(fused_nms_info) {} @@ -681,6 +750,13 @@ Expected NmsMuxElement::action(std::vector &&inp if (HAILO_SHUTDOWN_EVENT_SIGNALED == acquired_buffer.status()) { return make_unexpected(acquired_buffer.status()); } + + if (!acquired_buffer) { + for (auto &input : inputs) { + auto exec_done_cb = input.get_exec_done_cb(); + exec_done_cb(acquired_buffer.status()); + } + } CHECK_AS_EXPECTED(HAILO_TIMEOUT != acquired_buffer.status(), HAILO_TIMEOUT, "{} failed with status={} (timeout={}ms)", name(), HAILO_TIMEOUT, m_timeout.count()); CHECK_EXPECTED(acquired_buffer); @@ -688,6 +764,13 @@ Expected NmsMuxElement::action(std::vector &&inp m_duration_collector.start_measurement(); const auto status = fuse_buffers(input_views, m_nms_infos, acquired_buffer.value().as_view()); m_duration_collector.complete_measurement(); + + for (auto &input : inputs) { + auto exec_done_cb = input.get_exec_done_cb(); + exec_done_cb(status); + } + acquired_buffer->set_action_status(status); + CHECK_SUCCESS_AS_EXPECTED(status); return acquired_buffer.release(); @@ -696,13 +779,12 @@ Expected NmsMuxElement::action(std::vector &&inp Expected> TransformDemuxElement::create(std::shared_ptr demuxer, const std::string &name, std::chrono::milliseconds timeout, size_t buffer_pool_size, hailo_pipeline_elem_stats_flags_t elem_flags, hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event, std::shared_ptr> pipeline_status, - PipelineDirection pipeline_direction) + PipelineDirection pipeline_direction, bool is_last_copy_element, std::shared_ptr async_pipeline) { std::vector pools; pools.reserve(demuxer->get_edges_stream_info().size()); - for (const auto& mux_edge : demuxer->get_edges_stream_info()) { - auto buffer_pool = BufferPool::create(mux_edge.hw_frame_size, buffer_pool_size, shutdown_event, elem_flags, vstream_flags); + auto buffer_pool = BufferPool::create(mux_edge.hw_frame_size, 
buffer_pool_size, shutdown_event, elem_flags, vstream_flags, is_last_copy_element); CHECK_EXPECTED(buffer_pool, "Failed creating BufferPool"); pools.push_back(buffer_pool.release()); } @@ -710,8 +792,9 @@ Expected> TransformDemuxElement::create(s auto duration_collector = DurationCollector::create(elem_flags); CHECK_EXPECTED(duration_collector); + auto demux_elem_ptr = make_shared_nothrow(demuxer, std::move(pools), name, timeout, - duration_collector.release(), std::move(pipeline_status), pipeline_direction); + duration_collector.release(), std::move(pipeline_status), pipeline_direction, async_pipeline); CHECK_AS_EXPECTED(nullptr != demux_elem_ptr, HAILO_OUT_OF_HOST_MEMORY); return demux_elem_ptr; @@ -719,44 +802,25 @@ Expected> TransformDemuxElement::create(s Expected> TransformDemuxElement::create(std::shared_ptr demuxer, const std::string &name, const ElementBuildParams &build_params, - PipelineDirection pipeline_direction) + PipelineDirection pipeline_direction, bool is_last_copy_element, std::shared_ptr async_pipeline) { - return TransformDemuxElement::create(demuxer, name, build_params.timeout, build_params.buffer_pool_size, build_params.elem_stats_flags, - build_params.vstream_stats_flags, build_params.shutdown_event, build_params.pipeline_status, pipeline_direction); + return TransformDemuxElement::create(demuxer, name, build_params.timeout, build_params.buffer_pool_size_edges, build_params.elem_stats_flags, + build_params.vstream_stats_flags, build_params.shutdown_event, build_params.pipeline_status, pipeline_direction, is_last_copy_element, async_pipeline); } TransformDemuxElement::TransformDemuxElement(std::shared_ptr demuxer, std::vector &&pools, - const std::string &name, std::chrono::milliseconds timeout, - DurationCollector &&duration_collector, - std::shared_ptr> &&pipeline_status, - PipelineDirection pipeline_direction) : + const std::string &name, std::chrono::milliseconds timeout, DurationCollector &&duration_collector, + std::shared_ptr> 
&&pipeline_status, PipelineDirection pipeline_direction, + std::shared_ptr async_pipeline) : BaseDemuxElement(demuxer->get_edges_stream_info().size(), name, timeout, std::move(duration_collector), - std::move(pipeline_status), std::move(pools), pipeline_direction), - m_demuxer(demuxer) + std::move(pipeline_status), std::move(pools), pipeline_direction, async_pipeline), + m_demuxer(demuxer) {} -PixBufferElement::PixBufferElement(const std::string &name, std::chrono::milliseconds timeout, - DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, - size_t sources_count, hailo_format_order_t order) : - BaseDemuxElement(sources_count, name, timeout, std::move(duration_collector), std::move(pipeline_status), - {}, PipelineDirection::PUSH), - m_order(order) -{} - -Expected> PixBufferElement::create(const std::string &name, - std::chrono::milliseconds timeout, DurationCollector &&duration_collector, - std::shared_ptr> pipeline_status, size_t sources_count, hailo_format_order_t order) -{ - auto pix_buffer_splitter_elem_ptr = make_shared_nothrow(name, timeout, - std::move(duration_collector), std::move(pipeline_status), sources_count, order); - CHECK_AS_EXPECTED(nullptr != pix_buffer_splitter_elem_ptr, HAILO_OUT_OF_HOST_MEMORY); - return pix_buffer_splitter_elem_ptr; -} - std::vector TransformDemuxElement::get_queue_size_accumulators() { std::vector result; - for (const auto& pool : m_pools) { + for (const auto &pool : m_pools) { if (nullptr != pool->get_queue_size_accumulator()) { result.emplace_back(pool->get_queue_size_accumulator()); } @@ -778,20 +842,55 @@ Expected> TransformDemuxElement::action(PipelineBuff if (HAILO_SHUTDOWN_EVENT_SIGNALED == acquired_buffer.status()) { return make_unexpected(acquired_buffer.status()); } + + if (!acquired_buffer) { + input.get_exec_done_cb()(acquired_buffer.status()); + } CHECK_EXPECTED(acquired_buffer, "Failed to acquire buffer"); outputs.emplace_back(acquired_buffer.release()); - 
raw_buffers.push_back(outputs.back().as_view()); } m_duration_collector.start_measurement(); const auto status = m_demuxer->transform_demux(input.as_view(), raw_buffers); m_duration_collector.complete_measurement(); + + auto exec_done_cb = input.get_exec_done_cb(); + exec_done_cb(status); + for (auto &output : outputs) { + output.set_action_status(status); + } + CHECK_SUCCESS_AS_EXPECTED(status); return outputs; } +PixBufferElement::PixBufferElement(const std::string &name, std::chrono::milliseconds timeout, + DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, + hailo_format_order_t order, std::shared_ptr async_pipeline) : + BaseDemuxElement(((order == HAILO_FORMAT_ORDER_I420) ? NUMBER_OF_PLANES_I420 : NUMBER_OF_PLANES_NV12_NV21), + name, timeout, std::move(duration_collector), std::move(pipeline_status), + {}, PipelineDirection::PUSH, async_pipeline), + m_order(order) +{} + +Expected PixBufferElement::can_push_buffer_upstream(const std::string &pad_name) +{ + return m_sinks[0].prev()->element().can_push_buffer_upstream(pad_name); +} + +Expected> PixBufferElement::create(const std::string &name, + std::chrono::milliseconds timeout, DurationCollector &&duration_collector, + std::shared_ptr> pipeline_status, hailo_format_order_t order, + std::shared_ptr async_pipeline) +{ + auto pix_buffer_splitter_elem_ptr = make_shared_nothrow(name, timeout, + std::move(duration_collector), std::move(pipeline_status), order, async_pipeline); + CHECK_AS_EXPECTED(nullptr != pix_buffer_splitter_elem_ptr, HAILO_OUT_OF_HOST_MEMORY); + return pix_buffer_splitter_elem_ptr; +} + Expected> PixBufferElement::action(PipelineBuffer &&input) { // splits the planes into buffers @@ -799,6 +898,10 @@ Expected> PixBufferElement::action(PipelineBuffer && std::vector outputs; auto input_pix_buffer_expected = input.as_hailo_pix_buffer(m_order); + + if (!input_pix_buffer_expected) { + input.get_exec_done_cb()(input_pix_buffer_expected.status()); + } 
CHECK_EXPECTED(input_pix_buffer_expected); auto input_pix_buffer = input_pix_buffer_expected.release(); @@ -807,8 +910,20 @@ Expected> PixBufferElement::action(PipelineBuffer && outputs.emplace_back(PipelineBuffer(PipelineBuffer::Type::FLUSH)); } } else { - for (uint32_t i = 0; i < input_pix_buffer.number_of_planes; i++){ - outputs.emplace_back(MemoryView(input_pix_buffer.planes[i].user_ptr, input_pix_buffer.planes[i].bytes_used)); + auto shared_counter = make_shared_nothrow(input_pix_buffer.number_of_planes); + if (!shared_counter) { + input.get_exec_done_cb()(HAILO_OUT_OF_HOST_MEMORY); + } + CHECK_NOT_NULL_AS_EXPECTED(shared_counter, HAILO_OUT_OF_HOST_MEMORY); + + for (uint32_t i = 0; i < input_pix_buffer.number_of_planes; i++) { + outputs.emplace_back(MemoryView(input_pix_buffer.planes[i].user_ptr, input_pix_buffer.planes[i].bytes_used), + [shared_counter, input_cb = input.get_exec_done_cb()](hailo_status status) + { + if (--*shared_counter == 0) { + input_cb(status); + } + }); } } @@ -820,7 +935,7 @@ Expected> ArgmaxPostProcessElement::cr const std::string &name, hailo_pipeline_elem_stats_flags_t elem_flags, std::shared_ptr> pipeline_status, size_t buffer_pool_size, std::chrono::milliseconds timeout, hailo_vstream_stats_flags_t vstream_flags, - EventPtr shutdown_event, PipelineDirection pipeline_direction, bool is_last_copy_element) + EventPtr shutdown_event, PipelineDirection pipeline_direction, bool is_last_copy_element, std::shared_ptr async_pipeline) { auto out_metadata = argmax_op->outputs_metadata().begin()->second; auto buffer_size = HailoRTCommon::get_frame_size(out_metadata.shape, out_metadata.format); @@ -830,24 +945,26 @@ Expected> ArgmaxPostProcessElement::cr auto duration_collector = DurationCollector::create(elem_flags); CHECK_EXPECTED(duration_collector); auto argmax_elem_ptr = make_shared_nothrow(argmax_op, - name, duration_collector.release(), std::move(pipeline_status), timeout, buffer_pool.release(), pipeline_direction); + name, 
duration_collector.release(), std::move(pipeline_status), timeout, buffer_pool.release(), pipeline_direction, async_pipeline); CHECK_AS_EXPECTED(nullptr != argmax_elem_ptr, HAILO_OUT_OF_HOST_MEMORY); LOGGER__INFO("Created {}", argmax_elem_ptr->name()); return argmax_elem_ptr; } Expected> ArgmaxPostProcessElement::create(std::shared_ptr argmax_op, - const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction, bool is_last_copy_element) + const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction, bool is_last_copy_element, + std::shared_ptr async_pipeline) { return ArgmaxPostProcessElement::create(argmax_op, name, - build_params.elem_stats_flags, build_params.pipeline_status, build_params.buffer_pool_size, build_params.timeout, - build_params.vstream_stats_flags, build_params.shutdown_event, pipeline_direction, is_last_copy_element); + build_params.elem_stats_flags, build_params.pipeline_status, build_params.buffer_pool_size_edges, build_params.timeout, + build_params.vstream_stats_flags, build_params.shutdown_event, pipeline_direction, is_last_copy_element, async_pipeline); } ArgmaxPostProcessElement::ArgmaxPostProcessElement(std::shared_ptr argmax_op, const std::string &name, DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, - std::chrono::milliseconds timeout, BufferPoolPtr buffer_pool, PipelineDirection pipeline_direction) : - FilterElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, buffer_pool, timeout), + std::chrono::milliseconds timeout, BufferPoolPtr buffer_pool, PipelineDirection pipeline_direction, + std::shared_ptr async_pipeline) : + FilterElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, buffer_pool, timeout, async_pipeline), m_argmax_op(argmax_op) {} @@ -886,6 +1003,10 @@ Expected ArgmaxPostProcessElement::action(PipelineBuffer &&input if 
(HAILO_SHUTDOWN_EVENT_SIGNALED == buffer.status()) { return make_unexpected(buffer.status()); } + + if (!buffer) { + input.get_exec_done_cb()(buffer.status()); + } CHECK_EXPECTED(buffer, "{} (D2H) failed with status={}", name(), buffer.status()); std::map inputs; @@ -896,16 +1017,22 @@ Expected ArgmaxPostProcessElement::action(PipelineBuffer &&input outputs.insert({output_name, buffer->as_view()}); m_duration_collector.start_measurement(); auto post_process_result = m_argmax_op->execute(inputs, outputs); - CHECK_SUCCESS_AS_EXPECTED(post_process_result); m_duration_collector.complete_measurement(); + auto exec_done_cb = input.get_exec_done_cb(); + exec_done_cb(post_process_result); + buffer->set_action_status(post_process_result); + + CHECK_SUCCESS_AS_EXPECTED(post_process_result); + return buffer.release(); } Expected> SoftmaxPostProcessElement::create(std::shared_ptr softmax_op, const std::string &name, hailo_pipeline_elem_stats_flags_t elem_flags, std::shared_ptr> pipeline_status, size_t buffer_pool_size, std::chrono::milliseconds timeout, - hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event, PipelineDirection pipeline_direction, bool is_last_copy_element) + hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event, PipelineDirection pipeline_direction, bool is_last_copy_element, + std::shared_ptr async_pipeline) { auto out_metadata = softmax_op->outputs_metadata().begin()->second; auto buffer_size = HailoRTCommon::get_frame_size(out_metadata.shape, out_metadata.format); @@ -915,23 +1042,24 @@ Expected> SoftmaxPostProcessElement:: auto duration_collector = DurationCollector::create(elem_flags); CHECK_EXPECTED(duration_collector); auto softmax_elem_ptr = make_shared_nothrow(softmax_op, - name, duration_collector.release(), std::move(pipeline_status), timeout, buffer_pool.release(), pipeline_direction); + name, duration_collector.release(), std::move(pipeline_status), timeout, buffer_pool.release(), pipeline_direction, async_pipeline); 
CHECK_AS_EXPECTED(nullptr != softmax_elem_ptr, HAILO_OUT_OF_HOST_MEMORY); LOGGER__INFO("Created {}", softmax_elem_ptr->name()); return softmax_elem_ptr; } Expected> SoftmaxPostProcessElement::create(std::shared_ptr softmax_op, - const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction, bool is_last_copy_element) + const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction, bool is_last_copy_element, + std::shared_ptr async_pipeline) { - return SoftmaxPostProcessElement::create(softmax_op, name, build_params.elem_stats_flags, build_params.pipeline_status, build_params.buffer_pool_size, - build_params.timeout, build_params.vstream_stats_flags, build_params.shutdown_event, pipeline_direction, is_last_copy_element); + return SoftmaxPostProcessElement::create(softmax_op, name, build_params.elem_stats_flags, build_params.pipeline_status, build_params.buffer_pool_size_edges, + build_params.timeout, build_params.vstream_stats_flags, build_params.shutdown_event, pipeline_direction, is_last_copy_element, async_pipeline); } SoftmaxPostProcessElement::SoftmaxPostProcessElement(std::shared_ptr softmax_op, const std::string &name, DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, - std::chrono::milliseconds timeout, BufferPoolPtr buffer_pool, PipelineDirection pipeline_direction) : - FilterElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, buffer_pool, timeout), + std::chrono::milliseconds timeout, BufferPoolPtr buffer_pool, PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) : + FilterElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, buffer_pool, timeout, async_pipeline), m_softmax_op(softmax_op) {} @@ -970,6 +1098,10 @@ Expected SoftmaxPostProcessElement::action(PipelineBuffer &&inpu if (HAILO_SHUTDOWN_EVENT_SIGNALED == buffer.status()) { return 
make_unexpected(buffer.status()); } + + if (!buffer) { + input.get_exec_done_cb()(buffer.status()); + } CHECK_EXPECTED(buffer, "{} (D2H) failed with status={}", name(), buffer.status()); std::map inputs; @@ -980,9 +1112,14 @@ Expected SoftmaxPostProcessElement::action(PipelineBuffer &&inpu outputs.insert({output_name, buffer->as_view()}); m_duration_collector.start_measurement(); auto post_process_result = m_softmax_op->execute(inputs, outputs); - CHECK_SUCCESS_AS_EXPECTED(post_process_result); m_duration_collector.complete_measurement(); + auto exec_done_cb = input.get_exec_done_cb(); + exec_done_cb(post_process_result); + buffer->set_action_status(post_process_result); + + CHECK_SUCCESS_AS_EXPECTED(post_process_result); + return buffer.release(); } @@ -1227,7 +1364,7 @@ hailo_status InputVStream::write(const MemoryView &buffer) hailo_status InputVStream::write(const hailo_pix_buffer_t &buffer) { - // If only one plane is passed, address it s memview + // If only one plane is passed, address it as memview if (1 == buffer.number_of_planes) { return write(MemoryView(buffer.planes[0].user_ptr, buffer.planes[0].bytes_used)); } @@ -1751,6 +1888,10 @@ hailo_status InputVStreamImpl::flush() { assert(1 == m_entry_element->sinks().size()); auto status = m_entry_element->sinks()[0].run_push(PipelineBuffer(PipelineBuffer::Type::FLUSH)); + if (HAILO_STREAM_ABORTED_BY_USER == status) { + LOGGER__INFO("Sending to VStream was aborted!"); + return HAILO_STREAM_ABORTED_BY_USER; + } CHECK_SUCCESS(status); status = m_entry_element->flush(); @@ -1987,7 +2128,11 @@ OutputVStreamInternal::OutputVStreamInternal(const hailo_vstream_info_t &vstream AccumulatorPtr pipeline_latency_accumulator, EventPtr core_op_activated_event, hailo_status &output_status) : BaseVStream(vstream_info, quant_infos, vstream_params, pipeline_entry, std::move(pipeline), std::move(pipeline_status), - shutdown_event, pipeline_latency_accumulator, std::move(core_op_activated_event), output_status){} + 
shutdown_event, pipeline_latency_accumulator, std::move(core_op_activated_event), output_status) +{ + // Reversing the order of pipeline-elements, for the destruction flow to work in the right order (from user-side to hw-side) + std::reverse(m_pipeline.begin(), m_pipeline.end()); +} Expected> OutputVStreamImpl::create(const hailo_vstream_info_t &vstream_info, const std::vector &quant_infos, const hailo_vstream_params_t &vstream_params, @@ -2039,19 +2184,6 @@ OutputVStreamImpl::OutputVStreamImpl(const hailo_vstream_info_t &vstream_info, c return; } - for (auto &element : m_pipeline) { - element->set_on_cant_pull_callback([this] () { - if (m_cant_read_callback) { - m_cant_read_callback(); - } - }); - element->set_on_can_pull_callback([this] () { - if (m_can_read_callback) { - m_can_read_callback(); - } - }); - } - LOGGER__INFO("Creating {}...", name()); } @@ -2087,57 +2219,51 @@ hailo_status OutputVStreamImpl::read(MemoryView buffer) return status; } -Expected> OutputVStreamImpl::get_nms_metadata_from_pipeline() const +hailo_status OutputVStreamImpl::set_nms_score_threshold(float32_t threshold) { - CHECK_AS_EXPECTED(HailoRTCommon::is_nms(m_vstream_info), HAILO_INVALID_OPERATION, - "Output vstream '{}' is not NMS, there is no NMS op", name()); - + auto status = HAILO_INVALID_OPERATION; // Assuming there is no valid element for (auto &elem : m_pipeline) { - if (auto nms_pp_elem = std::dynamic_pointer_cast(elem)) { - // Assuming we have only 1 nms PP on the pipeline - auto nms_metadata = std::dynamic_pointer_cast(nms_pp_elem->get_op()->metadata()); - CHECK_NOT_NULL_AS_EXPECTED(nms_metadata, HAILO_INVALID_OPERATION); - return nms_metadata; + auto elem_status = elem->set_nms_score_threshold(threshold); + if (HAILO_SUCCESS == elem_status) { + status = elem_status; // 1 element is enough to call this setter successful } } - LOGGER__ERROR("There is no NmsPostProcess in the '{}' pipeline. 
Unable to get nms op", name()); - return make_unexpected(HAILO_INVALID_OPERATION); -} + CHECK_SUCCESS(status, "Unable to set NMS score threshold in {}", name()); -hailo_status OutputVStreamImpl::set_nms_score_threshold(float32_t threshold) -{ - auto nms_metadata_expected = get_nms_metadata_from_pipeline(); - CHECK_EXPECTED_AS_STATUS(nms_metadata_expected, "Unable to set nms score threshold in {}", name()); - auto nms_metadata = nms_metadata_expected.release(); - - nms_metadata->nms_config().nms_score_th = threshold; return HAILO_SUCCESS; } hailo_status OutputVStreamImpl::set_nms_iou_threshold(float32_t threshold) { - auto nms_metadata_expected = get_nms_metadata_from_pipeline(); - CHECK_EXPECTED_AS_STATUS(nms_metadata_expected, "Unable to set nms IoU threshold in {}", name()); - auto nms_metadata = nms_metadata_expected.release(); + auto status = HAILO_INVALID_OPERATION; // Assuming there is no valid element + for (auto &elem : m_pipeline) { + auto elem_status = elem->set_nms_iou_threshold(threshold); + if (HAILO_SUCCESS == elem_status) { + status = elem_status; // 1 element is enough to call this setter successful + } + } + CHECK_SUCCESS(status, "Unable to set NMS IoU threshold in {}", name()); - nms_metadata->nms_config().nms_iou_th = threshold; return HAILO_SUCCESS; } hailo_status OutputVStreamImpl::set_nms_max_proposals_per_class(uint32_t max_proposals_per_class) { - auto nms_metadata_expected = get_nms_metadata_from_pipeline(); - CHECK_EXPECTED_AS_STATUS(nms_metadata_expected, "Unable to set nms max proposals per class in {}", name()); - auto nms_metadata = nms_metadata_expected.release(); + auto status = HAILO_INVALID_OPERATION; // Assuming there is no valid element + for (auto &elem : m_pipeline) { + auto elem_status = elem->set_nms_max_proposals_per_class(max_proposals_per_class); + if (HAILO_SUCCESS == elem_status) { + status = elem_status; // 1 element is enough to call this setter successful + } + } + CHECK_SUCCESS(status, "Unable to set NMS max 
proposals per class in {}", name()); - nms_metadata->nms_config().max_proposals_per_class = max_proposals_per_class; // Update vstream info m_vstream_info.nms_shape.max_bboxes_per_class = max_proposals_per_class; return HAILO_SUCCESS; } - #ifdef HAILO_SUPPORT_MULTI_PROCESS Expected> OutputVStreamClient::create(const VStreamIdentifier &&identifier) { @@ -2344,7 +2470,7 @@ hailo_status OutputVStreamClient::set_nms_max_proposals_per_class(uint32_t max_p #endif // HAILO_SUPPORT_MULTI_PROCESS -Expected> HwReadElement::create(std::shared_ptr stream, const std::string &name, std::chrono::milliseconds timeout, +Expected> HwReadElement::create(std::shared_ptr stream, const std::string &name, std::chrono::milliseconds timeout, size_t buffer_pool_size, hailo_pipeline_elem_stats_flags_t elem_flags, hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event, std::shared_ptr> pipeline_status, PipelineDirection pipeline_direction) { @@ -2352,7 +2478,7 @@ Expected> HwReadElement::create(std::shared_ptr(*stream).set_buffer_mode(StreamBufferMode::OWNING); + auto status = stream->set_buffer_mode(StreamBufferMode::OWNING); CHECK_SUCCESS_AS_EXPECTED(status); auto duration_collector = DurationCollector::create(elem_flags); @@ -2367,11 +2493,11 @@ Expected> HwReadElement::create(std::shared_ptr stream, BufferPoolPtr buffer_pool, const std::string &name, +HwReadElement::HwReadElement(std::shared_ptr stream, BufferPoolPtr buffer_pool, const std::string &name, std::chrono::milliseconds timeout, DurationCollector &&duration_collector, EventPtr shutdown_event, std::shared_ptr> &&pipeline_status, PipelineDirection pipeline_direction) : - SourceElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction), + SourceElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, nullptr), m_stream(stream), m_pool(buffer_pool), m_timeout(timeout), @@ -2395,7 +2521,7 @@ std::string HwReadElement::description() const hailo_status 
HwReadElement::execute_post_deactivate(bool should_clear_abort) { if (should_clear_abort) { - auto status = m_stream->clear_abort(); + auto status = execute_clear_abort(); CHECK(((HAILO_SUCCESS == status) || (HAILO_STREAM_NOT_ACTIVATED == status)), status, "Failed to clear abort stream in {}", name()); } @@ -2414,18 +2540,12 @@ hailo_status HwReadElement::execute_flush() hailo_status HwReadElement::execute_abort() { - auto status = m_stream->abort(); - CHECK(((status == HAILO_SUCCESS) || (status == HAILO_STREAM_NOT_ACTIVATED)), status, - "Failed to execute abort stream in {}", name()); - return HAILO_SUCCESS; + return m_stream->abort_impl(); } hailo_status HwReadElement::execute_clear_abort() { - auto status = m_stream->clear_abort(); - CHECK(((status == HAILO_SUCCESS) || (status == HAILO_STREAM_NOT_ACTIVATED)), status, - "Failed to execute clear_abort stream in {}", name()); - return HAILO_SUCCESS; + return m_stream->clear_abort_impl(); } hailo_status HwReadElement::execute_wait_for_finish() @@ -2511,7 +2631,7 @@ hailo_status HwReadElement::execute_deactivate() LOGGER__ERROR("Signaling {} shutdown event failed with {}", name(), signal_shutdown_status); } - auto abort_status = m_stream->abort(); + auto abort_status = execute_abort(); if ((HAILO_SUCCESS != abort_status) && (HAILO_STREAM_NOT_ACTIVATED != abort_status)) { LOGGER__ERROR("Abort {} failed with {}", name(), abort_status); return abort_status; @@ -2520,7 +2640,7 @@ hailo_status HwReadElement::execute_deactivate() return signal_shutdown_status; } -Expected> HwWriteElement::create(std::shared_ptr stream, const std::string &name, +Expected> HwWriteElement::create(std::shared_ptr stream, const std::string &name, hailo_pipeline_elem_stats_flags_t elem_flags, std::shared_ptr> pipeline_status, PipelineDirection pipeline_direction) { @@ -2531,7 +2651,7 @@ Expected> HwWriteElement::create(std::shared_ptr CHECK_EXPECTED(got_flush_event); // On HwWriteElement the stream always owns the buffer, hence, we set the mode 
explicitly. - auto status = dynamic_cast(*stream).set_buffer_mode(StreamBufferMode::OWNING); + auto status = stream->set_buffer_mode(StreamBufferMode::OWNING); CHECK_SUCCESS_AS_EXPECTED(status); auto hw_write_elem_ptr = make_shared_nothrow(stream, name, @@ -2543,9 +2663,9 @@ Expected> HwWriteElement::create(std::shared_ptr return hw_write_elem_ptr; } -HwWriteElement::HwWriteElement(std::shared_ptr stream, const std::string &name, DurationCollector &&duration_collector, +HwWriteElement::HwWriteElement(std::shared_ptr stream, const std::string &name, DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, EventPtr got_flush_event, PipelineDirection pipeline_direction) : - SinkElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction), + SinkElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, nullptr), m_stream(stream), m_got_flush_event(got_flush_event) {} @@ -2607,7 +2727,7 @@ hailo_status HwWriteElement::execute_deactivate() LOGGER__ERROR("flush has failed in {} with status {}", name(), flush_status); } - auto abort_status = m_stream->abort(); + auto abort_status = execute_abort(); CHECK(((abort_status == HAILO_SUCCESS) || (abort_status == HAILO_STREAM_NOT_ACTIVATED)), abort_status, "Failed to abort stream in {}", name()); return HAILO_SUCCESS; @@ -2616,7 +2736,7 @@ hailo_status HwWriteElement::execute_deactivate() hailo_status HwWriteElement::execute_post_deactivate(bool should_clear_abort) { if (should_clear_abort) { - auto status = m_stream->clear_abort(); + auto status = execute_clear_abort(); CHECK(((status == HAILO_SUCCESS) || (status == HAILO_STREAM_NOT_ACTIVATED)), status, "Failed to clear abort stream in {}", name()); } @@ -2641,18 +2761,12 @@ hailo_status HwWriteElement::execute_flush() hailo_status HwWriteElement::execute_abort() { - auto status = m_stream->abort(); - CHECK(((status == HAILO_SUCCESS) || (status == HAILO_STREAM_NOT_ACTIVATED)), status, - "Failed 
to execute abort stream in {}", name()); - return HAILO_SUCCESS; + return m_stream->abort_impl(); } hailo_status HwWriteElement::execute_clear_abort() { - auto status = m_stream->clear_abort(); - CHECK(((status == HAILO_SUCCESS) || (status == HAILO_STREAM_NOT_ACTIVATED)), status, - "Failed to execute clear_abort stream in {}", name()); - return HAILO_SUCCESS; + return m_stream->clear_abort_impl(); } hailo_status HwWriteElement::execute_wait_for_finish() @@ -2670,13 +2784,13 @@ std::string HwWriteElement::description() const Expected> LastAsyncElement::create(const std::string &name, hailo_pipeline_elem_stats_flags_t elem_flags, std::shared_ptr> pipeline_status, - PipelineDirection pipeline_direction) + std::shared_ptr async_pipeline, PipelineDirection pipeline_direction) { auto duration_collector = DurationCollector::create(elem_flags); CHECK_EXPECTED(duration_collector); auto last_async_elem_ptr = make_shared_nothrow(name, - duration_collector.release(), std::move(pipeline_status), pipeline_direction); + duration_collector.release(), std::move(pipeline_status), pipeline_direction, async_pipeline); CHECK_NOT_NULL_AS_EXPECTED(last_async_elem_ptr, HAILO_OUT_OF_HOST_MEMORY); LOGGER__INFO("Created {}", last_async_elem_ptr->name()); @@ -2685,16 +2799,16 @@ Expected> LastAsyncElement::create(const std:: } Expected> LastAsyncElement::create(const std::string &name, - const ElementBuildParams &build_params, PipelineDirection pipeline_direction) + const ElementBuildParams &build_params, std::shared_ptr async_pipeline, PipelineDirection pipeline_direction) { return LastAsyncElement::create(name, build_params.elem_stats_flags, - build_params.pipeline_status, pipeline_direction); + build_params.pipeline_status, async_pipeline, pipeline_direction); } LastAsyncElement::LastAsyncElement(const std::string &name, DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, - PipelineDirection pipeline_direction): - SinkElement(name, std::move(duration_collector), 
std::move(pipeline_status), pipeline_direction) + PipelineDirection pipeline_direction, std::shared_ptr async_pipeline): + SinkElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, async_pipeline) {} Expected LastAsyncElement::run_pull(PipelineBuffer &&/*optional*/, const PipelinePad &/*source*/) @@ -2710,8 +2824,7 @@ hailo_status LastAsyncElement::run_push(PipelineBuffer &&/*optional*/, const Pip void LastAsyncElement::run_push_async(PipelineBuffer &&buffer, const PipelinePad &/*sink*/) { auto exec_done_cb = buffer.get_exec_done_cb(); - CompletionInfoAsyncInferInternal completion_info{buffer.action_status()}; - exec_done_cb(completion_info); + exec_done_cb(buffer.action_status()); } std::string LastAsyncElement::description() const @@ -2738,32 +2851,55 @@ hailo_status LastAsyncElement::enqueue_execution_buffer(MemoryView mem_view, con return m_sinks[0].prev()->element().enqueue_execution_buffer(mem_view, exec_done, m_sinks[0].prev()->name()); } -Expected LastAsyncElement::are_buffer_pools_full() +Expected LastAsyncElement::can_push_buffer_upstream(const uint32_t /*source_index*/) +{ + auto source_index = m_sinks[0].prev()->element().get_source_index_from_source_name(m_sinks[0].prev()->name()); + CHECK_EXPECTED(source_index); + return m_sinks[0].prev()->element().can_push_buffer_upstream(*source_index); +} + +hailo_status LastAsyncElement::fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const uint32_t /*source_index*/) +{ + auto source_index = m_sinks[0].prev()->element().get_source_index_from_source_name(m_sinks[0].prev()->name()); + CHECK_EXPECTED_AS_STATUS(source_index); + return m_sinks[0].prev()->element().fill_buffer_pool(is_dma_able, num_of_buffers, *source_index); +} + +Expected LastAsyncElement::can_push_buffer_upstream(const std::string &/*source_name*/) { - return m_sinks[0].prev()->element().are_buffer_pools_full(); + return m_sinks[0].prev()->element().can_push_buffer_upstream(m_sinks[0].prev()->name()); 
} -hailo_status LastAsyncElement::fill_buffer_pools(bool is_dma_able) { - return m_sinks[0].prev()->element().fill_buffer_pools(is_dma_able); +hailo_status LastAsyncElement::fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const std::string &/*source_name*/) +{ + return m_sinks[0].prev()->element().fill_buffer_pool(is_dma_able, num_of_buffers, m_sinks[0].prev()->name()); } -Expected> AsyncHwElement::create(const std::vector> &input_streams, - const std::vector> &output_streams, std::chrono::milliseconds timeout, size_t buffer_pool_size, - hailo_pipeline_elem_stats_flags_t elem_flags, hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event, const std::string &name, - std::shared_ptr> pipeline_status, PipelineDirection pipeline_direction, bool is_last_copy_element) +Expected> AsyncHwElement::create(const std::unordered_map &named_stream_infos, + std::chrono::milliseconds timeout, size_t buffer_pool_size, hailo_pipeline_elem_stats_flags_t elem_flags, + hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event, const std::string &name, + std::shared_ptr> pipeline_status, std::shared_ptr net_group, + PipelineDirection pipeline_direction, bool is_last_copy_element, std::shared_ptr async_pipeline) { - std::unordered_map output_streams_pools; - for (const auto &output_stream : output_streams) { - auto buffer_pool = BufferPool::create(output_stream->get_frame_size(), buffer_pool_size, shutdown_event, elem_flags, vstream_flags, is_last_copy_element); - CHECK_EXPECTED(buffer_pool); - output_streams_pools[output_stream->name()] = buffer_pool.release(); + std::vector output_streams_pools; + for (const auto &stream_info_pair : named_stream_infos) { + if (HAILO_D2H_STREAM == stream_info_pair.second.direction) { + auto buffer_pool = BufferPool::create(stream_info_pair.second.hw_frame_size, buffer_pool_size, shutdown_event, elem_flags, vstream_flags, + is_last_copy_element); + CHECK_EXPECTED(buffer_pool); + 
output_streams_pools.emplace_back(buffer_pool.release()); + } } auto duration_collector = DurationCollector::create(elem_flags); CHECK_EXPECTED(duration_collector); - auto elem_ptr = make_shared_nothrow(input_streams, output_streams, timeout, std::move(output_streams_pools), name, - duration_collector.release(), std::move(pipeline_status), pipeline_direction); + auto min_buffer_pool_size = net_group->get_min_buffer_pool_size(); + CHECK_EXPECTED(min_buffer_pool_size); + + auto elem_ptr = make_shared_nothrow(named_stream_infos, timeout, std::move(output_streams_pools), name, + duration_collector.release(), std::move(pipeline_status), pipeline_direction, async_pipeline, net_group, + min_buffer_pool_size.release()); CHECK_AS_EXPECTED(nullptr != elem_ptr, HAILO_OUT_OF_HOST_MEMORY); LOGGER__INFO("Created {}", elem_ptr->name()); @@ -2771,32 +2907,33 @@ Expected> AsyncHwElement::create(const std::vect return elem_ptr; } -AsyncHwElement::AsyncHwElement(const std::vector> &input_streams, const std::vector> &output_streams, - std::chrono::milliseconds timeout, std::unordered_map &&output_streams_pools, const std::string &name, - DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, - PipelineDirection pipeline_direction) : - PipelineElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction), - m_timeout(timeout), - m_output_streams_pools(std::move(output_streams_pools)) -{ - m_sinks.reserve(input_streams.size()); - m_sink_has_arrived.reserve(input_streams.size()); - uint32_t i = 0; - for (auto &input : input_streams) { - m_sinks.emplace_back(*this, name, PipelinePad::Type::SINK); - const auto &sink_name = m_sinks[i++].name(); - m_sink_name_to_input[sink_name] = input; - m_sink_name_to_index[sink_name] = static_cast(m_sinks.size() - 1); - m_sink_has_arrived[sink_name] = false; - } - - m_sources.reserve(output_streams.size()); - i = 0; - for (auto &output : output_streams) { - m_sources.emplace_back(*this, name, 
PipelinePad::Type::SOURCE); - const auto &source_name = m_sources[i++].name(); - m_source_name_to_output[source_name] = output; - m_source_name_to_index[source_name] = static_cast(m_sources.size() - 1); +AsyncHwElement::AsyncHwElement(const std::unordered_map &named_stream_infos, std::chrono::milliseconds timeout, + std::vector &&output_streams_pools, const std::string &name, DurationCollector &&duration_collector, + std::shared_ptr> &&pipeline_status, PipelineDirection pipeline_direction, + std::shared_ptr async_pipeline, std::shared_ptr net_group, + const size_t max_ongoing_transfers) : + PipelineElementInternal(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, async_pipeline), + m_timeout(timeout), + m_pools(std::move(output_streams_pools)), + m_net_group(net_group), + m_max_ongoing_transfers(max_ongoing_transfers) +{ + uint32_t sinks_count = 0; + uint32_t sources_count = 0; + for (const auto &stream_info_pair : named_stream_infos) { + if (HAILO_D2H_STREAM == stream_info_pair.second.direction) { + m_sources.emplace_back(*this, name, PipelinePad::Type::SOURCE); + const auto &source_name = m_sources[sources_count++].name(); + m_source_name_to_stream_name[source_name] = stream_info_pair.first; + + m_source_name_to_index[source_name] = static_cast(m_sources.size() - 1); + } else { + m_sinks.emplace_back(*this, name, PipelinePad::Type::SINK); + const auto &sink_name = m_sinks[sinks_count++].name(); + m_sink_name_to_stream_name[sink_name] = stream_info_pair.first; + m_sink_name_to_index[sink_name] = static_cast(m_sinks.size() - 1); + m_sink_has_arrived[sink_name] = false; + } } } @@ -2814,15 +2951,16 @@ bool AsyncHwElement::has_all_sinks_arrived() // (normally, the run_push_async of the next elements will be called by the LL async read_done) void AsyncHwElement::handle_error_in_hw_async_elem(hailo_status error_status) { - for (auto &name_output_stream_pair : m_source_name_to_output) { - auto source_id = 
get_source_index_from_output_stream_name(name_output_stream_pair.second->name()); - auto expected_buffer = m_output_streams_pools[name_output_stream_pair.second->name()]->acquire_buffer_ptr(m_timeout); - + for (auto &name_output_stream_pair : m_source_name_to_index) { + auto source_index = name_output_stream_pair.second; + assert(source_index < m_pools.size()); + assert(source_index < m_sources.size()); + auto expected_buffer = m_pools[source_index]->acquire_buffer_ptr(m_timeout); if (HAILO_SUCCESS == expected_buffer.status()) { expected_buffer->get()->set_action_status(error_status); - m_sources[m_source_name_to_index[name_output_stream_pair.first]].next()->run_push_async(std::move(*expected_buffer.value())); + m_sources[source_index].next()->run_push_async(std::move(*expected_buffer.value())); } else { - m_sources[m_source_name_to_index[name_output_stream_pair.first]].next()->run_push_async(PipelineBuffer(error_status)); + m_sources[source_index].next()->run_push_async(PipelineBuffer(error_status)); } } @@ -2836,53 +2974,99 @@ void AsyncHwElement::handle_error_in_hw_async_elem(hailo_status error_status) void AsyncHwElement::run_push_async(PipelineBuffer &&buffer, const PipelinePad &sink) { - assert(contains(m_sink_name_to_input, sink.name())); + assert(contains(m_sink_name_to_stream_name, sink.name())); std::unique_lock lock(m_mutex); m_sink_has_arrived[sink.name()] = true; m_input_buffers[sink.name()] = std::move(buffer); if (has_all_sinks_arrived()) { + hailo_status all_buffers_status = HAILO_SUCCESS; for (auto &input_buffer : m_input_buffers) { if (HAILO_SUCCESS != input_buffer.second.action_status()) { - handle_error_in_hw_async_elem(input_buffer.second.action_status()); - - // Manual unlocking is done before notifying, to avoid waking up the waiting thread only to block again - lock.unlock(); - m_cv.notify_all(); + all_buffers_status = input_buffer.second.action_status(); + break; // error from one buffer is enough } - auto input_stream = 
m_sink_name_to_input[input_buffer.first]; + } - InputStream::TransferDoneCallback write_done = [exec_done_cb = input_buffer.second.get_exec_done_cb()] (const InputStream::CompletionInfo &completion_info) { - if (HAILO_SUCCESS != completion_info.status) { - LOGGER__ERROR("Got an unexpected status on callback. status={}", completion_info.status); + if (HAILO_SUCCESS != all_buffers_status) { + handle_error_in_hw_async_elem(all_buffers_status); + // Manual unlocking is done before notifying, to avoid waking up the waiting thread only to block again + lock.unlock(); + m_cv.notify_all(); + } else { + std::unordered_map> source_name_to_output_buffer; + for (auto &name_to_index_pair : m_source_name_to_index) { + auto expected_buffer = m_pools[name_to_index_pair.second]->acquire_buffer_ptr(m_timeout); + if (HAILO_SUCCESS != expected_buffer.status()) { + handle_non_recoverable_async_error(expected_buffer.status()); + m_input_buffers.clear(); + // Manual unlocking is done before notifying, to avoid waking up the waiting thread only to block again + lock.unlock(); + m_cv.notify_all(); + return; } - CompletionInfoAsyncInferInternal completion_info_async_infer{completion_info.status}; - exec_done_cb(completion_info_async_infer); - }; + source_name_to_output_buffer[name_to_index_pair.first] = expected_buffer.release(); + } + + NamedBuffersCallbacks named_buffers_callbacks; + + for (auto &input_buffer : m_input_buffers) { + const auto &stream_name = m_sink_name_to_stream_name.at(input_buffer.first); + named_buffers_callbacks.emplace(stream_name, std::make_pair(input_buffer.second.as_view(), input_buffer.second.get_exec_done_cb())); + } - auto status = input_stream->write_async(input_buffer.second.data(), input_buffer.second.size(), write_done); - if (HAILO_SUCCESS != status) { + for (auto &output_buffer : source_name_to_output_buffer) { + const auto &stream_name = m_source_name_to_stream_name.at(output_buffer.first); + named_buffers_callbacks.emplace(stream_name, 
std::make_pair(output_buffer.second->as_view(), + [this, buffer = output_buffer.second, source_name = output_buffer.first](hailo_status status){ + buffer->set_action_status(status); + if (HAILO_SUCCESS == m_pipeline_status->load()) { + assert(contains(m_source_name_to_index, source_name)); + // If pipeline_status is not success, someone already handled this error and no reason for this buffer to be pushed + assert(contains(m_source_name_to_index, source_name)); + m_sources[m_source_name_to_index[source_name]].next()->run_push_async(std::move(*buffer)); + } + })); + } + + auto done_cb = [](hailo_status){}; + auto status = m_net_group->wait_for_callbacks_to_maintain_below_threshold(m_max_ongoing_transfers); + if (HAILO_SUCCESS != status ) { handle_non_recoverable_async_error(status); } - } - read_async_on_all_streams(); + status = m_net_group->infer_async(named_buffers_callbacks, done_cb); + if (HAILO_SUCCESS != status ) { + handle_non_recoverable_async_error(status); + } - for (const auto &curr_sink : m_sinks) { - m_sink_has_arrived[curr_sink.name()] = false; - } - m_input_buffers.clear(); + for (const auto &curr_sink : m_sinks) { + m_sink_has_arrived[curr_sink.name()] = false; + } + m_input_buffers.clear(); - // Manual unlocking is done before notifying, to avoid waking up the waiting thread only to block again - lock.unlock(); - m_cv.notify_all(); + // Manual unlocking is done before notifying, to avoid waking up the waiting thread only to block again + lock.unlock(); + m_cv.notify_all(); + } } else { - auto cv_status = m_cv.wait_for(lock, m_timeout); - if (std::cv_status::timeout == cv_status) { + bool done = m_cv.wait_for(lock, m_timeout, [&](){ + if (m_pipeline_status->load() != HAILO_SUCCESS) { + return true; // so we can exit this flow + } + return !m_sink_has_arrived[sink.name()]; + }); + + if (!done) { LOGGER__ERROR("Waiting for other threads in AsyncHwElement {} has reached a timeout (timeout={}ms)", name(), m_timeout.count()); 
handle_non_recoverable_async_error(HAILO_TIMEOUT); } + + if (m_pipeline_status->load() == HAILO_STREAM_ABORTED_BY_USER) { + lock.unlock(); + m_cv.notify_all(); + } } } @@ -2891,76 +3075,78 @@ hailo_status AsyncHwElement::run_push(PipelineBuffer &&/*optional*/, const Pipel return HAILO_INVALID_OPERATION; } -void AsyncHwElement::read_async_on_all_streams() +hailo_status AsyncHwElement::enqueue_execution_buffer(MemoryView mem_view, const TransferDoneCallbackAsyncInfer &exec_done, const std::string &source_name) { - std::unordered_map> name_to_buffer_map; - for (auto &name_output_stream_pair : m_source_name_to_output) { - auto expected_buffer = m_output_streams_pools[name_output_stream_pair.second->name()]->acquire_buffer_ptr(m_timeout); - if (HAILO_SUCCESS != expected_buffer.status()) { - handle_non_recoverable_async_error(expected_buffer.status()); - return; - } - name_to_buffer_map[name_output_stream_pair.first] = expected_buffer.release(); - } + CHECK(contains(m_source_name_to_index, source_name), HAILO_INTERNAL_FAILURE); + auto source_index = m_source_name_to_index[source_name]; - for (auto &name_output_stream_pair : m_source_name_to_output) { - auto mem_view = name_to_buffer_map[name_output_stream_pair.first]->as_view(); - OutputStream::TransferDoneCallback read_done = [this, source_name = name_output_stream_pair.first, buffer = name_to_buffer_map[name_output_stream_pair.first]] (const OutputStream::CompletionInfo &completion_info) { - buffer->set_action_status(completion_info.status); - m_sources[m_source_name_to_index[source_name]].next()->run_push_async(std::move(*buffer)); - }; - auto status = name_output_stream_pair.second->read_async(mem_view, read_done); - if (HAILO_SUCCESS != status) { - handle_non_recoverable_async_error(status); - return; - } + auto status = m_pools[source_index]->enqueue_buffer(mem_view, exec_done); + CHECK_SUCCESS(status); + + return HAILO_SUCCESS; +} + +hailo_status AsyncHwElement::execute_dequeue_user_buffers(hailo_status 
error_status) +{ + for (auto pool : m_pools) { + auto status = empty_buffer_pool(pool, error_status, m_timeout); + CHECK_SUCCESS(status); } + return PipelineElement::execute_dequeue_user_buffers(error_status); } -hailo_status AsyncHwElement::enqueue_execution_buffer(MemoryView mem_view, const TransferDoneCallbackAsyncInfer &exec_done, const std::string &source_name) +Expected AsyncHwElement::can_push_buffer_upstream(const uint32_t source_index) { - CHECK(contains(m_source_name_to_output, source_name), HAILO_INTERNAL_FAILURE); + CHECK_AS_EXPECTED(source_index < m_pools.size(), HAILO_NOT_FOUND); + return !m_pools[source_index]->is_full(); +} - auto status = m_output_streams_pools[m_source_name_to_output[source_name]->name()]->enqueue_buffer(mem_view, exec_done); - CHECK_SUCCESS(status); +hailo_status AsyncHwElement::fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const uint32_t source_index) +{ + CHECK(source_index < m_pools.size(), HAILO_NOT_FOUND); + CHECK_SUCCESS(m_pools[source_index]->allocate_buffers(is_dma_able, num_of_buffers)); return HAILO_SUCCESS; } -Expected AsyncHwElement::are_buffer_pools_full() +Expected AsyncHwElement::can_push_buffer_upstream(const std::string &source_name) { - for (const auto &output_streams_pool : m_output_streams_pools) { - if (output_streams_pool.second->is_full()) { - return true; - } - } - return false; + auto source_index = get_source_index_from_source_name(source_name); + CHECK_EXPECTED(source_index); + return can_push_buffer_upstream(*source_index); } -hailo_status AsyncHwElement::fill_buffer_pools(bool is_dma_able) { - for (auto &pool : m_output_streams_pools) { - auto status = pool.second->allocate_buffers(is_dma_able); - CHECK_SUCCESS(status); - } - return HAILO_SUCCESS; +hailo_status AsyncHwElement::fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const std::string &source_name) +{ + auto source_index = get_source_index_from_source_name(source_name); + CHECK_EXPECTED_AS_STATUS(source_index); + return 
fill_buffer_pool(is_dma_able, num_of_buffers, *source_index); } Expected AsyncHwElement::get_source_index_from_output_stream_name(const std::string &output_stream_name) { - for (auto &name_output_stream_pair : m_source_name_to_output) { - if (name_output_stream_pair.second->name() == output_stream_name) { - uint32_t ret_val = m_source_name_to_index.at(name_output_stream_pair.first); + for (const auto &name_pair : m_source_name_to_stream_name) { + if (name_pair.second == output_stream_name) { + assert(contains(m_source_name_to_index, name_pair.first)); + uint32_t ret_val = m_source_name_to_index.at(name_pair.first); return ret_val; } } return make_unexpected(HAILO_NOT_FOUND); } +Expected AsyncHwElement::get_source_index_from_source_name(const std::string &source_name) +{ + CHECK_AS_EXPECTED(contains(m_source_name_to_index, source_name), HAILO_NOT_FOUND, "couldnt find src '{}'", source_name); + auto ret_val = m_source_name_to_index.at(source_name); + return ret_val; +} + Expected AsyncHwElement::get_sink_index_from_input_stream_name(const std::string &input_stream_name) { - for (auto &name_input_stream_pair : m_sink_name_to_input) { - if (name_input_stream_pair.second->name() == input_stream_name) { - return Expected(m_sink_name_to_index.at(name_input_stream_pair.first)); + for (const auto &name_pair : m_sink_name_to_stream_name) { + if (name_pair.second == input_stream_name) { + return Expected(m_sink_name_to_index.at(name_pair.first)); } } return make_unexpected(HAILO_INVALID_ARGUMENT); @@ -2981,13 +3167,41 @@ std::vector AsyncHwElement::execution_pads() return result; } +hailo_status AsyncHwElement::execute_terminate(hailo_status error_status) +{ + if (m_is_terminated) { + return HAILO_SUCCESS; + } + + if (!m_is_terminating_element) { + { + // There is a case where the other thread is halted (via context switch) before the wait_for() function, + // then we call notify_all() here, and then the wait_for() is called - resulting in a timeout. 
+ // notify_all() only works on threads which are already waiting, so that's why we acquire the lock here. + std::unique_lock lock(m_mutex); + } + m_cv.notify_all(); + } + + // Checking success of shutdown is best effort (terminate should be called even if shutdown fails) + auto shutdown_status = m_net_group->shutdown(); + auto wait_for_callbacks_finish_status = m_net_group->wait_for_callbacks_finish(); + auto terminate_status = PipelineElement::execute_terminate(error_status); + CHECK_SUCCESS(shutdown_status); + CHECK_SUCCESS(wait_for_callbacks_finish_status); + CHECK_SUCCESS(terminate_status); + + return HAILO_SUCCESS; +} + Expected> CopyBufferElement::create(const std::string &name, - std::shared_ptr> pipeline_status, std::chrono::milliseconds timeout, PipelineDirection pipeline_direction) + std::shared_ptr> pipeline_status, std::chrono::milliseconds timeout, PipelineDirection pipeline_direction, + std::shared_ptr async_pipeline) { auto duration_collector = DurationCollector::create(HAILO_PIPELINE_ELEM_STATS_NONE); CHECK_EXPECTED(duration_collector); auto elem_ptr = make_shared_nothrow(name, duration_collector.release(), std::move(pipeline_status), - timeout, pipeline_direction); + timeout, pipeline_direction, async_pipeline); CHECK_AS_EXPECTED(nullptr != elem_ptr, HAILO_OUT_OF_HOST_MEMORY); LOGGER__INFO("Created {}", elem_ptr->name()); @@ -2997,8 +3211,8 @@ Expected> CopyBufferElement::create(const std CopyBufferElement::CopyBufferElement(const std::string &name, DurationCollector &&duration_collector, std::shared_ptr> pipeline_status, std::chrono::milliseconds timeout, - PipelineDirection pipeline_direction) : - FilterElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, nullptr, timeout) + PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) : + FilterElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, nullptr, timeout, async_pipeline) {} PipelinePad 
&CopyBufferElement::next_pad() @@ -3020,10 +3234,10 @@ Expected CopyBufferElement::action(PipelineBuffer &&input, Pipel } Expected, std::vector>> VStreamsBuilder::create_vstreams( - ConfiguredNetworkGroup &net_group, bool quantized, hailo_format_type_t format_type, + ConfiguredNetworkGroup &net_group, bool /*unused*/, hailo_format_type_t format_type, const std::string &network_name) { - const auto params = HailoRTDefaults::get_vstreams_params(quantized, format_type); + const auto params = HailoRTDefaults::get_vstreams_params({}, format_type); return create_vstreams(net_group, params, network_name); } @@ -3081,13 +3295,13 @@ Expected> VStreamsBuilder::create_output_vstreams(Con } Expected> VStreamsBuilderUtils::create_inputs( - std::vector> input_streams, const hailo_vstream_info_t &vstream_info, + std::vector> input_streams, const hailo_vstream_info_t &vstream_info, const hailo_vstream_params_t &vstream_params) { CHECK_AS_EXPECTED(!input_streams.empty(), HAILO_INVALID_ARGUMENT, "input streams can't be empty"); // if input streams has more than 1 value, it will be handled by handle_pix_buffer_splitter_flow. 
For all other purposes, // assuming there is only 1 stream is valid - std::shared_ptr input_stream = input_streams.front(); + std::shared_ptr input_stream = input_streams.front(); // TODO (HRT-4522): Support this measurement CHECK_AS_EXPECTED(!(vstream_params.vstream_stats_flags & HAILO_VSTREAM_STATS_MEASURE_FPS), HAILO_NOT_IMPLEMENTED, @@ -3113,7 +3327,7 @@ Expected> VStreamsBuilderUtils::create_inputs( auto user_timeout = std::chrono::milliseconds(vstream_params.timeout_ms); - if(input_streams.size() > 1) { + if (input_streams.size() > 1) { CHECK_SUCCESS_AS_EXPECTED(handle_pix_buffer_splitter_flow(input_streams, vstream_info, std::move(elements), vstreams, vstream_params, shutdown_event, pipeline_status, core_op_activated_event, pipeline_latency_accumulator.value())); @@ -3124,10 +3338,9 @@ Expected> VStreamsBuilderUtils::create_inputs( CHECK_EXPECTED(hw_write_elem); elements.insert(elements.begin(), hw_write_elem.value()); - auto input_stream_base = std::static_pointer_cast(input_stream); auto should_transform = InputTransformContext::is_transformation_required(input_stream->get_info().shape, vstream_params.user_buffer_format, input_stream->get_info().hw_shape, input_stream->get_info().format, - input_stream_base->get_quant_infos()); + input_stream->get_quant_infos()); CHECK_EXPECTED(should_transform); if (should_transform.value()) { @@ -3140,7 +3353,7 @@ Expected> VStreamsBuilderUtils::create_inputs( CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(queue_elem.value(), hw_write_elem.value())); auto pre_infer_elem = PreInferElement::create(input_stream->get_info().shape, vstream_params.user_buffer_format, - input_stream->get_info().hw_shape, input_stream->get_info().format, input_stream_base->get_quant_infos(), + input_stream->get_info().hw_shape, input_stream->get_info().format, input_stream->get_quant_infos(), PipelineObject::create_element_name("PreInferElement", input_stream->get_info().name, input_stream->get_info().index), vstream_params, shutdown_event, 
pipeline_status); CHECK_EXPECTED(pre_infer_elem); @@ -3148,13 +3361,13 @@ Expected> VStreamsBuilderUtils::create_inputs( CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(pre_infer_elem.value(), queue_elem.value())); input_stream->set_timeout(user_timeout); - auto vstream = InputVStream::create(vstream_info, input_stream_base->get_quant_infos(), vstream_params, pre_infer_elem.release(), hw_write_elem.release(), std::move(elements), + auto vstream = InputVStream::create(vstream_info, input_stream->get_quant_infos(), vstream_params, pre_infer_elem.release(), hw_write_elem.release(), std::move(elements), std::move(pipeline_status), shutdown_event, core_op_activated_event, pipeline_latency_accumulator.release()); CHECK_EXPECTED(vstream); vstreams.emplace_back(vstream.release()); } else { input_stream->set_timeout(user_timeout); - auto vstream = InputVStream::create(vstream_info, input_stream_base->get_quant_infos(), vstream_params, hw_write_elem.value(), hw_write_elem.value(), std::move(elements), + auto vstream = InputVStream::create(vstream_info, input_stream->get_quant_infos(), vstream_params, hw_write_elem.value(), hw_write_elem.value(), std::move(elements), std::move(pipeline_status), shutdown_event, core_op_activated_event, pipeline_latency_accumulator.release()); CHECK_EXPECTED(vstream); vstreams.emplace_back(vstream.release()); @@ -3168,7 +3381,7 @@ Expected> VStreamsBuilderUtils::create_inputs( return vstreams; } -Expected> VStreamsBuilderUtils::create_outputs(std::shared_ptr output_stream, +Expected> VStreamsBuilderUtils::create_outputs(std::shared_ptr output_stream, NameToVStreamParamsMap &vstreams_params_map, const std::map &output_vstream_infos) { std::vector> elements; @@ -3226,10 +3439,9 @@ Expected> VStreamsBuilderUtils::create_outputs(std::s auto pipeline_latency_accumulator = create_pipeline_latency_accumulator(vstream_params); CHECK_EXPECTED(pipeline_latency_accumulator); - auto output_stream_base = std::static_pointer_cast(output_stream); auto 
should_transform = OutputTransformContext::is_transformation_required(output_stream->get_info().hw_shape, output_stream->get_info().format, output_stream->get_info().shape, - vstream_params.user_buffer_format, output_stream_base->get_quant_infos()); + vstream_params.user_buffer_format, output_stream->get_quant_infos()); CHECK_EXPECTED(should_transform); if (should_transform.value()) { @@ -3247,13 +3459,13 @@ Expected> VStreamsBuilderUtils::create_outputs(std::s CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(post_infer_element.value(), user_buffer_queue_element.value())); output_stream->set_timeout(std::chrono::milliseconds(HAILO_INFINITE)); hw_read_queue_element->get()->set_timeout(std::chrono::milliseconds(HAILO_INFINITE)); - auto vstream = OutputVStream::create(vstream_info->second, output_stream_base->get_quant_infos(), vstream_params, user_buffer_queue_element.release(), std::move(elements), + auto vstream = OutputVStream::create(vstream_info->second, output_stream->get_quant_infos(), vstream_params, user_buffer_queue_element.release(), std::move(elements), std::move(pipeline_status), shutdown_event, core_op_activated_event, pipeline_latency_accumulator.release()); CHECK_EXPECTED(vstream); vstreams.emplace_back(vstream.release()); } else { output_stream->set_timeout(std::chrono::milliseconds(vstream_params.timeout_ms)); - auto vstream = OutputVStream::create(vstream_info->second, output_stream_base->get_quant_infos(), vstream_params, hw_read_element.release(), std::move(elements), + auto vstream = OutputVStream::create(vstream_info->second, output_stream->get_quant_infos(), vstream_params, hw_read_element.release(), std::move(elements), std::move(pipeline_status), shutdown_event, core_op_activated_event, pipeline_latency_accumulator.release()); CHECK_EXPECTED(vstream); vstreams.emplace_back(vstream.release()); @@ -3267,7 +3479,7 @@ Expected> VStreamsBuilderUtils::create_outputs(std::s return vstreams; } -Expected> 
VStreamsBuilderUtils::create_output_post_process_iou(std::shared_ptr output_stream, +Expected> VStreamsBuilderUtils::create_output_post_process_iou(std::shared_ptr output_stream, hailo_vstream_params_t vstream_params, const net_flow::PostProcessOpMetadataPtr &iou_op_metadata) { std::vector> elements; @@ -3341,8 +3553,7 @@ Expected> VStreamsBuilderUtils::create_output_post_pr auto output_vstream_info = iou_op_metadata->get_output_vstream_info(); CHECK_EXPECTED(output_vstream_info); - auto output_stream_base = std::static_pointer_cast(output_stream); - auto vstream = OutputVStream::create(output_vstream_info.value(), output_stream_base->get_quant_infos(), vstream_params, user_buffer_queue_element.release(), std::move(elements), + auto vstream = OutputVStream::create(output_vstream_info.value(), output_stream->get_quant_infos(), vstream_params, user_buffer_queue_element.release(), std::move(elements), std::move(pipeline_status), shutdown_event, core_op_activated_event, pipeline_latency_accumulator.release()); CHECK_EXPECTED(vstream); vstreams.emplace_back(vstream.release()); @@ -3354,7 +3565,7 @@ Expected> VStreamsBuilderUtils::create_output_post_pr return vstreams; } -Expected> VStreamsBuilderUtils::create_output_post_process_softmax(std::shared_ptr output_stream, +Expected> VStreamsBuilderUtils::create_output_post_process_softmax(std::shared_ptr output_stream, const NameToVStreamParamsMap &vstreams_params_map, const hailo_vstream_info_t &output_vstream_info, const net_flow::PostProcessOpMetadataPtr &softmax_op_metadata) { @@ -3394,12 +3605,6 @@ Expected> VStreamsBuilderUtils::create_output_post_pr auto op_input_format = softmax_op_metadata->inputs_metadata().begin()->second.format; auto vstream_params = vstreams_params_map.begin()->second; vstream_params.user_buffer_format = net_flow::SoftmaxOpMetadata::expand_output_format_autos(vstream_params.user_buffer_format, op_input_format); - if (HAILO_FORMAT_FLAGS_QUANTIZED & vstream_params.user_buffer_format.flags) { - 
vstream_params.user_buffer_format.flags &= ~HAILO_FORMAT_FLAGS_QUANTIZED; - // TODO: Delete override when changing CLI default flags - LOGGER__WARNING("The output_vstream {} format flag is marked as quantized, which is not supported with {}. " - "flag has been automatically set to False.", vstreams_params_map.begin()->first, softmax_op_metadata->get_name()); - } auto pipeline_latency_accumulator = create_pipeline_latency_accumulator(vstream_params); CHECK_EXPECTED(pipeline_latency_accumulator); @@ -3432,8 +3637,7 @@ Expected> VStreamsBuilderUtils::create_output_post_pr output_stream->set_timeout(std::chrono::milliseconds(HAILO_INFINITE)); hw_read_queue_element->get()->set_timeout(std::chrono::milliseconds(HAILO_INFINITE)); - auto output_stream_base = std::static_pointer_cast(output_stream); - auto vstream = OutputVStream::create(output_vstream_info, output_stream_base->get_quant_infos(), vstream_params, user_buffer_queue_element.release(), std::move(elements), + auto vstream = OutputVStream::create(output_vstream_info, output_stream->get_quant_infos(), vstream_params, user_buffer_queue_element.release(), std::move(elements), std::move(pipeline_status), shutdown_event, core_op_activated_event, pipeline_latency_accumulator.release()); CHECK_EXPECTED(vstream); vstreams.emplace_back(vstream.release()); @@ -3474,6 +3678,7 @@ Expected> VStreamsBuilderUtils::create_output_vstream auto &op_metadata = post_process_ops_metadata.at(op_name); switch (op_metadata->type()) { case net_flow::OperationType::YOLOX: + case net_flow::OperationType::YOLOV8: case net_flow::OperationType::SSD: case net_flow::OperationType::YOLOV5: case net_flow::OperationType::YOLOV5SEG: @@ -3483,12 +3688,6 @@ Expected> VStreamsBuilderUtils::create_output_vstream auto updated_outputs_metadata = op_metadata->outputs_metadata(); updated_outputs_metadata.begin()->second.format = net_flow::NmsOpMetadata::expand_output_format_autos_by_op_type(vstream_params.user_buffer_format, op_metadata->type()); - if 
(HAILO_FORMAT_FLAGS_QUANTIZED & updated_outputs_metadata.begin()->second.format.flags) { - updated_outputs_metadata.begin()->second.format.flags &= ~HAILO_FORMAT_FLAGS_QUANTIZED; - // TODO: Delete override when changing CLI default flags - LOGGER__WARNING("The output_vstream {} format flag is marked as quantized, which is not supported with {}. " - "flag has been automatically set to False.", updated_outputs_metadata.begin()->first, op_metadata->get_name()); - } op_metadata->set_outputs_metadata(updated_outputs_metadata); CHECK_SUCCESS_AS_EXPECTED(op_metadata->validate_format_info()); @@ -3503,6 +3702,15 @@ Expected> VStreamsBuilderUtils::create_output_vstream op = op_expected.release(); break; } + case (net_flow::OperationType::YOLOV8): + { + auto metadata = std::dynamic_pointer_cast(op_metadata); + assert(nullptr != metadata); + auto op_expected = net_flow::YOLOV8PostProcessOp::create(metadata); + CHECK_EXPECTED(op_expected); + op = op_expected.release(); + break; + } case (net_flow::OperationType::YOLOV5): { auto metadata = std::dynamic_pointer_cast(op_metadata); @@ -3648,7 +3856,7 @@ Expected> VStreamsBuilderUtils::create_output_post_pr return vstreams; } -Expected> VStreamsBuilderUtils::add_hw_read_element(std::shared_ptr &output_stream, +Expected> VStreamsBuilderUtils::add_hw_read_element(std::shared_ptr &output_stream, std::shared_ptr> &pipeline_status, std::vector> &elements, const std::string &element_name, EventPtr &shutdown_event, size_t buffer_pool_size, const hailo_pipeline_elem_stats_flags_t &hw_read_element_stats_flags, const hailo_vstream_stats_flags_t &hw_read_stream_stats_flags) @@ -3661,7 +3869,7 @@ Expected> VStreamsBuilderUtils::add_hw_read_eleme return hw_read_elem; } -Expected> VStreamsBuilderUtils::add_pull_queue_element(std::shared_ptr &output_stream, +Expected> VStreamsBuilderUtils::add_pull_queue_element(std::shared_ptr &output_stream, std::shared_ptr> &pipeline_status, std::vector> &elements, const std::string &element_name, EventPtr 
&shutdown_event, const hailo_vstream_params_t &vstream_params) { @@ -3673,7 +3881,7 @@ Expected> VStreamsBuilderUtils::add_pull_queue return pull_queue_elem; } -Expected> VStreamsBuilderUtils::add_argmax_element(std::shared_ptr &output_stream, +Expected> VStreamsBuilderUtils::add_argmax_element(std::shared_ptr &output_stream, std::shared_ptr> &pipeline_status, std::vector> &elements, const std::string &element_name, hailo_vstream_params_t &vstream_params, const net_flow::PostProcessOpMetadataPtr &argmax_op_metadata, size_t buffer_pool_size, std::chrono::milliseconds timeout, const hailo_vstream_stats_flags_t &vstream_flags, EventPtr &shutdown_event) @@ -3699,7 +3907,7 @@ Expected> VStreamsBuilderUtils::add_ar return argmax_element; } -Expected> VStreamsBuilderUtils::add_softmax_element(std::shared_ptr &output_stream, +Expected> VStreamsBuilderUtils::add_softmax_element(std::shared_ptr &output_stream, std::shared_ptr> &pipeline_status, std::vector> &elements, const std::string &element_name, hailo_vstream_params_t &vstream_params, const net_flow::PostProcessOpMetadataPtr &softmax_op_metadata, size_t buffer_pool_size, std::chrono::milliseconds timeout, const hailo_vstream_stats_flags_t &vstream_flags, EventPtr &shutdown_event) @@ -3728,7 +3936,7 @@ Expected> VStreamsBuilderUtils::add_s return softmax_element; } -Expected> VStreamsBuilderUtils::add_nms_to_detections_convert_element(std::shared_ptr &output_stream, +Expected> VStreamsBuilderUtils::add_nms_to_detections_convert_element(std::shared_ptr &output_stream, std::shared_ptr> &pipeline_status, std::vector> &elements, const std::string &element_name, hailo_vstream_params_t &vstream_params, const net_flow::PostProcessOpMetadataPtr &op_metadata, size_t buffer_pool_size, std::chrono::milliseconds timeout, const hailo_vstream_stats_flags_t &vstream_flags, EventPtr &shutdown_event) @@ -3744,7 +3952,7 @@ Expected> VStreamsBuilderUtils::a return nms_to_detections_element; } -Expected> 
VStreamsBuilderUtils::add_remove_overlapping_bboxes_element(std::shared_ptr &output_stream, +Expected> VStreamsBuilderUtils::add_remove_overlapping_bboxes_element(std::shared_ptr &output_stream, std::shared_ptr> &pipeline_status, std::vector> &elements, const std::string &element_name, hailo_vstream_params_t &vstream_params, const net_flow::PostProcessOpMetadataPtr &op_metadata, size_t buffer_pool_size, std::chrono::milliseconds timeout, const hailo_vstream_stats_flags_t &vstream_flags, EventPtr &shutdown_event) @@ -3760,7 +3968,7 @@ Expected> VStreamsBuilderUtils:: return remove_overlapping_bboxes_element; } -Expected> VStreamsBuilderUtils::add_fill_nms_format_element(std::shared_ptr &output_stream, +Expected> VStreamsBuilderUtils::add_fill_nms_format_element(std::shared_ptr &output_stream, std::shared_ptr> &pipeline_status, std::vector> &elements, const std::string &element_name, hailo_vstream_params_t &vstream_params, const net_flow::PostProcessOpMetadataPtr &op_metadata, size_t buffer_pool_size, std::chrono::milliseconds timeout, const hailo_vstream_stats_flags_t &vstream_flags, EventPtr &shutdown_event) @@ -3776,7 +3984,7 @@ Expected> VStreamsBuilderUtils::add_fill_n return fill_nms_format_element; } -Expected> VStreamsBuilderUtils::add_user_buffer_queue_element(std::shared_ptr &output_stream, +Expected> VStreamsBuilderUtils::add_user_buffer_queue_element(std::shared_ptr &output_stream, std::shared_ptr> &pipeline_status, std::vector> &elements, const std::string &element_name, EventPtr &shutdown_event, const hailo_vstream_params_t &vstream_params) { @@ -3788,13 +3996,12 @@ Expected> VStreamsBuilderUtils::add_user return post_argmax_queue_element; } -Expected> VStreamsBuilderUtils::add_post_infer_element(std::shared_ptr &output_stream, +Expected> VStreamsBuilderUtils::add_post_infer_element(std::shared_ptr &output_stream, std::shared_ptr> &pipeline_status, std::vector> &elements, const std::string &element_name, const hailo_vstream_params_t &vstream_params, 
EventPtr shutdown_event) { - auto output_stream_base = std::static_pointer_cast(output_stream); auto post_infer_element = PostInferElement::create(output_stream->get_info().hw_shape, output_stream->get_info().format, - output_stream->get_info().shape, vstream_params.user_buffer_format, output_stream_base->get_quant_infos(), output_stream->get_info().nms_info, + output_stream->get_info().shape, vstream_params.user_buffer_format, output_stream->get_quant_infos(), output_stream->get_info().nms_info, PipelineObject::create_element_name(element_name, output_stream->name(), output_stream->get_info().index), vstream_params, pipeline_status, shutdown_event); CHECK_EXPECTED(post_infer_element); @@ -3802,7 +4009,7 @@ Expected> VStreamsBuilderUtils::add_post_infer return post_infer_element; } -Expected> VStreamsBuilderUtils::create_output_post_process_argmax(std::shared_ptr output_stream, +Expected> VStreamsBuilderUtils::create_output_post_process_argmax(std::shared_ptr output_stream, const NameToVStreamParamsMap &vstreams_params_map, const hailo_vstream_info_t &output_vstream_info, const net_flow::PostProcessOpMetadataPtr &argmax_op_metadata) { @@ -3870,8 +4077,7 @@ Expected> VStreamsBuilderUtils::create_output_post_pr output_stream->set_timeout(std::chrono::milliseconds(HAILO_INFINITE)); hw_read_queue_element->get()->set_timeout(std::chrono::milliseconds(HAILO_INFINITE)); - auto output_stream_base = std::static_pointer_cast(output_stream); - auto vstream = OutputVStream::create(output_vstream_info, output_stream_base->get_quant_infos(), vstream_params, post_argmax_queue_element.release(), std::move(elements), + auto vstream = OutputVStream::create(output_vstream_info, output_stream->get_quant_infos(), vstream_params, post_argmax_queue_element.release(), std::move(elements), std::move(pipeline_status), shutdown_event, core_op_activated_event, pipeline_latency_accumulator.release()); CHECK_EXPECTED(vstream); vstreams.emplace_back(vstream.release()); @@ -3883,16 +4089,16 @@ 
Expected> VStreamsBuilderUtils::create_output_post_pr return vstreams; } -hailo_status VStreamsBuilderUtils::handle_pix_buffer_splitter_flow(std::vector> streams, +hailo_status VStreamsBuilderUtils::handle_pix_buffer_splitter_flow(std::vector> streams, const hailo_vstream_info_t &vstream_info, std::vector> &&base_elements, std::vector &vstreams, const hailo_vstream_params_t &vstream_params, EventPtr shutdown_event, std::shared_ptr> pipeline_status, EventPtr &core_op_activated_event, AccumulatorPtr accumalator) { - // sorting the streams based on their plane index - auto compartor = [](std::shared_ptr a, std::shared_ptr b) { - return static_cast(*a).get_layer_info().plane_index < - static_cast(*b).get_layer_info().plane_index; }; + // sorting the streams based on their plane index -> we count on order to know which plane belongs to which stream + auto compartor = [](std::shared_ptr a, std::shared_ptr b) { + return a->get_layer_info().plane_index < b->get_layer_info().plane_index; + }; std::sort(streams.begin(), streams.end(), compartor); auto duration_collector_expected = DurationCollector::create(vstream_params.pipeline_elements_stats_flags); @@ -3900,7 +4106,7 @@ hailo_status VStreamsBuilderUtils::handle_pix_buffer_splitter_flow(std::vector output_stream, NameToVStreamParamsMap &vstreams_params_map, +hailo_status VStreamsBuilderUtils::add_demux(std::shared_ptr output_stream, NameToVStreamParamsMap &vstreams_params_map, std::vector> &&base_elements, std::vector &vstreams, std::shared_ptr hw_read_elem, EventPtr shutdown_event, std::shared_ptr> pipeline_status, const std::map &output_vstream_infos) @@ -4223,7 +4429,7 @@ hailo_status VStreamsBuilderUtils::add_nms_post_process(OutputStreamPtrVector &o CHECK_EXPECTED_AS_STATUS(nms_elem); hailo_format_t nms_src_format; - nms_src_format.flags = HAILO_FORMAT_FLAGS_QUANTIZED; + nms_src_format.flags = HAILO_FORMAT_FLAGS_NONE; nms_src_format.order = HAILO_FORMAT_ORDER_NHCW; nms_src_format.type = first_stream_info.format.type; 
@@ -4231,9 +4437,8 @@ hailo_status VStreamsBuilderUtils::add_nms_post_process(OutputStreamPtrVector &o const auto &curr_stream_info = output_streams[i]->get_info(); output_streams[i]->set_timeout(HAILO_INFINITE_TIMEOUT); - auto output_stream_base = std::static_pointer_cast(output_streams[i]); auto should_transform = OutputTransformContext::is_transformation_required(curr_stream_info.hw_shape, curr_stream_info.format, - curr_stream_info.hw_shape, nms_src_format, output_stream_base->get_quant_infos()); + curr_stream_info.hw_shape, nms_src_format, output_streams[i]->get_quant_infos()); CHECK_EXPECTED_AS_STATUS(should_transform); CHECK(!(should_transform.value()), HAILO_INVALID_ARGUMENT, "Unexpected transformation required for {}", curr_stream_info.name); @@ -4266,8 +4471,7 @@ hailo_status VStreamsBuilderUtils::add_nms_post_process(OutputStreamPtrVector &o } // If user uses HailoRT++ we can assume he won't use Output Scale by Feature - auto output_stream_base = std::static_pointer_cast(output_streams[0]); - auto vstream = OutputVStream::create(vstream_info->second, output_stream_base->get_quant_infos(), vstreams_params, nms_elem.release(), std::move(elements), + auto vstream = OutputVStream::create(vstream_info->second, output_streams[0]->get_quant_infos(), vstreams_params, nms_elem.release(), std::move(elements), std::move(pipeline_status), shutdown_event, core_op_activated_event, pipeline_latency_accumulator.release()); CHECK_EXPECTED_AS_STATUS(vstream); vstreams.emplace_back(vstream.release()); diff --git a/hailort/libhailort/src/net_flow/pipeline/vstream_internal.hpp b/hailort/libhailort/src/net_flow/pipeline/vstream_internal.hpp index cc62a33..a78932b 100644 --- a/hailort/libhailort/src/net_flow/pipeline/vstream_internal.hpp +++ b/hailort/libhailort/src/net_flow/pipeline/vstream_internal.hpp @@ -28,7 +28,8 @@ #include "hailo/expected.hpp" #include "hailo/transform.hpp" -#include "hailo/stream.hpp" + +#include "stream_common/stream_internal.hpp" #include 
"hef/hef_internal.hpp" #include "net_flow/pipeline/pipeline.hpp" @@ -202,16 +203,6 @@ public: virtual hailo_status read(MemoryView buffer) override; - void set_on_vstream_cant_read_callback(std::function callback) - { - m_cant_read_callback = callback; - } - - void set_on_vstream_can_read_callback(std::function callback) - { - m_can_read_callback = callback; - } - virtual hailo_status set_nms_score_threshold(float32_t threshold) override; virtual hailo_status set_nms_iou_threshold(float32_t threshold) override; virtual hailo_status set_nms_max_proposals_per_class(uint32_t max_proposals_per_class) override; @@ -221,11 +212,6 @@ private: std::shared_ptr pipeline_entry, std::vector> &&pipeline, std::shared_ptr> &&pipeline_status, EventPtr shutdown_event, AccumulatorPtr pipeline_latency_accumulator, EventPtr core_op_activated_event, hailo_status &output_status); - - Expected> get_nms_metadata_from_pipeline() const; - - std::function m_cant_read_callback; - std::function m_can_read_callback; }; #ifdef HAILO_SUPPORT_MULTI_PROCESS @@ -329,17 +315,19 @@ public: const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, const std::vector &dst_quant_infos, const std::string &name, std::chrono::milliseconds timeout, size_t buffer_pool_size, hailo_pipeline_elem_stats_flags_t elem_flags, hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event, std::shared_ptr> pipeline_status, - PipelineDirection pipeline_direction = PipelineDirection::PUSH, bool is_dma_able = false); + PipelineDirection pipeline_direction = PipelineDirection::PUSH, bool is_dma_able = false, std::shared_ptr async_pipeline = nullptr); static Expected> create(const hailo_3d_image_shape_t &src_image_shape, const hailo_format_t &src_format, const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, const std::vector &dst_quant_infos, const std::string &name, const hailo_vstream_params_t &vstream_params, EventPtr shutdown_event, std::shared_ptr> pipeline_status, 
- PipelineDirection pipeline_direction = PipelineDirection::PUSH, bool is_dma_able = false); + PipelineDirection pipeline_direction = PipelineDirection::PUSH, bool is_dma_able = false, std::shared_ptr async_pipeline = nullptr); static Expected> create(const hailo_3d_image_shape_t &src_image_shape, const hailo_format_t &src_format, const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, const std::vector &dst_quant_infos, - const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction = PipelineDirection::PUSH, bool is_dma_able = false); + const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction = PipelineDirection::PUSH, bool is_dma_able = false, + std::shared_ptr async_pipeline = nullptr); PreInferElement(std::unique_ptr &&transform_context, BufferPoolPtr buffer_pool, const std::string &name, std::chrono::milliseconds timeout, DurationCollector &&duration_collector, - std::shared_ptr> &&pipeline_status, PipelineDirection pipeline_direction); + std::shared_ptr> &&pipeline_status, PipelineDirection pipeline_direction, + std::shared_ptr async_pipeline); virtual ~PreInferElement() = default; virtual Expected run_pull(PipelineBuffer &&optional, const PipelinePad &source) override; @@ -360,18 +348,25 @@ public: const net_flow::NmsPostProcessConfig nms_config, const std::string &name, hailo_pipeline_elem_stats_flags_t elem_flags, std::shared_ptr> pipeline_status, std::chrono::milliseconds timeout, hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event, - size_t buffer_pool_size, PipelineDirection pipeline_direction = PipelineDirection::PULL, bool is_last_copy_element = false); + size_t buffer_pool_size, PipelineDirection pipeline_direction = PipelineDirection::PULL, bool is_last_copy_element = false, + std::shared_ptr async_pipeline = nullptr); static Expected> create(const net_flow::NmsPostProcessConfig nms_config, const std::string &name, const 
ElementBuildParams &build_params, PipelineDirection pipeline_direction = PipelineDirection::PULL, - bool is_last_copy_element = false); + bool is_last_copy_element = false, std::shared_ptr async_pipeline = nullptr); RemoveOverlappingBboxesElement(const net_flow::NmsPostProcessConfig &&nms_config, const std::string &name, DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, BufferPoolPtr buffer_pool, std::chrono::milliseconds timeout, - PipelineDirection pipeline_direction); + PipelineDirection pipeline_direction, std::shared_ptr async_pipeline); virtual ~RemoveOverlappingBboxesElement() = default; virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; virtual PipelinePad &next_pad() override; virtual std::string description() const override; + virtual hailo_status set_nms_iou_threshold(float32_t threshold) + { + m_nms_config.nms_iou_th = threshold; + return HAILO_SUCCESS; + } + protected: virtual Expected action(PipelineBuffer &&input, PipelineBuffer &&optional) override; @@ -387,18 +382,21 @@ public: const std::vector &dst_quant_infos, const hailo_nms_info_t &nms_info, const std::string &name, hailo_pipeline_elem_stats_flags_t elem_flags, std::shared_ptr> pipeline_status, std::chrono::milliseconds timeout, hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event, - size_t buffer_pool_size, PipelineDirection pipeline_direction = PipelineDirection::PULL, bool is_last_copy_element = false); + size_t buffer_pool_size, PipelineDirection pipeline_direction = PipelineDirection::PULL, bool is_last_copy_element = false, + std::shared_ptr async_pipeline = nullptr); static Expected> create(const hailo_3d_image_shape_t &src_image_shape, const hailo_format_t &src_format, const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, const std::vector &dst_quant_info, const hailo_nms_info_t &nms_info, const std::string &name, const hailo_vstream_params_t &vstream_params, std::shared_ptr> 
pipeline_status, EventPtr shutdown_event, - PipelineDirection pipeline_direction = PipelineDirection::PULL, bool is_last_copy_element = false); + PipelineDirection pipeline_direction = PipelineDirection::PULL, bool is_last_copy_element = false, + std::shared_ptr async_pipeline = nullptr); static Expected> create(const hailo_3d_image_shape_t &src_image_shape, const hailo_format_t &src_format, const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, const std::vector &dst_quant_infos, const hailo_nms_info_t &nms_info, const std::string &name, - const ElementBuildParams &build_params, PipelineDirection pipeline_direction = PipelineDirection::PULL, bool is_last_copy_element = false); + const ElementBuildParams &build_params, PipelineDirection pipeline_direction = PipelineDirection::PULL, bool is_last_copy_element = false, + std::shared_ptr async_pipeline = nullptr); PostInferElement(std::unique_ptr &&transform_context, const std::string &name, DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, BufferPoolPtr buffer_pool, - std::chrono::milliseconds timeout, PipelineDirection pipeline_direction = PipelineDirection::PULL); + std::chrono::milliseconds timeout, PipelineDirection pipeline_direction, std::shared_ptr async_pipeline); virtual ~PostInferElement() = default; virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; virtual Expected run_pull(PipelineBuffer &&optional, const PipelinePad &source) override; @@ -418,13 +416,14 @@ public: static Expected> create(const hailo_nms_info_t &nms_info, const std::string &name, hailo_pipeline_elem_stats_flags_t elem_flags, std::shared_ptr> pipeline_status, std::chrono::milliseconds timeout, hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event, - size_t buffer_pool_size, PipelineDirection pipeline_direction = PipelineDirection::PULL, bool is_last_copy_element = false); + size_t buffer_pool_size, PipelineDirection pipeline_direction = 
PipelineDirection::PULL, bool is_last_copy_element = false, + std::shared_ptr async_pipeline = nullptr); static Expected> create( const hailo_nms_info_t &nms_info, const std::string &name, const ElementBuildParams &build_params, - PipelineDirection pipeline_direction = PipelineDirection::PULL, bool is_last_copy_element = false); + PipelineDirection pipeline_direction = PipelineDirection::PULL, bool is_last_copy_element = false, std::shared_ptr async_pipeline = nullptr); ConvertNmsToDetectionsElement(const hailo_nms_info_t &&nms_info, const std::string &name, DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, BufferPoolPtr buffer_pool, std::chrono::milliseconds timeout, - PipelineDirection pipeline_direction); + PipelineDirection pipeline_direction, std::shared_ptr async_pipeline); virtual ~ConvertNmsToDetectionsElement() = default; virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; virtual PipelinePad &next_pad() override; @@ -444,18 +443,26 @@ public: const hailo_format_t &dst_format, const net_flow::NmsPostProcessConfig nms_config, const std::string &name, hailo_pipeline_elem_stats_flags_t elem_flags, std::shared_ptr> pipeline_status, std::chrono::milliseconds timeout, hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event, - size_t buffer_pool_size, PipelineDirection pipeline_direction = PipelineDirection::PULL, bool is_last_copy_element = false); + size_t buffer_pool_size, PipelineDirection pipeline_direction = PipelineDirection::PULL, bool is_last_copy_element = false, + std::shared_ptr async_pipeline = nullptr); static Expected> create(const hailo_nms_info_t nms_info, const hailo_format_t &dst_format, const net_flow::NmsPostProcessConfig nms_config, const std::string &name, - const ElementBuildParams &build_params, PipelineDirection pipeline_direction = PipelineDirection::PULL, bool is_last_copy_element = false); + const ElementBuildParams &build_params, PipelineDirection 
pipeline_direction = PipelineDirection::PULL, bool is_last_copy_element = false, + std::shared_ptr async_pipeline = nullptr); FillNmsFormatElement(const net_flow::NmsPostProcessConfig &&nms_config, const std::string &name, DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, BufferPoolPtr buffer_pool, std::chrono::milliseconds timeout, - PipelineDirection pipeline_direction); + PipelineDirection pipeline_direction, std::shared_ptr async_pipeline); virtual ~FillNmsFormatElement() = default; virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; virtual PipelinePad &next_pad() override; virtual std::string description() const override; + virtual hailo_status set_nms_max_proposals_per_class(uint32_t max_proposals_per_class) override + { + m_nms_config.max_proposals_per_class = max_proposals_per_class; + return HAILO_SUCCESS; + } + protected: virtual Expected action(PipelineBuffer &&input, PipelineBuffer &&optional) override; @@ -470,13 +477,14 @@ public: const std::string &name, hailo_pipeline_elem_stats_flags_t elem_flags, std::shared_ptr> pipeline_status, size_t buffer_pool_size, std::chrono::milliseconds timeout, hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event, PipelineDirection pipeline_direction = PipelineDirection::PULL, - bool is_last_copy_element = false); + bool is_last_copy_element = false, std::shared_ptr async_pipeline = nullptr); static Expected> create(std::shared_ptr argmax_op, const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction = PipelineDirection::PULL, - bool is_last_copy_element = false); + bool is_last_copy_element = false, std::shared_ptr async_pipeline = nullptr); ArgmaxPostProcessElement(std::shared_ptr argmax_op, const std::string &name, DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, - std::chrono::milliseconds timeout, BufferPoolPtr buffer_pool, PipelineDirection pipeline_direction = 
PipelineDirection::PULL); + std::chrono::milliseconds timeout, BufferPoolPtr buffer_pool, PipelineDirection pipeline_direction, + std::shared_ptr async_pipeline); virtual ~ArgmaxPostProcessElement() = default; virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; virtual Expected run_pull(PipelineBuffer &&optional, const PipelinePad &source) override; @@ -497,13 +505,15 @@ public: const std::string &name, hailo_pipeline_elem_stats_flags_t elem_flags, std::shared_ptr> pipeline_status, size_t buffer_pool_size, std::chrono::milliseconds timeout, hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event, - PipelineDirection pipeline_direction = PipelineDirection::PULL, bool is_last_copy_element = false); + PipelineDirection pipeline_direction = PipelineDirection::PULL, bool is_last_copy_element = false, + std::shared_ptr async_pipeline = nullptr); static Expected> create(std::shared_ptr softmax_op, const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction = PipelineDirection::PULL, - bool is_last_copy_element = false); + bool is_last_copy_element = false, std::shared_ptr async_pipeline = nullptr); SoftmaxPostProcessElement(std::shared_ptr softmax_op, const std::string &name, DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, - std::chrono::milliseconds timeout, BufferPoolPtr buffer_pool, PipelineDirection pipeline_direction = PipelineDirection::PULL); + std::chrono::milliseconds timeout, BufferPoolPtr buffer_pool, PipelineDirection pipeline_direction, + std::shared_ptr async_pipeline); virtual ~SoftmaxPostProcessElement() = default; virtual Expected run_pull(PipelineBuffer &&optional, const PipelinePad &source) override; virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; @@ -523,16 +533,20 @@ public: static Expected> create(std::shared_ptr nms_op, const std::string &name, std::chrono::milliseconds timeout, size_t 
buffer_pool_size, hailo_pipeline_elem_stats_flags_t elem_flags, hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event, - std::shared_ptr> pipeline_status, PipelineDirection pipeline_direction = PipelineDirection::PULL, bool is_last_copy_element = false); + std::shared_ptr> pipeline_status, PipelineDirection pipeline_direction = PipelineDirection::PULL, bool is_last_copy_element = false, + std::shared_ptr async_pipeline = nullptr); static Expected> create(std::shared_ptr nms_op, - const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction = PipelineDirection::PULL, bool is_last_copy_element = false); + const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction = PipelineDirection::PULL, bool is_last_copy_element = false, + std::shared_ptr async_pipeline = nullptr); static Expected> create(std::shared_ptr nms_op, const std::string &name, const hailo_vstream_params_t &vstream_params, EventPtr shutdown_event, std::shared_ptr> pipeline_status, - PipelineDirection pipeline_direction = PipelineDirection::PULL, bool is_last_copy_element = false); + PipelineDirection pipeline_direction = PipelineDirection::PULL, bool is_last_copy_element = false, + std::shared_ptr async_pipeline = nullptr); NmsPostProcessMuxElement(std::shared_ptr nms_op, BufferPoolPtr &&pool, const std::string &name, std::chrono::milliseconds timeout, DurationCollector &&duration_collector, - std::shared_ptr> &&pipeline_status, PipelineDirection pipeline_direction); + std::shared_ptr> &&pipeline_status, PipelineDirection pipeline_direction, + std::shared_ptr async_pipeline); virtual std::vector get_queue_size_accumulators() override; void add_sink_name(const std::string &name) // TODO: remove this (HRT-8875) @@ -542,6 +556,33 @@ public: std::shared_ptr get_op() { return m_nms_op; } + virtual hailo_status set_nms_score_threshold(float32_t threshold) + { + auto nms_metadata = 
std::dynamic_pointer_cast(get_op()->metadata()); + assert(nullptr != nms_metadata); + nms_metadata->nms_config().nms_score_th = threshold; + + return HAILO_SUCCESS; + } + + virtual hailo_status set_nms_iou_threshold(float32_t threshold) + { + auto nms_metadata = std::dynamic_pointer_cast(get_op()->metadata()); + assert(nullptr != nms_metadata); + nms_metadata->nms_config().nms_iou_th = threshold; + + return HAILO_SUCCESS; + } + + virtual hailo_status set_nms_max_proposals_per_class(uint32_t max_proposals_per_class) + { + auto nms_metadata = std::dynamic_pointer_cast(get_op()->metadata()); + assert(nullptr != nms_metadata); + nms_metadata->nms_config().max_proposals_per_class = max_proposals_per_class; + + return HAILO_SUCCESS; + } + protected: virtual Expected action(std::vector &&inputs, PipelineBuffer &&optional) override; @@ -556,16 +597,17 @@ public: static Expected> create(const std::vector &nms_infos, const std::string &name, std::chrono::milliseconds timeout, size_t buffer_pool_size, hailo_pipeline_elem_stats_flags_t elem_flags, hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event, std::shared_ptr> pipeline_status, - PipelineDirection pipeline_direction = PipelineDirection::PULL, bool is_last_copy_element = false); + PipelineDirection pipeline_direction = PipelineDirection::PULL, bool is_last_copy_element = false, std::shared_ptr async_pipeline = nullptr); static Expected> create(const std::vector &nms_infos, const std::string &name, const hailo_vstream_params_t &vstream_params, EventPtr shutdown_event, std::shared_ptr> pipeline_status, - PipelineDirection pipeline_direction = PipelineDirection::PULL, bool is_last_copy_element = false); + PipelineDirection pipeline_direction = PipelineDirection::PULL, bool is_last_copy_element = false, + std::shared_ptr async_pipeline = nullptr); static Expected> create(const std::vector &nms_infos, const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction = 
PipelineDirection::PULL, - bool is_last_copy_element = false); + bool is_last_copy_element = false, std::shared_ptr async_pipeline = nullptr); NmsMuxElement(const std::vector &nms_infos, const hailo_nms_info_t &fused_nms_info, BufferPoolPtr &&pool, const std::string &name, std::chrono::milliseconds timeout, DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, - PipelineDirection pipeline_direction = PipelineDirection::PULL); + PipelineDirection pipeline_direction, std::shared_ptr async_pipeline); const hailo_nms_info_t &get_fused_nms_info() const; virtual std::vector get_queue_size_accumulators() override; @@ -584,13 +626,14 @@ public: static Expected> create(std::shared_ptr demuxer, const std::string &name, std::chrono::milliseconds timeout, size_t buffer_pool_size, hailo_pipeline_elem_stats_flags_t elem_flags, hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event, std::shared_ptr> pipeline_status, - PipelineDirection pipeline_direction = PipelineDirection::PULL); + PipelineDirection pipeline_direction = PipelineDirection::PULL, bool is_last_copy_element = false, + std::shared_ptr async_pipeline = nullptr); static Expected> create(std::shared_ptr demuxer, - const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction = PipelineDirection::PULL); + const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction = PipelineDirection::PULL, + bool is_last_copy_element = false, std::shared_ptr async_pipeline = nullptr); TransformDemuxElement(std::shared_ptr demuxer, std::vector &&pools, const std::string &name, std::chrono::milliseconds timeout, DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, - PipelineDirection pipeline_direction); - + PipelineDirection pipeline_direction, std::shared_ptr async_pipeline); virtual std::vector get_queue_size_accumulators() override; protected: @@ -605,23 +648,28 @@ class PixBufferElement : public 
BaseDemuxElement public: static Expected> create(const std::string &name, std::chrono::milliseconds timeout, DurationCollector &&duration_collector, - std::shared_ptr> pipeline_status, size_t sources_count, hailo_format_order_t order); + std::shared_ptr> pipeline_status, hailo_format_order_t order, + std::shared_ptr async_pipeline = nullptr); PixBufferElement(const std::string &name, std::chrono::milliseconds timeout, DurationCollector &&duration_collector, - std::shared_ptr> &&pipeline_status, size_t sources_count, hailo_format_order_t order); + std::shared_ptr> &&pipeline_status, hailo_format_order_t order, + std::shared_ptr async_pipeline); + + virtual Expected can_push_buffer_upstream(const std::string &pad_name) override; protected: virtual Expected> action(PipelineBuffer &&input); hailo_format_order_t m_order; }; + class HwReadElement : public SourceElement { public: - static Expected> create(std::shared_ptr stream, const std::string &name, std::chrono::milliseconds timeout, + static Expected> create(std::shared_ptr stream, const std::string &name, std::chrono::milliseconds timeout, size_t buffer_pool_size, hailo_pipeline_elem_stats_flags_t elem_flags, hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event, std::shared_ptr> pipeline_status, PipelineDirection pipeline_direction = PipelineDirection::PULL); - HwReadElement(std::shared_ptr stream, BufferPoolPtr buffer_pool, const std::string &name, std::chrono::milliseconds timeout, + HwReadElement(std::shared_ptr stream, BufferPoolPtr buffer_pool, const std::string &name, std::chrono::milliseconds timeout, DurationCollector &&duration_collector, EventPtr shutdown_event, std::shared_ptr> &&pipeline_status, PipelineDirection pipeline_direction); virtual ~HwReadElement() = default; @@ -643,7 +691,7 @@ public: virtual std::string description() const override; private: - std::shared_ptr m_stream; + std::shared_ptr m_stream; BufferPoolPtr m_pool; std::chrono::milliseconds m_timeout; EventPtr 
m_shutdown_event; @@ -653,10 +701,10 @@ private: class HwWriteElement : public SinkElement { public: - static Expected> create(std::shared_ptr stream, const std::string &name, + static Expected> create(std::shared_ptr stream, const std::string &name, hailo_pipeline_elem_stats_flags_t elem_flags, std::shared_ptr> pipeline_status, PipelineDirection pipeline_direction = PipelineDirection::PUSH); - HwWriteElement(std::shared_ptr stream, const std::string &name, DurationCollector &&duration_collector, + HwWriteElement(std::shared_ptr stream, const std::string &name, DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, EventPtr got_flush_event, PipelineDirection pipeline_direction); virtual ~HwWriteElement() = default; @@ -674,7 +722,7 @@ public: virtual std::string description() const override; private: - std::shared_ptr m_stream; + std::shared_ptr m_stream; EventPtr m_got_flush_event; }; @@ -683,12 +731,12 @@ class LastAsyncElement : public SinkElement public: static Expected> create(const std::string &name, hailo_pipeline_elem_stats_flags_t elem_flags, std::shared_ptr> pipeline_status, - PipelineDirection pipeline_direction = PipelineDirection::PUSH); - static Expected> create(const std::string &name, - const ElementBuildParams &build_params, PipelineDirection pipeline_direction = PipelineDirection::PUSH); + std::shared_ptr async_pipeline, PipelineDirection pipeline_direction = PipelineDirection::PUSH); + static Expected> create(const std::string &name, const ElementBuildParams &build_params, + std::shared_ptr async_pipeline, PipelineDirection pipeline_direction = PipelineDirection::PUSH); LastAsyncElement(const std::string &name, DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, - PipelineDirection pipeline_direction); + PipelineDirection pipeline_direction, std::shared_ptr async_pipeline); virtual ~LastAsyncElement() = default; virtual void run_push_async(PipelineBuffer &&buffer, const PipelinePad &sink) override; @@ 
-699,21 +747,31 @@ public: virtual hailo_status execute_wait_for_finish() override; virtual hailo_status enqueue_execution_buffer(MemoryView mem_view, const TransferDoneCallbackAsyncInfer &exec_done, const std::string &source_name) override; - virtual Expected are_buffer_pools_full() override; - virtual hailo_status fill_buffer_pools(bool is_dma_able) override; + + virtual Expected can_push_buffer_upstream(const uint32_t source_index) override; + virtual hailo_status fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const uint32_t source_index) override; + virtual Expected can_push_buffer_upstream(const std::string &source_name) override; + virtual hailo_status fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const std::string &source_name) override; + + virtual hailo_status execute_post_deactivate(bool /*should_clear_abort*/) override { return HAILO_SUCCESS; }; + virtual hailo_status execute_deactivate() override { return HAILO_SUCCESS; }; }; // Note: This element does infer - it sends writes to HW and reads the outputs -class AsyncHwElement : public PipelineElement +class AsyncHwElement : public PipelineElementInternal { public: - static Expected> create(const std::vector> &input_streams, - const std::vector> &output_streams, std::chrono::milliseconds timeout, size_t buffer_pool_size, hailo_pipeline_elem_stats_flags_t elem_flags, - hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event, const std::string &name, std::shared_ptr> pipeline_status, - PipelineDirection pipeline_direction = PipelineDirection::PUSH, bool is_last_copy_element = false); - AsyncHwElement(const std::vector> &input_streams, const std::vector> &output_streams, - std::chrono::milliseconds timeout, std::unordered_map &&output_streams_pools, const std::string &name, - DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, PipelineDirection pipeline_direction); + static Expected> create(const std::unordered_map &named_stream_infos, + 
std::chrono::milliseconds timeout, size_t buffer_pool_size, hailo_pipeline_elem_stats_flags_t elem_flags, + hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event, const std::string &name, + std::shared_ptr> pipeline_status, + std::shared_ptr net_group, PipelineDirection pipeline_direction = PipelineDirection::PUSH, + bool is_last_copy_element = false, std::shared_ptr async_pipeline = nullptr); + AsyncHwElement(const std::unordered_map &named_stream_infos, std::chrono::milliseconds timeout, + std::vector &&output_streams_pools, const std::string &name, DurationCollector &&duration_collector, + std::shared_ptr> &&pipeline_status, PipelineDirection pipeline_direction, + std::shared_ptr async_pipeline, std::shared_ptr net_group, + const size_t max_ongoing_transfers); virtual ~AsyncHwElement() = default; virtual void run_push_async(PipelineBuffer &&buffer, const PipelinePad &sink) override; @@ -721,24 +779,31 @@ public: virtual Expected run_pull(PipelineBuffer &&optional, const PipelinePad &source) override; virtual hailo_status enqueue_execution_buffer(MemoryView mem_view, const TransferDoneCallbackAsyncInfer &exec_done, const std::string &source_name) override; - virtual Expected are_buffer_pools_full() override; - virtual hailo_status fill_buffer_pools(bool is_dma_able) override; + virtual Expected can_push_buffer_upstream(const uint32_t source_index) override; + virtual hailo_status fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const uint32_t source_index) override; + virtual Expected can_push_buffer_upstream(const std::string &source_name) override; + virtual hailo_status fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const std::string &source_name) override; Expected get_source_index_from_output_stream_name(const std::string &output_stream_name); Expected get_sink_index_from_input_stream_name(const std::string &input_stream_name); + virtual Expected get_source_index_from_source_name(const std::string &source_name) override; 
protected: virtual std::vector execution_pads() override; + virtual hailo_status execute_terminate(hailo_status error_status) override; private: - void read_async_on_all_streams(); void handle_error_in_hw_async_elem(hailo_status error_status); bool has_all_sinks_arrived(); + virtual hailo_status execute_dequeue_user_buffers(hailo_status error_status) override; std::chrono::milliseconds m_timeout; - std::unordered_map m_output_streams_pools; - std::unordered_map> m_sink_name_to_input; - std::unordered_map> m_source_name_to_output; + std::vector m_pools; + std::shared_ptr m_net_group; + size_t m_max_ongoing_transfers; + + std::unordered_map m_sink_name_to_stream_name; + std::unordered_map m_source_name_to_stream_name; std::unordered_map m_sink_has_arrived; std::unordered_map m_input_buffers; std::mutex m_mutex; @@ -751,9 +816,9 @@ class CopyBufferElement : public FilterElement { public: static Expected> create(const std::string &name, std::shared_ptr> pipeline_status, - std::chrono::milliseconds timeout, PipelineDirection pipeline_direction = PipelineDirection::PULL); + std::chrono::milliseconds timeout, PipelineDirection pipeline_direction = PipelineDirection::PULL, std::shared_ptr async_pipeline = nullptr); CopyBufferElement(const std::string &name, DurationCollector &&duration_collector, std::shared_ptr> pipeline_status, - std::chrono::milliseconds timeout, PipelineDirection pipeline_direction); + std::chrono::milliseconds timeout, PipelineDirection pipeline_direction, std::shared_ptr async_pipeline); virtual ~CopyBufferElement() = default; virtual PipelinePad &next_pad() override; @@ -764,9 +829,9 @@ protected: class VStreamsBuilderUtils { public: - static Expected> create_inputs(std::vector> input_streams, const hailo_vstream_info_t &input_vstream_infos, + static Expected> create_inputs(std::vector> input_streams, const hailo_vstream_info_t &input_vstream_infos, const hailo_vstream_params_t &vstreams_params); - static Expected> create_outputs(std::shared_ptr 
output_stream, + static Expected> create_outputs(std::shared_ptr output_stream, NameToVStreamParamsMap &vstreams_params_map, const std::map &output_vstream_infos); static InputVStream create_input(std::shared_ptr input_vstream); static OutputVStream create_output(std::shared_ptr output_vstream); @@ -781,57 +846,57 @@ public: hailo_vstream_params_t vstreams_params, const std::map &output_vstream_infos, const std::shared_ptr &nms_op); - static Expected> add_hw_read_element(std::shared_ptr &output_stream, + static Expected> add_hw_read_element(std::shared_ptr &output_stream, std::shared_ptr> &pipeline_status, std::vector> &elements, const std::string &element_name, EventPtr &shutdown_event, size_t buffer_pool_size, const hailo_pipeline_elem_stats_flags_t &hw_read_element_stats_flags, const hailo_vstream_stats_flags_t &hw_read_stream_stats_flags); - static Expected> add_pull_queue_element(std::shared_ptr &output_stream, + static Expected> add_pull_queue_element(std::shared_ptr &output_stream, std::shared_ptr> &pipeline_status, std::vector> &elements, const std::string &element_name, EventPtr &shutdown_event, const hailo_vstream_params_t &vstream_params); // Move all post-processes related elements to a dedicated model - HRT-11512 - static Expected> add_argmax_element(std::shared_ptr &output_stream, + static Expected> add_argmax_element(std::shared_ptr &output_stream, std::shared_ptr> &pipeline_status, std::vector> &elements, const std::string &element_name, hailo_vstream_params_t &vstream_params, const net_flow::PostProcessOpMetadataPtr &argmax_op, size_t buffer_pool_size, std::chrono::milliseconds timeout, const hailo_vstream_stats_flags_t &vstream_flags, EventPtr &shutdown_event); - static Expected> add_softmax_element(std::shared_ptr &output_stream, + static Expected> add_softmax_element(std::shared_ptr &output_stream, std::shared_ptr> &pipeline_status, std::vector> &elements, const std::string &element_name, hailo_vstream_params_t &vstream_params, const 
net_flow::PostProcessOpMetadataPtr &softmax_op, size_t buffer_pool_size, std::chrono::milliseconds timeout, const hailo_vstream_stats_flags_t &vstream_flags, EventPtr &shutdown_event); - static Expected> add_nms_to_detections_convert_element(std::shared_ptr &output_stream, + static Expected> add_nms_to_detections_convert_element(std::shared_ptr &output_stream, std::shared_ptr> &pipeline_status, std::vector> &elements, const std::string &element_name, hailo_vstream_params_t &vstream_params, const net_flow::PostProcessOpMetadataPtr &iou_op_metadata, size_t buffer_pool_size, std::chrono::milliseconds timeout, const hailo_vstream_stats_flags_t &vstream_flags, EventPtr &shutdown_event); - static Expected> add_remove_overlapping_bboxes_element(std::shared_ptr &output_stream, + static Expected> add_remove_overlapping_bboxes_element(std::shared_ptr &output_stream, std::shared_ptr> &pipeline_status, std::vector> &elements, const std::string &element_name, hailo_vstream_params_t &vstream_params, const net_flow::PostProcessOpMetadataPtr &iou_op_metadata, size_t buffer_pool_size, std::chrono::milliseconds timeout, const hailo_vstream_stats_flags_t &vstream_flags, EventPtr &shutdown_event); - static Expected> add_fill_nms_format_element(std::shared_ptr &output_stream, + static Expected> add_fill_nms_format_element(std::shared_ptr &output_stream, std::shared_ptr> &pipeline_status, std::vector> &elements, const std::string &element_name, hailo_vstream_params_t &vstream_params, const net_flow::PostProcessOpMetadataPtr &iou_op_metadata, size_t buffer_pool_size, std::chrono::milliseconds timeout, const hailo_vstream_stats_flags_t &vstream_flags, EventPtr &shutdown_event); - static Expected> add_user_buffer_queue_element(std::shared_ptr &output_stream, + static Expected> add_user_buffer_queue_element(std::shared_ptr &output_stream, std::shared_ptr> &pipeline_status, std::vector> &elements, const std::string &element_name, EventPtr &shutdown_event, const hailo_vstream_params_t 
&vstream_params); - static Expected> add_post_infer_element(std::shared_ptr &output_stream, + static Expected> add_post_infer_element(std::shared_ptr &output_stream, std::shared_ptr> &pipeline_status, std::vector> &elements, const std::string &element_name, const hailo_vstream_params_t &vstream_params, EventPtr shutdown_event); - static hailo_status add_demux(std::shared_ptr output_stream, NameToVStreamParamsMap &vstreams_params_map, + static hailo_status add_demux(std::shared_ptr output_stream, NameToVStreamParamsMap &vstreams_params_map, std::vector> &&elements, std::vector &vstreams, std::shared_ptr hw_read_elem, EventPtr shutdown_event, std::shared_ptr> pipeline_status, const std::map &output_vstream_infos); - static hailo_status handle_pix_buffer_splitter_flow(std::vector> streams, + static hailo_status handle_pix_buffer_splitter_flow(std::vector> streams, const hailo_vstream_info_t &vstream_info, std::vector> &&base_elements, std::vector &vstreams, const hailo_vstream_params_t &vstream_params, EventPtr shutdown_event, std::shared_ptr> pipeline_status, EventPtr &core_op_activated_event, @@ -850,14 +915,30 @@ public: static Expected create_pipeline_latency_accumulator(const hailo_vstream_params_t &vstreams_params); + static hailo_format_t expand_user_buffer_format_autos_multi_planar(const hailo_vstream_info_t &vstream_info, + const hailo_format_t &user_buffer_format) + { + /* In multi planar case we compare to vstream_info instead of stream_info, + as the ll-streams formats doesnt indicate the format of the vstreams */ + auto expanded_user_buffer_format = user_buffer_format; + if (HAILO_FORMAT_TYPE_AUTO == expanded_user_buffer_format.type) { + expanded_user_buffer_format.type = vstream_info.format.type; + } + if (HAILO_FORMAT_ORDER_AUTO == expanded_user_buffer_format.order) { + expanded_user_buffer_format.order = vstream_info.format.order; + } + + return expanded_user_buffer_format; + } + private: - static Expected> 
create_output_post_process_argmax(std::shared_ptr output_stream, + static Expected> create_output_post_process_argmax(std::shared_ptr output_stream, const NameToVStreamParamsMap &vstreams_params_map, const hailo_vstream_info_t &output_vstream_info, const net_flow::PostProcessOpMetadataPtr &argmax_op_metadata); - static Expected> create_output_post_process_softmax(std::shared_ptr output_stream, + static Expected> create_output_post_process_softmax(std::shared_ptr output_stream, const NameToVStreamParamsMap &vstreams_params_map, const hailo_vstream_info_t &output_vstream_info, const net_flow::PostProcessOpMetadataPtr &softmax_op_metadata); - static Expected> create_output_post_process_iou(std::shared_ptr output_stream, + static Expected> create_output_post_process_iou(std::shared_ptr output_stream, hailo_vstream_params_t vstream_params, const net_flow::PostProcessOpMetadataPtr &iou_op_metadata); }; diff --git a/hailort/libhailort/src/network_group/network_group.cpp b/hailort/libhailort/src/network_group/network_group.cpp index 715b0c6..dfc5916 100644 --- a/hailort/libhailort/src/network_group/network_group.cpp +++ b/hailort/libhailort/src/network_group/network_group.cpp @@ -14,6 +14,7 @@ #include "common/utils.hpp" #include "common/runtime_statistics_internal.hpp" +#include "common/os_utils.hpp" #include "network_group/network_group_internal.hpp" #include "hef/hef_internal.hpp" @@ -103,6 +104,12 @@ private: bool m_is_activated; }; +ConfiguredNetworkGroup::ConfiguredNetworkGroup() : + m_infer_requests_mutex(), + m_ongoing_transfers(0), + m_cv() +{} + Expected> ConfiguredNetworkGroup::duplicate_network_group_client(uint32_t ng_handle, uint32_t vdevice_handle, const std::string &network_group_name) { @@ -152,6 +159,37 @@ Expected> ConfiguredNetworkGroup::activat return activate(HailoRTDefaults::get_active_network_group_params()); } +hailo_status ConfiguredNetworkGroup::wait_for_callbacks_finish() +{ + return wait_for_callbacks_to_maintain_below_threshold(1); +} + 
+hailo_status ConfiguredNetworkGroup::wait_for_callbacks_to_maintain_below_threshold(const size_t threshold) +{ + std::unique_lock lock(m_infer_requests_mutex); + bool done = m_cv.wait_for(lock, DEFAULT_TRANSFER_TIMEOUT, [&, threshold](){ + return (m_ongoing_transfers.load() < threshold); + }); + CHECK(done, HAILO_TIMEOUT, "Got timeout in `wait_for_callbacks_to_maintain_below_threshold`"); + + return HAILO_SUCCESS; +} + +void ConfiguredNetworkGroup::decrease_ongoing_callbacks() +{ + { + std::unique_lock lock(m_infer_requests_mutex); + m_ongoing_transfers--; + } + m_cv.notify_all(); +} + +void ConfiguredNetworkGroup::increase_ongoing_callbacks() +{ + std::unique_lock lock(m_infer_requests_mutex); + m_ongoing_transfers++; +} + Expected> ConfiguredNetworkGroupBase::activate( const hailo_activate_network_group_params_t &network_group_params) { @@ -256,14 +294,65 @@ Expected> ConfiguredNetworkGroup return std::vector(m_network_group_metadata.m_ops_metadata); } -Expected ConfiguredNetworkGroupBase::get_layer_info(const std::string &stream_name) +Expected> ConfiguredNetworkGroupBase::get_layer_info(const std::string &stream_name) +{ + auto layer_info = get_core_op()->get_layer_info(stream_name); + CHECK_EXPECTED(layer_info); + auto res = make_unique_nothrow(layer_info.release()); + CHECK_NOT_NULL_AS_EXPECTED(res, HAILO_OUT_OF_HOST_MEMORY); + return res; +} + +Expected> ConfiguredNetworkGroupBase::get_nms_meta_data(const std::string &edge_name) +{ + auto expected_ops_metadata = get_ops_metadata(); + CHECK_EXPECTED(expected_ops_metadata); + auto ops_metadata = expected_ops_metadata.release(); + + auto matching_metadata = std::find_if(ops_metadata.begin(), ops_metadata.end(), + [&edge_name] (const auto &metadata) { + for (const auto &metadata_output_pair : metadata->outputs_metadata()) { + if (metadata_output_pair.first == edge_name) { + return true; + } + } + return false; + }); + CHECK_AS_EXPECTED(matching_metadata != ops_metadata.end(), HAILO_INVALID_ARGUMENT, + "There is 
no NMS post-process for '{}'", edge_name); + auto nms_metadata = std::dynamic_pointer_cast(*matching_metadata); + CHECK_NOT_NULL_AS_EXPECTED(nms_metadata, HAILO_INVALID_ARGUMENT); + return nms_metadata; +} + +hailo_status ConfiguredNetworkGroupBase::set_nms_score_threshold(const std::string &edge_name, float32_t nms_score_threshold) +{ + auto expected_nms_op_metadata = get_nms_meta_data(edge_name); + CHECK_EXPECTED_AS_STATUS(expected_nms_op_metadata); + expected_nms_op_metadata.value()->nms_config().nms_score_th = nms_score_threshold; + return HAILO_SUCCESS; +} + +hailo_status ConfiguredNetworkGroupBase::set_nms_iou_threshold(const std::string &edge_name, float32_t iou_threshold) +{ + auto expected_nms_op_metadata = get_nms_meta_data(edge_name); + CHECK_EXPECTED_AS_STATUS(expected_nms_op_metadata); + expected_nms_op_metadata.value()->nms_config().nms_iou_th = iou_threshold; + return HAILO_SUCCESS; +} + +hailo_status ConfiguredNetworkGroupBase::set_nms_max_bboxes_per_class(const std::string &edge_name, uint32_t max_bboxes_per_class) { - return get_core_op()->get_layer_info(stream_name); + auto expected_nms_op_metadata = get_nms_meta_data(edge_name); + CHECK_EXPECTED_AS_STATUS(expected_nms_op_metadata); + expected_nms_op_metadata.value()->nms_config().max_proposals_per_class = max_bboxes_per_class; + return HAILO_SUCCESS; } ConfiguredNetworkGroupBase::ConfiguredNetworkGroupBase( const ConfigureNetworkParams &config_params, std::vector> &&core_ops, NetworkGroupMetadata &&metadata) : + ConfiguredNetworkGroup(), m_config_params(config_params), m_core_ops(std::move(core_ops)), m_network_group_metadata(std::move(metadata)), @@ -421,6 +510,16 @@ hailo_status ConfiguredNetworkGroupBase::deactivate_impl() return get_core_op()->deactivate(); } +hailo_status ConfiguredNetworkGroupBase::shutdown() +{ + std::unique_lock lock(m_shutdown_mutex); + if (!m_is_shutdown) { + m_is_shutdown = true; + return get_core_op()->shutdown(); + } + return HAILO_SUCCESS; +} + Expected>> 
ConfiguredNetworkGroupBase::get_output_vstream_groups() { std::vector> results; @@ -435,9 +534,9 @@ Expected>> ConfiguredNetworkGroupBase::get_ } Expected>> ConfiguredNetworkGroupBase::make_output_vstream_params_groups( - bool quantized, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size) + bool /*unused*/, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size) { - auto params = make_output_vstream_params(quantized, format_type, timeout_ms, queue_size); + auto params = make_output_vstream_params({}, format_type, timeout_ms, queue_size); CHECK_EXPECTED(params); auto groups = get_output_vstream_groups(); @@ -459,27 +558,27 @@ Expected>> ConfiguredN } Expected> ConfiguredNetworkGroupBase::make_input_vstream_params( - bool quantized, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size, + bool /*unused*/, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size, const std::string &network_name) { auto input_vstream_infos = m_network_group_metadata.get_input_vstream_infos(network_name); CHECK_EXPECTED(input_vstream_infos); std::map res; - auto status = Hef::Impl::fill_missing_vstream_params_with_default(res, input_vstream_infos.value(), quantized, + auto status = Hef::Impl::fill_missing_vstream_params_with_default(res, input_vstream_infos.value(), format_type, timeout_ms, queue_size); CHECK_SUCCESS_AS_EXPECTED(status); return res; } Expected> ConfiguredNetworkGroupBase::make_output_vstream_params( - bool quantized, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size, + bool /*unused*/, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size, const std::string &network_name) { auto output_vstream_infos = m_network_group_metadata.get_output_vstream_infos(network_name); CHECK_EXPECTED(output_vstream_infos); std::map res; - auto status = Hef::Impl::fill_missing_vstream_params_with_default(res, output_vstream_infos.value(), quantized, + auto status = 
Hef::Impl::fill_missing_vstream_params_with_default(res, output_vstream_infos.value(), format_type, timeout_ms, queue_size); CHECK_SUCCESS_AS_EXPECTED(status); return res; @@ -533,22 +632,6 @@ static hailo_vstream_params_t expand_vstream_params_autos(const hailo_stream_inf return local_vstream_params; } -static hailo_vstream_params_t expand_vstream_params_autos_multi_planar(const hailo_vstream_info_t &vstream_info, - const hailo_vstream_params_t &vstream_params) -{ - /* In multi planar case we compare to vstream_info instead of stream_info, - as the ll-streams formats doesnt indicate the format of the vstreams */ - auto local_vstream_params = vstream_params; - if (HAILO_FORMAT_TYPE_AUTO == local_vstream_params.user_buffer_format.type) { - local_vstream_params.user_buffer_format.type = vstream_info.format.type; - } - if (HAILO_FORMAT_ORDER_AUTO == local_vstream_params.user_buffer_format.order) { - local_vstream_params.user_buffer_format.order = vstream_info.format.order; - } - - return local_vstream_params; -} - static std::map vstream_infos_vector_to_map(std::vector &&vstream_info_vector) { std::map vstream_infos_map; @@ -569,9 +652,9 @@ Expected> ConfiguredNetworkGroupBase::create_input_vst vstreams.reserve(inputs_params.size()); for (const auto &name_params_pair : inputs_params) { - std::vector> streams; + std::vector> streams; auto &vstream_name = name_params_pair.first; - auto &vstream_params = name_params_pair.second; + auto vstream_params = name_params_pair.second; auto stream_names = m_network_group_metadata.get_stream_names_from_vstream_name(vstream_name); CHECK_EXPECTED(stream_names); @@ -588,9 +671,14 @@ Expected> ConfiguredNetworkGroupBase::create_input_vst streams.push_back(input_stream); } - auto expanded_vstream_params = (streams.size() > 1) ? 
expand_vstream_params_autos_multi_planar(vstream_info->second, vstream_params) : - expand_vstream_params_autos(streams.back()->get_info(), vstream_params); - auto inputs = VStreamsBuilderUtils::create_inputs(streams, vstream_info->second, expanded_vstream_params); + if (streams.size() > 1) { + auto expanded_user_buffer_format = + VStreamsBuilderUtils::expand_user_buffer_format_autos_multi_planar(vstream_info->second, vstream_params.user_buffer_format); + vstream_params.user_buffer_format = expanded_user_buffer_format; + } else { + vstream_params = expand_vstream_params_autos(streams.back()->get_info(), vstream_params); + } + auto inputs = VStreamsBuilderUtils::create_inputs(streams, vstream_info->second, vstream_params); CHECK_EXPECTED(inputs); vstreams.insert(vstreams.end(), std::make_move_iterator(inputs->begin()), std::make_move_iterator(inputs->end())); @@ -640,7 +728,6 @@ Expected> ConfiguredNetworkGroupBase::create_output_v vstreams.insert(vstreams.end(), std::make_move_iterator(outputs->begin()), std::make_move_iterator(outputs->end())); } - get_core_op()->set_vstreams_multiplexer_callbacks(vstreams); return vstreams; } @@ -664,4 +751,82 @@ Expected ConfiguredNetworkGroupBase::get_intermediate_buffer(const Inter return get_core_op()->get_intermediate_buffer(key); } +Expected ConfiguredNetworkGroupBase::get_min_buffer_pool_size() +{ + uint32_t buffer_pool_size = UINT32_MAX; + + auto input_streams = get_input_streams(); + for (const auto &input_stream : input_streams) { + auto async_max_queue_size = input_stream.get().get_async_max_queue_size(); + CHECK_EXPECTED(async_max_queue_size); + if (buffer_pool_size > async_max_queue_size.value()) { + buffer_pool_size = static_cast(async_max_queue_size.value()); + } + } + + auto output_streams = get_output_streams(); + for (const auto &output_stream : output_streams) { + auto async_max_queue_size = output_stream.get().get_async_max_queue_size(); + CHECK_EXPECTED(async_max_queue_size); + if (buffer_pool_size > 
async_max_queue_size.value()) { + buffer_pool_size = static_cast(async_max_queue_size.value()); + } + } + + // TODO (HRT-11294): In some cases, buffer_pool_size is lower then batch_size. we should remove this line. + buffer_pool_size = std::max(buffer_pool_size, static_cast(get_smallest_configured_batch_size(get_config_params()))); + + return buffer_pool_size; +} + +hailo_status ConfiguredNetworkGroupBase::infer_async(const NamedBuffersCallbacks &named_buffers_callbacks, + const std::function &infer_request_done_cb) +{ + InferRequest infer_request{}; + const auto dma_able_alignment = OsUtils::get_dma_able_alignment(); + for (auto &named_buffer_callback : named_buffers_callbacks) { + const auto &name = named_buffer_callback.first; + const auto &buffer = named_buffer_callback.second.first; + const auto &callback = named_buffer_callback.second.second; + TransferRequest trans_req{}; + trans_req.callback = callback; + BufferPtr buffer_ptr = nullptr; + // TODO (HRT-12239): Avoid this section + if (reinterpret_cast(buffer.data()) % dma_able_alignment == 0) { + auto hailo_buffer = DmaStorage::create_dma_able_buffer_from_user_size(const_cast(buffer.data()), + buffer.size()); + CHECK_EXPECTED_AS_STATUS(hailo_buffer); + buffer_ptr = hailo_buffer.release(); + } else { + auto hailo_buffer = UserBufferStorage::create_storage_from_user_buffer(const_cast(buffer.data()), + buffer.size()); + CHECK_EXPECTED_AS_STATUS(hailo_buffer); + buffer_ptr = hailo_buffer.release(); + } + trans_req.transfer_buffers.emplace_back(buffer_ptr); + infer_request.transfers.emplace(name, trans_req); + } + infer_request.callback = [this, infer_request_done_cb](hailo_status status){ + if (status == HAILO_STREAM_ABORTED_BY_USER) { + LOGGER__INFO("Infer request was aborted by user"); + } + else if (status != HAILO_SUCCESS) { + LOGGER__ERROR("Infer request callback failed with status = {}", status); + } + + infer_request_done_cb(status); + decrease_ongoing_callbacks(); + }; + + increase_ongoing_callbacks(); + 
auto status = get_core_op()->infer_async(std::move(infer_request)); + if (status != HAILO_SUCCESS) { + // If we got error in `infer_async()`, then the callbacks will not be called. + decrease_ongoing_callbacks(); + } + CHECK_SUCCESS(status); + + return HAILO_SUCCESS; +} + } /* namespace hailort */ diff --git a/hailort/libhailort/src/network_group/network_group_internal.hpp b/hailort/libhailort/src/network_group/network_group_internal.hpp index 4728f60..98b8f98 100644 --- a/hailort/libhailort/src/network_group/network_group_internal.hpp +++ b/hailort/libhailort/src/network_group/network_group_internal.hpp @@ -33,12 +33,9 @@ #include "common/latency_meter.hpp" #include "hef/hef_internal.hpp" -#include "vdma/channel/boundary_channel.hpp" #include "core_op/active_core_op_holder.hpp" #include "core_op/core_op.hpp" -#include "control_protocol.h" - #ifdef HAILO_SUPPORT_MULTI_PROCESS #include "service/hailort_rpc_client.hpp" #include "rpc/rpc_definitions.hpp" @@ -77,6 +74,8 @@ public: hailo_status activate_impl(uint16_t dynamic_batch_size = CONTROL_PROTOCOL__IGNORE_DYNAMIC_BATCH_SIZE); hailo_status deactivate_impl(); + virtual hailo_status shutdown() override; + virtual const std::string &get_network_group_name() const override; virtual const std::string &name() const override; @@ -93,14 +92,14 @@ public: virtual Expected get_latency_measurement(const std::string &network_name="") override; virtual Expected> make_input_vstream_params( - bool quantized, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size, + bool unused, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size, const std::string &network_name="") override; virtual Expected> make_output_vstream_params( - bool quantized, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size, + bool unused, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size, const std::string &network_name="") override; virtual Expected>> 
make_output_vstream_params_groups( - bool quantized, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size) override; + bool unused, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size) override; virtual Expected>> get_output_vstream_groups() override; @@ -118,7 +117,7 @@ public: virtual Expected run_hw_infer_estimator() override; // TODO: HRT-9551 - Change to get_core_op_by_name() when multiple core_ops supported - std::shared_ptr get_core_op() const; + std::shared_ptr get_core_op() const; // TODO: HRT-9546 Remove const std::shared_ptr get_core_op_metadata() const; @@ -132,6 +131,7 @@ public: virtual Expected> create_input_vstreams(const std::map &inputs_params) override; virtual Expected> create_output_vstreams(const std::map &outputs_params) override; + virtual Expected get_min_buffer_pool_size() override; Expected> get_shared_input_stream_by_name(const std::string &stream_name) { @@ -192,8 +192,18 @@ public: Expected get_intermediate_buffer(const IntermediateBufferKey &key); Expected get_output_streams_by_vstream_name(const std::string &name); - Expected> get_ops_metadata(); + virtual hailo_status infer_async(const NamedBuffersCallbacks &named_buffers_callbacks, + const std::function &infer_request_done_cb) override; + + virtual Expected> get_layer_info(const std::string &stream_name) override; + virtual Expected> get_ops_metadata() override; + + virtual hailo_status set_nms_score_threshold(const std::string &edge_name, float32_t nms_score_threshold) override; + virtual hailo_status set_nms_iou_threshold(const std::string &edge_name, float32_t iou_threshold) override; + virtual hailo_status set_nms_max_bboxes_per_class(const std::string &edge_name, uint32_t max_bboxes_per_class) override; + + Expected> get_nms_meta_data(const std::string &edge_name); private: ConfiguredNetworkGroupBase(const ConfigureNetworkParams &config_params, std::vector> &&core_ops, NetworkGroupMetadata &&metadata); @@ -201,7 +211,6 @@ private: 
static uint16_t get_smallest_configured_batch_size(const ConfigureNetworkParams &config_params); hailo_status add_mux_streams_by_edges_names(OutputStreamWithParamsVector &result, const std::unordered_map &outputs_edges_params); - Expected get_layer_info(const std::string &stream_name); hailo_status activate_low_level_streams(); hailo_status deactivate_low_level_streams(); @@ -209,13 +218,20 @@ private: const ConfigureNetworkParams m_config_params; std::vector> m_core_ops; NetworkGroupMetadata m_network_group_metadata; + bool m_is_shutdown = false; bool m_is_forked; + std::mutex m_shutdown_mutex; + friend class VDeviceCoreOp; + friend class PipelineBuilder; }; // Move client ng to different header #ifdef HAILO_SUPPORT_MULTI_PROCESS +using NamedBufferCallbackTuple = std::tuple>; +using NamedBufferCallbackTuplePtr = std::shared_ptr>>; + class ConfiguredNetworkGroupClient : public ConfiguredNetworkGroup { public: @@ -244,15 +260,16 @@ public: virtual Expected get_latency_measurement(const std::string &network_name="") override; virtual Expected> activate(const hailo_activate_network_group_params_t &network_group_params) override; virtual hailo_status wait_for_activation(const std::chrono::milliseconds &timeout) override; + virtual hailo_status shutdown() override; virtual Expected> make_input_vstream_params( - bool quantized, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size, + bool unused, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size, const std::string &network_name="") override; virtual Expected> make_output_vstream_params( - bool quantized, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size, + bool unused, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size, const std::string &network_name="") override; virtual Expected>> make_output_vstream_params_groups( - bool quantized, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size) override; + bool 
unused, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size) override; virtual Expected>> get_output_vstream_groups() override; virtual Expected> get_all_stream_infos(const std::string &network_name="") const override; @@ -280,6 +297,7 @@ public: virtual Expected> create_input_vstreams(const std::map &inputs_params); virtual Expected> create_output_vstreams(const std::map &outputs_params); + virtual Expected get_min_buffer_pool_size() override; virtual hailo_status before_fork() override; virtual hailo_status after_fork_in_parent() override; @@ -300,14 +318,34 @@ public: static Expected> duplicate_network_group_client(uint32_t handle, uint32_t vdevice_handle, const std::string &network_group_name); + virtual hailo_status infer_async(const NamedBuffersCallbacks &named_buffers_callbacks, + const std::function &infer_request_done_cb) override; + hailo_status execute_callback(const ProtoCallbackIdentifier &cb_id); + + virtual Expected> get_layer_info(const std::string &stream_name) override; + virtual Expected> get_ops_metadata() override; + + virtual hailo_status set_nms_score_threshold(const std::string &edge_name, float32_t nms_score_threshold) override; + virtual hailo_status set_nms_iou_threshold(const std::string &edge_name, float32_t iou_threshold) override; + virtual hailo_status set_nms_max_bboxes_per_class(const std::string &edge_name, uint32_t max_bboxes_per_class) override; + private: ConfiguredNetworkGroupClient(NetworkGroupIdentifier &&identifier, const std::string &network_group_name); hailo_status create_client(); hailo_status dup_handle(); + callback_idx_t get_unique_callback_idx(); + hailo_status execute_infer_request_callback(const ProtoCallbackIdentifier &cb_id); + hailo_status execute_transfer_callback(const ProtoCallbackIdentifier &cb_id); std::unique_ptr m_client; NetworkGroupIdentifier m_identifier; std::string m_network_group_name; + std::atomic m_current_cb_index; + std::unordered_set m_input_streams_names; + 
std::unordered_set m_output_streams_names; + std::mutex m_mutex; + std::unordered_map m_idx_to_callbacks; + std::unordered_map> m_infer_request_idx_to_callbacks; }; #endif // HAILO_SUPPORT_MULTI_PROCESS diff --git a/hailort/libhailort/src/os/hailort_driver.hpp b/hailort/libhailort/src/os/hailort_driver.hpp index f172cbb..50f242f 100755 --- a/hailort/libhailort/src/os/hailort_driver.hpp +++ b/hailort/libhailort/src/os/hailort_driver.hpp @@ -102,6 +102,13 @@ struct DescriptorsListInfo { void *user_address; }; +struct ContinousBufferInfo { + uintptr_t handle; // Unique identifer for the driver. + uint64_t dma_address; + size_t size; + void *user_address; +}; + class HailoRTDriver final { public: @@ -237,12 +244,12 @@ public: * @param[in] size - Buffer size * @return pair . */ - Expected> vdma_continuous_buffer_alloc(size_t size); + Expected vdma_continuous_buffer_alloc(size_t size); /** * Frees a vdma continuous buffer allocated by 'vdma_continuous_buffer_alloc'. */ - hailo_status vdma_continuous_buffer_free(uintptr_t buffer_handle); + hailo_status vdma_continuous_buffer_free(const ContinousBufferInfo &buffer_info); /** * Marks the device as used for vDMA operations. Only one open FD can be marked at once. 
@@ -300,6 +307,11 @@ private: Expected descriptors_list_create_mmap(uintptr_t desc_handle, size_t desc_count); hailo_status descriptors_list_create_munmap(void *address, size_t desc_count); + Expected> continous_buffer_alloc_ioctl(size_t size); + hailo_status continous_buffer_free_ioctl(uintptr_t desc_handle); + Expected continous_buffer_mmap(uintptr_t desc_handle, size_t size); + hailo_status continous_buffer_munmap(void *address, size_t size); + HailoRTDriver(const DeviceInfo &device_info, FileDescriptor &&fd, hailo_status &status); bool is_valid_channel_id(const vdma::ChannelId &channel_id); diff --git a/hailort/libhailort/src/os/posix/hailort_driver.cpp b/hailort/libhailort/src/os/posix/hailort_driver.cpp index a0e2fc5..c3720fc 100755 --- a/hailort/libhailort/src/os/posix/hailort_driver.cpp +++ b/hailort/libhailort/src/os/posix/hailort_driver.cpp @@ -909,7 +909,54 @@ hailo_status HailoRTDriver::vdma_low_memory_buffer_free(uintptr_t buffer_handle) return HAILO_SUCCESS; } -Expected> HailoRTDriver::vdma_continuous_buffer_alloc(size_t size) + +#if defined(__linux__) +Expected HailoRTDriver::vdma_continuous_buffer_alloc(size_t size) +{ + auto handle_to_dma_address_pair = continous_buffer_alloc_ioctl(size); + if (!handle_to_dma_address_pair) { + // Log in continous_buffer_alloc_ioctl + return make_unexpected(handle_to_dma_address_pair.status()); + } + + const auto desc_handle = handle_to_dma_address_pair->first; + const auto dma_address = handle_to_dma_address_pair->second; + + auto user_address = continous_buffer_mmap(desc_handle, size); + if (!user_address) { + auto status = continous_buffer_free_ioctl(desc_handle); + if (HAILO_SUCCESS != status) { + LOGGER__ERROR("Failed releasing continuous buffer, status {}", status); + // continue + } + return make_unexpected(user_address.status()); + } + + return ContinousBufferInfo{desc_handle, dma_address, size, user_address.release()}; +} + +hailo_status HailoRTDriver::vdma_continuous_buffer_free(const ContinousBufferInfo 
&buffer_info) +{ + hailo_status status = HAILO_SUCCESS; + + auto unmap_status = continous_buffer_munmap(buffer_info.user_address, buffer_info.size); + if (HAILO_SUCCESS != unmap_status) { + LOGGER__ERROR("Continuous buffer unmap failed with {}", unmap_status); + status = unmap_status; + // continue + } + + auto release_status = continous_buffer_free_ioctl(buffer_info.handle); + if (HAILO_SUCCESS != release_status) { + LOGGER__ERROR("Continuous buffer release failed with {}", release_status); + status = release_status; + // continue + } + + return status; +} + +Expected> HailoRTDriver::continous_buffer_alloc_ioctl(size_t size) { hailo_allocate_continuous_buffer_params params { .buffer_size = size, .buffer_handle = 0, .dma_address = 0 }; @@ -928,10 +975,10 @@ Expected> HailoRTDriver::vdma_continuous_buffer_a return std::make_pair(params.buffer_handle, params.dma_address); } -hailo_status HailoRTDriver::vdma_continuous_buffer_free(uintptr_t buffer_handle) +hailo_status HailoRTDriver::continous_buffer_free_ioctl(uintptr_t desc_handle) { int err = 0; - auto status = hailo_ioctl(this->m_fd, HAILO_VDMA_CONTINUOUS_BUFFER_FREE, (void*)buffer_handle, err); + auto status = hailo_ioctl(this->m_fd, HAILO_VDMA_CONTINUOUS_BUFFER_FREE, (void*)desc_handle, err); if (HAILO_SUCCESS != status) { LOGGER__ERROR("Failed to free continuous buffer with errno: {}", err); return HAILO_DRIVER_FAIL; @@ -940,6 +987,46 @@ hailo_status HailoRTDriver::vdma_continuous_buffer_free(uintptr_t buffer_handle) return HAILO_SUCCESS; } +Expected HailoRTDriver::continous_buffer_mmap(uintptr_t desc_handle, size_t size) +{ + // We lock m_driver_lock before calling mmap. 
Read m_driver_lock doc in the header + std::unique_lock lock(m_driver_lock); + + void *address = mmap(nullptr, size, PROT_WRITE | PROT_READ, MAP_SHARED, m_fd, (off_t)desc_handle); + if (MAP_FAILED == address) { + LOGGER__ERROR("Failed to mmap continuous buffer with errno: {}", errno); + return make_unexpected(HAILO_DRIVER_FAIL); + } + return address; +} + +hailo_status HailoRTDriver::continous_buffer_munmap(void *address, size_t size) +{ + if (0 != munmap(address, size)) { + LOGGER__ERROR("munmap of address {}, length: {} failed with errno: {}", address, size, errno); + return HAILO_DRIVER_FAIL; + } + return HAILO_SUCCESS; +} + +#elif defined(__QNX__) + +Expected HailoRTDriver::vdma_continuous_buffer_alloc(size_t /* size */) +{ + LOGGER__ERROR("Continuous buffer not supported for platform"); + return make_unexpected(HAILO_NOT_SUPPORTED); +} + +hailo_status HailoRTDriver::vdma_continuous_buffer_free(const ContinousBufferInfo &/* buffer_info */) +{ + LOGGER__ERROR("Continuous buffer not supported for platform"); + return HAILO_NOT_SUPPORTED; +} + +#else +#error "unsupported platform!" 
+#endif + hailo_status HailoRTDriver::mark_as_used() { hailo_mark_as_in_use_params params = { diff --git a/hailort/libhailort/src/os/windows/hailort_driver.cpp b/hailort/libhailort/src/os/windows/hailort_driver.cpp index c0bbd6c..f614f17 100644 --- a/hailort/libhailort/src/os/windows/hailort_driver.cpp +++ b/hailort/libhailort/src/os/windows/hailort_driver.cpp @@ -913,15 +913,15 @@ hailo_status HailoRTDriver::vdma_low_memory_buffer_free(uintptr_t buffer_handle) return HAILO_INVALID_OPERATION; } -Expected> HailoRTDriver::vdma_continuous_buffer_alloc(size_t size) +Expected HailoRTDriver::vdma_continuous_buffer_alloc(size_t size) { (void) size; return make_unexpected(HAILO_INVALID_OPERATION); } -hailo_status HailoRTDriver::vdma_continuous_buffer_free(uintptr_t buffer_handle) +hailo_status HailoRTDriver::vdma_continuous_buffer_free(const ContinousBufferInfo &buffer_info) { - (void) buffer_handle; + (void) buffer_info; return HAILO_INVALID_OPERATION; } diff --git a/hailort/libhailort/src/service/hailort_rpc_client.cpp b/hailort/libhailort/src/service/hailort_rpc_client.cpp index 54a6b83..91db64d 100644 --- a/hailort/libhailort/src/service/hailort_rpc_client.cpp +++ b/hailort/libhailort/src/service/hailort_rpc_client.cpp @@ -11,6 +11,14 @@ #include "hef/hef_internal.hpp" #include "hailort_rpc_client.hpp" +#include "net_flow/ops/yolov8_post_process.hpp" +#include "net_flow/ops/yolox_post_process.hpp" +#include "net_flow/ops/ssd_post_process.hpp" +#include "net_flow/ops/softmax_post_process.hpp" +#include "net_flow/ops/argmax_post_process.hpp" +#include "net_flow/ops/nms_post_process.hpp" +#include "net_flow/ops/yolov5_op_metadata.hpp" +#include "net_flow/ops/yolov5_seg_op_metadata.hpp" #include @@ -257,6 +265,40 @@ Expected> HailoRtRpcClient::VDevice_configure(const VDevic return networks_handles; } +Expected HailoRtRpcClient::VDevice_get_callback_id(const VDeviceIdentifier &identifier) +{ + VDevice_get_callback_id_Request request; + auto proto_identifier = 
request.mutable_identifier(); + VDevice_convert_identifier_to_proto(identifier, proto_identifier); + + VDevice_get_callback_id_Reply reply; + ClientContextWithTimeout context; + grpc::Status status = m_stub->VDevice_get_callback_id(&context, request, &reply); + CHECK_GRPC_STATUS_AS_EXPECTED(status); + assert(reply.status() < HAILO_STATUS_COUNT); + if (reply.status() == HAILO_SHUTDOWN_EVENT_SIGNALED) { + return make_unexpected(HAILO_SHUTDOWN_EVENT_SIGNALED); + } + CHECK_SUCCESS_AS_EXPECTED(static_cast(reply.status())); + auto cb_id = reply.callback_id(); + return cb_id; +} + +hailo_status HailoRtRpcClient::VDevice_finish_callback_listener(const VDeviceIdentifier &identifier) +{ + VDevice_finish_callback_listener_Request request; + auto proto_identifier = request.mutable_identifier(); + VDevice_convert_identifier_to_proto(identifier, proto_identifier); + + VDevice_finish_callback_listener_Reply reply; + ClientContextWithTimeout context; + grpc::Status status = m_stub->VDevice_finish_callback_listener(&context, request, &reply); + CHECK_GRPC_STATUS(status); + assert(reply.status() < HAILO_STATUS_COUNT); + CHECK_SUCCESS(static_cast(reply.status())); + return HAILO_SUCCESS; +} + Expected> HailoRtRpcClient::VDevice_get_physical_devices_ids(const VDeviceIdentifier &identifier) { VDevice_get_physical_devices_ids_Request request; @@ -320,6 +362,8 @@ Expected HailoRtRpcClient::ConfiguredNetworkGroup_dup_handle(const Net ClientContextWithTimeout context; grpc::Status status = m_stub->ConfiguredNetworkGroup_dup_handle(&context, request, &reply); CHECK_GRPC_STATUS_AS_EXPECTED(status); + assert(reply.status() < HAILO_STATUS_COUNT); + CHECK_SUCCESS_AS_EXPECTED(static_cast(reply.status())); return reply.handle(); } @@ -364,13 +408,12 @@ std::map get_group(const ProtoNamedVStreamP } Expected> HailoRtRpcClient::ConfiguredNetworkGroup_make_input_vstream_params( - const NetworkGroupIdentifier &identifier, bool quantized, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t 
queue_size, + const NetworkGroupIdentifier &identifier, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size, const std::string &network_name) { ConfiguredNetworkGroup_make_input_vstream_params_Request request; auto proto_identifier = request.mutable_identifier(); ConfiguredNetworkGroup_convert_identifier_to_proto(identifier, proto_identifier); - request.set_quantized(quantized); request.set_format_type(format_type); request.set_timeout_ms(timeout_ms); request.set_queue_size(queue_size); @@ -386,12 +429,11 @@ Expected> HailoRtRpcClient::Config } Expected>> HailoRtRpcClient::ConfiguredNetworkGroup_make_output_vstream_params_groups( - const NetworkGroupIdentifier &identifier, bool quantized, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size) + const NetworkGroupIdentifier &identifier, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size) { ConfiguredNetworkGroup_make_output_vstream_params_groups_Request request; auto proto_identifier = request.mutable_identifier(); ConfiguredNetworkGroup_convert_identifier_to_proto(identifier, proto_identifier); - request.set_quantized(quantized); request.set_format_type(format_type); request.set_timeout_ms(timeout_ms); request.set_queue_size(queue_size); @@ -411,13 +453,12 @@ Expected>> HailoRtRpcC } Expected> HailoRtRpcClient::ConfiguredNetworkGroup_make_output_vstream_params( - const NetworkGroupIdentifier &identifier, bool quantized, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size, + const NetworkGroupIdentifier &identifier, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size, const std::string &network_name) { ConfiguredNetworkGroup_make_output_vstream_params_Request request; auto proto_identifier = request.mutable_identifier(); ConfiguredNetworkGroup_convert_identifier_to_proto(identifier, proto_identifier); - request.set_quantized(quantized); request.set_format_type(format_type); request.set_timeout_ms(timeout_ms); 
request.set_queue_size(queue_size); @@ -584,6 +625,20 @@ Expected HailoRtRpcClient::ConfiguredNetworkGroup_get_ return stream_interface; } +hailo_status HailoRtRpcClient::ConfiguredNetworkGroup_shutdown(const NetworkGroupIdentifier &identifier) +{ + ConfiguredNetworkGroup_shutdown_Request request; + auto proto_identifier = request.mutable_identifier(); + ConfiguredNetworkGroup_convert_identifier_to_proto(identifier, proto_identifier); + + ClientContextWithTimeout context; + ConfiguredNetworkGroup_shutdown_Reply reply; + grpc::Status status = m_stub->ConfiguredNetworkGroup_shutdown(&context, request, &reply); + CHECK_GRPC_STATUS(status); + assert(reply.status() < HAILO_STATUS_COUNT); + return static_cast(reply.status()); +} + Expected>> HailoRtRpcClient::ConfiguredNetworkGroup_get_output_vstream_groups(const NetworkGroupIdentifier &identifier) { ConfiguredNetworkGroup_get_output_vstream_groups_Request request; @@ -610,6 +665,376 @@ Expected>> HailoRtRpcClient::ConfiguredNetw return result; } +std::pair deserialize_buffer_metadata(const ProtoNamedMetadata &op_metadata_proto) +{ + auto &named_params_proto = op_metadata_proto.params(); + + hailo_3d_image_shape_t shape = { + named_params_proto.shape().height(), + named_params_proto.shape().width(), + named_params_proto.shape().features() + }; + + hailo_3d_image_shape_t padded_shape = { + named_params_proto.padded_shape().height(), + named_params_proto.padded_shape().width(), + named_params_proto.padded_shape().features() + }; + + hailo_format_t format = { + static_cast(named_params_proto.format().type()), + static_cast(named_params_proto.format().order()), + static_cast(named_params_proto.format().flags()) + }; + + hailo_quant_info_t single_quant_info = { + named_params_proto.quant_info().qp_zp(), + named_params_proto.quant_info().qp_scale(), + named_params_proto.quant_info().limvals_min(), + named_params_proto.quant_info().limvals_max() + }; + + std::pair named_metadata_to_insert(op_metadata_proto.name(), + {shape, 
padded_shape, format, single_quant_info}); + return named_metadata_to_insert; +} + +std::unordered_map deserialize_inputs_buffer_metadata(const ProtoOpMetadata &ops_metadatas_proto) +{ + std::unordered_map inputs_metadata; + auto &inputs_metadata_proto = ops_metadatas_proto.inputs_metadata(); + for (auto &input_metadata_proto : inputs_metadata_proto) { + auto input_metadata = deserialize_buffer_metadata(input_metadata_proto); + inputs_metadata.insert(input_metadata); + } + return inputs_metadata; +} + +std::unordered_map deserialize_outputs_buffer_metadata(const ProtoOpMetadata &ops_metadatas_proto) +{ + std::unordered_map outputs_metadata; + auto &outputs_metadata_proto = ops_metadatas_proto.outputs_metadata(); + for (auto &output_metadata_proto : outputs_metadata_proto) { + auto output_metadata = deserialize_buffer_metadata(output_metadata_proto); + outputs_metadata.insert(output_metadata); + } + return outputs_metadata; +} + +Expected create_yolov8_post_process_config(const ProtoOpMetadata &op_metadata_proto) +{ + auto yolov8_config_proto = op_metadata_proto.yolov8_config(); + std::vector reg_to_cls_inputs; + auto ®_to_cls_inputs_proto = yolov8_config_proto.reg_to_cls_inputs(); + for (auto ®_to_cls_input_proto : reg_to_cls_inputs_proto) { + hailort::net_flow::Yolov8MatchingLayersNames yolov8_matching_layers_name; + yolov8_matching_layers_name.reg = reg_to_cls_input_proto.reg(); + yolov8_matching_layers_name.cls = reg_to_cls_input_proto.cls(); + yolov8_matching_layers_name.stride = reg_to_cls_input_proto.stride(); + reg_to_cls_inputs.push_back(yolov8_matching_layers_name); + } + + hailort::net_flow::Yolov8PostProcessConfig yolov8_post_process_config = {yolov8_config_proto.image_height(), + yolov8_config_proto.image_width(), reg_to_cls_inputs}; + return yolov8_post_process_config; +} + +Expected create_yolov5_post_process_config(const ProtoOpMetadata &op_metadata_proto) +{ + auto yolov5_config_proto = op_metadata_proto.yolov5_config(); + std::map> 
anchors_per_layer; + auto &yolov5_anchors_list_proto = yolov5_config_proto.yolov5_anchors(); + for (auto &anchors_list_proto : yolov5_anchors_list_proto) { + std::vector anchors; + for(auto &anchor : anchors_list_proto.anchors()) { + anchors.push_back(anchor); + } + anchors_per_layer.emplace(anchors_list_proto.layer(), anchors); + } + + hailort::net_flow::YoloPostProcessConfig yolov5_post_process_config = {yolov5_config_proto.image_height(), + yolov5_config_proto.image_width(), anchors_per_layer}; + return yolov5_post_process_config; +} + +Expected create_yolox_post_process_config(const ProtoOpMetadata &op_metadata_proto) +{ + auto yolox_config_proto = op_metadata_proto.yolox_config(); + std::vector input_names; + auto &yolox_anchors_list_proto = yolox_config_proto.input_names(); + for (auto &input_name : yolox_anchors_list_proto) { + input_names.push_back({input_name.reg(), input_name.obj(), input_name.cls()}); + } + + hailort::net_flow::YoloxPostProcessConfig yolox_post_process_config = {yolox_config_proto.image_height(), + yolox_config_proto.image_width(), input_names}; + return yolox_post_process_config; +} + +Expected create_ssd_post_process_config(const ProtoOpMetadata &op_metadata_proto) +{ + auto ssd_config_proto = op_metadata_proto.ssd_config(); + std::map reg_to_cls_inputs; + auto &ssd_reg_to_cls_proto = ssd_config_proto.reg_to_cls_inputs(); + for (auto ®_to_cls_input : ssd_reg_to_cls_proto) { + reg_to_cls_inputs.emplace(reg_to_cls_input.reg(), reg_to_cls_input.cls()); + } + + std::map> anchors_per_layer; + auto &ssd_anchors_proto = ssd_config_proto.anchors(); + for (auto &ssd_anchors : ssd_anchors_proto) { + std::vector anchors; + for (auto &anchor : ssd_anchors.anchors_per_layer()) { + anchors.push_back(anchor); + } + anchors_per_layer.emplace(ssd_anchors.layer(), anchors); + } + + hailort::net_flow::SSDPostProcessConfig ssd_post_process_config = {ssd_config_proto.image_height(), ssd_config_proto.image_width(), + ssd_config_proto.centers_scale_factor(), 
+ ssd_config_proto.bbox_dimensions_scale_factor(), + ssd_config_proto.ty_index(), ssd_config_proto.tx_index(), + ssd_config_proto.th_index(), ssd_config_proto.tw_index(), + reg_to_cls_inputs, anchors_per_layer, ssd_config_proto.normalize_boxes()}; + return ssd_post_process_config; +} + +Expected create_yolov5seg_post_process_config(const ProtoOpMetadata &op_metadata_proto) +{ + auto yolov5seg_config_proto = op_metadata_proto.yolov5seg_config(); + hailort::net_flow::YoloV5SegPostProcessConfig yolov5seg_post_process_config = {yolov5seg_config_proto.mask_threshold(), + yolov5seg_config_proto.layer_name()}; + return yolov5seg_post_process_config; +} + +Expected> deserialize_ops_metadata(const ProtoOpsMetadata &ops_metadatas_proto) +{ + std::vector ops_metadata_ptr; + auto ops_metadata_proto = ops_metadatas_proto.ops_metadata(); + for (auto &op_metadata_proto : ops_metadata_proto) { + auto inputs_metadata = deserialize_inputs_buffer_metadata(op_metadata_proto); + auto outputs_metadata = deserialize_outputs_buffer_metadata(op_metadata_proto); + + hailort::net_flow::NmsPostProcessConfig nms_post_process_config; + if ((op_metadata_proto.type() == static_cast(net_flow::OperationType::YOLOV5)) || + (op_metadata_proto.type() == static_cast(net_flow::OperationType::YOLOV8)) || + (op_metadata_proto.type() == static_cast(net_flow::OperationType::YOLOX)) || + (op_metadata_proto.type() == static_cast(net_flow::OperationType::IOU)) || + (op_metadata_proto.type() == static_cast(net_flow::OperationType::SSD)) || + (op_metadata_proto.type() == static_cast(net_flow::OperationType::YOLOV5SEG))) { + // In case this is an NMS PP - initialize the values for the nms post process config + auto &nms_config_proto = op_metadata_proto.nms_post_process_config(); + nms_post_process_config = {nms_config_proto.nms_score_th(), + nms_config_proto.nms_iou_th(), + nms_config_proto.max_proposals_per_class(), + nms_config_proto.number_of_classes(), + nms_config_proto.background_removal(), + 
nms_config_proto.background_removal_index(), + nms_config_proto.cross_classes()}; + } + + switch (static_cast(op_metadata_proto.type())) { + case net_flow::OperationType::YOLOV8: + { + auto expected_yolov8_post_process_config = create_yolov8_post_process_config(op_metadata_proto); + CHECK_EXPECTED(expected_yolov8_post_process_config); + auto expteted_yolov8_metadata = hailort::net_flow::Yolov8OpMetadata::create(inputs_metadata, outputs_metadata, nms_post_process_config, + expected_yolov8_post_process_config.value(), + op_metadata_proto.network_name()); + CHECK_EXPECTED(expteted_yolov8_metadata); + ops_metadata_ptr.push_back(expteted_yolov8_metadata.value()); + break; + } + + case net_flow::OperationType::YOLOV5: + { + auto exected_yolov5_post_process_config = create_yolov5_post_process_config(op_metadata_proto); + CHECK_EXPECTED(exected_yolov5_post_process_config); + auto expteted_yolov5_metadata = hailort::net_flow::Yolov5OpMetadata::create(inputs_metadata, outputs_metadata, nms_post_process_config, + exected_yolov5_post_process_config.value(), + op_metadata_proto.network_name()); + CHECK_EXPECTED(expteted_yolov5_metadata); + ops_metadata_ptr.push_back(expteted_yolov5_metadata.value()); + break; + } + + case net_flow::OperationType::YOLOX: + { + auto expected_yolox_post_process_config = create_yolox_post_process_config(op_metadata_proto); + CHECK_EXPECTED(expected_yolox_post_process_config); + auto expected_yolox_metadata = hailort::net_flow::YoloxOpMetadata::create(inputs_metadata, outputs_metadata, nms_post_process_config, + expected_yolox_post_process_config.value(), + op_metadata_proto.network_name()); + CHECK_EXPECTED(expected_yolox_metadata); + ops_metadata_ptr.push_back(expected_yolox_metadata.value()); + break; + } + + case net_flow::OperationType::SSD: + { + auto expected_ssd_post_process_config = create_ssd_post_process_config(op_metadata_proto); + CHECK_EXPECTED(expected_ssd_post_process_config); + auto expteted_ssd_metadata = 
hailort::net_flow::SSDOpMetadata::create(inputs_metadata, outputs_metadata, nms_post_process_config, + expected_ssd_post_process_config.value(), + op_metadata_proto.network_name()); + CHECK_EXPECTED(expteted_ssd_metadata); + ops_metadata_ptr.push_back(expteted_ssd_metadata.value()); + break; + } + + case net_flow::OperationType::SOFTMAX: + { + auto expteted_softmax_metadata = hailort::net_flow::SoftmaxOpMetadata::create(inputs_metadata, outputs_metadata, + op_metadata_proto.network_name()); + CHECK_EXPECTED(expteted_softmax_metadata); + ops_metadata_ptr.push_back(expteted_softmax_metadata.value()); + break; + } + + case net_flow::OperationType::ARGMAX: + { + auto expteted_argmax_metadata = hailort::net_flow::ArgmaxOpMetadata::create(inputs_metadata, outputs_metadata, + op_metadata_proto.network_name()); + CHECK_EXPECTED(expteted_argmax_metadata); + ops_metadata_ptr.push_back(expteted_argmax_metadata.value()); + break; + } + + case net_flow::OperationType::YOLOV5SEG: + { + auto expected_yolov5_post_process_config = create_yolov5_post_process_config(op_metadata_proto); + CHECK_EXPECTED(expected_yolov5_post_process_config); + + auto expected_yolov5seg_post_process_config = create_yolov5seg_post_process_config(op_metadata_proto); + CHECK_EXPECTED(expected_yolov5seg_post_process_config); + + auto expected_yolov5seg_metadata = hailort::net_flow::Yolov5SegOpMetadata::create(inputs_metadata, outputs_metadata, nms_post_process_config, + expected_yolov5_post_process_config.value(), + expected_yolov5seg_post_process_config.value(), + op_metadata_proto.network_name()); + CHECK_EXPECTED(expected_yolov5seg_metadata); + ops_metadata_ptr.push_back(expected_yolov5seg_metadata.value()); + break; + } + + case net_flow::OperationType::IOU: + { + auto expected_nms_op_metadata = hailort::net_flow::NmsOpMetadata::create(inputs_metadata, outputs_metadata, nms_post_process_config, + op_metadata_proto.network_name(), + static_cast(op_metadata_proto.type()), + op_metadata_proto.name()); + 
CHECK_EXPECTED(expected_nms_op_metadata); + ops_metadata_ptr.push_back(expected_nms_op_metadata.value()); + break; + } + } + } + return ops_metadata_ptr; +} + +LayerInfo deserialize_layer_info(const ProtoLayerInfo &info_proto) +{ + LayerInfo info; + info.type = static_cast(info_proto.type()); + info.direction = static_cast(info_proto.direction()); + info.stream_index = static_cast(info_proto.stream_index()); + info.dma_engine_index = static_cast(info_proto.dma_engine_index()); + info.name = info_proto.name(); + info.network_name = info_proto.network_name(); + info.network_index = static_cast(info_proto.network_index()); + info.max_shmifo_size = info_proto.max_shmifo_size(); + info.context_index = static_cast(info_proto.context_index()); + info.pad_index = info_proto.pad_index(); + + // Transformation and shape info + hailo_3d_image_shape_t shape = { + info_proto.shape().height(), + info_proto.shape().width(), + info_proto.shape().features() + }; + info.shape = shape; + + hailo_3d_image_shape_t hw_shape = { + info_proto.hw_shape().height(), + info_proto.hw_shape().width(), + info_proto.hw_shape().features() + }; + info.hw_shape = hw_shape; + + info.hw_data_bytes = info_proto.hw_data_bytes(); + + hailo_format_t format = { + static_cast(info_proto.format().type()), + static_cast(info_proto.format().order()), + static_cast(info_proto.format().flags()) + }; + info.format = format; + + hailo_quant_info_t single_quant_info = { + info_proto.quant_info().qp_zp(), + info_proto.quant_info().qp_scale(), + info_proto.quant_info().limvals_min(), + info_proto.quant_info().limvals_max() + }; + info.quant_info = single_quant_info; + + for (const auto &quant_info : info_proto.quant_infos()) { + single_quant_info = { + quant_info.qp_zp(), + quant_info.qp_scale(), + quant_info.limvals_min(), + quant_info.limvals_max() + }; + info.quant_infos.push_back(single_quant_info); + } + + hailo_nms_defuse_info_t nms_defuse_info{ + info_proto.nms_info().defuse_info().class_group_index(), + {0} + 
}; + strcpy(nms_defuse_info.original_name, info_proto.nms_info().defuse_info().original_name().c_str()); + hailo_nms_info_t nms_info{ + info_proto.nms_info().number_of_classes(), + info_proto.nms_info().max_bboxes_per_class(), + info_proto.nms_info().bbox_size(), + info_proto.nms_info().chunks_per_frame(), + info_proto.nms_info().is_defused(), + nms_defuse_info, + info_proto.nms_info().burst_size(), + static_cast(info_proto.nms_info().burst_type()), + }; + info.nms_info = nms_info; + + + // Mux info + info.is_mux = info_proto.is_mux(); + for (const auto &pred_proto : info_proto.predecessor()) { + auto pred = deserialize_layer_info(pred_proto); + info.predecessor.push_back(pred); + } + info.height_gcd = info_proto.height_gcd(); + for (const auto &height_ratio : info_proto.height_ratios()) { + info.height_ratios.push_back(height_ratio); + } + + // Multi planes info + info.is_multi_planar = info_proto.is_multi_planar(); + for (const auto &planes_proto : info_proto.planes()) { + auto plane = deserialize_layer_info(planes_proto); + info.planes.push_back(plane); + } + info.plane_index = static_cast(info_proto.plane_index()); + + // Defused nms info + info.is_defused_nms = info_proto.is_defused_nms(); + for (const auto &fused_proto : info_proto.fused_nms_layer()) { + auto fused = deserialize_layer_info(fused_proto); + info.fused_nms_layer.push_back(fused); + } + + return info; +} + hailo_vstream_info_t deserialize_vstream_info(const ProtoVStreamInfo &info_proto) { hailo_vstream_info_t info; @@ -869,6 +1294,107 @@ Expected> HailoRtRpcClient::ConfiguredNetworkGroup_get_ return result; } +Expected HailoRtRpcClient::ConfiguredNetworkGroup_get_min_buffer_pool_size(const NetworkGroupIdentifier &identifier) +{ + ConfiguredNetworkGroup_get_min_buffer_pool_size_Request request; + ConfiguredNetworkGroup_get_min_buffer_pool_size_Reply reply; + auto proto_identifier = request.mutable_identifier(); + ConfiguredNetworkGroup_convert_identifier_to_proto(identifier, proto_identifier); + 
ClientContextWithTimeout context; + grpc::Status status = m_stub->ConfiguredNetworkGroup_get_min_buffer_pool_size(&context, request, &reply); + CHECK_GRPC_STATUS_AS_EXPECTED(status); + assert(reply.status() < HAILO_STATUS_COUNT); + CHECK_SUCCESS_AS_EXPECTED(static_cast(reply.status())); + auto min_buffer_pool_size = reply.min_buffer_pool_size(); + return min_buffer_pool_size; +} + +Expected> HailoRtRpcClient::ConfiguredNetworkGroup_get_layer_info(const NetworkGroupIdentifier &identifier, const std::string &stream_name) +{ + ConfiguredNetworkGroup_get_layer_info_Request request; + ConfiguredNetworkGroup_get_layer_info_Reply reply; + auto proto_identifier = request.mutable_identifier(); + ConfiguredNetworkGroup_convert_identifier_to_proto(identifier, proto_identifier); + request.set_stream_name(stream_name); + ClientContextWithTimeout context; + grpc::Status status = m_stub->ConfiguredNetworkGroup_get_layer_info(&context, request, &reply); + CHECK_GRPC_STATUS_AS_EXPECTED(status); + assert(reply.status() < HAILO_STATUS_COUNT); + CHECK_SUCCESS_AS_EXPECTED(static_cast(reply.status())); + auto info_proto = reply.layer_info(); + auto layer = deserialize_layer_info(info_proto); + auto layer_ptr = make_unique_nothrow(std::move(layer)); + CHECK_NOT_NULL_AS_EXPECTED(layer_ptr, HAILO_OUT_OF_HOST_MEMORY); + return layer_ptr; +} + +Expected> HailoRtRpcClient::ConfiguredNetworkGroup_get_ops_metadata(const NetworkGroupIdentifier &identifier) +{ + ConfiguredNetworkGroup_get_ops_metadata_Request request; + ConfiguredNetworkGroup_get_ops_metadata_Reply reply; + auto proto_identifier = request.mutable_identifier(); + ConfiguredNetworkGroup_convert_identifier_to_proto(identifier, proto_identifier); + ClientContextWithTimeout context; + grpc::Status status = m_stub->ConfiguredNetworkGroup_get_ops_metadata(&context, request, &reply); + CHECK_GRPC_STATUS_AS_EXPECTED(status); + assert(reply.status() < HAILO_STATUS_COUNT); + auto ops_meta_data_proto = reply.ops_metadata(); + auto 
ops_metadata = deserialize_ops_metadata(ops_meta_data_proto); + CHECK_EXPECTED(ops_metadata); + return ops_metadata; +} + +hailo_status HailoRtRpcClient::ConfiguredNetworkGroup_set_nms_score_threshold(const NetworkGroupIdentifier &identifier, + const std::string &edge_name, float32_t nms_score_th) +{ + ConfiguredNetworkGroup_set_nms_score_threshold_Request request; + auto proto_identifier = request.mutable_identifier(); + ConfiguredNetworkGroup_convert_identifier_to_proto(identifier, proto_identifier); + request.set_edge_name(edge_name); + request.set_nms_score_th(nms_score_th); + + ConfiguredNetworkGroup_set_nms_score_threshold_Reply reply; + ClientContextWithTimeout context; + grpc::Status status = m_stub->ConfiguredNetworkGroup_set_nms_score_threshold(&context, request, &reply); + CHECK_GRPC_STATUS(status); + assert(reply.status() < HAILO_STATUS_COUNT); + return static_cast(reply.status()); +} + +hailo_status HailoRtRpcClient::ConfiguredNetworkGroup_set_nms_iou_threshold(const NetworkGroupIdentifier &identifier, + const std::string &edge_name, float32_t nms_iou_threshold) +{ + ConfiguredNetworkGroup_set_nms_iou_threshold_Request request; + auto proto_identifier = request.mutable_identifier(); + ConfiguredNetworkGroup_convert_identifier_to_proto(identifier, proto_identifier); + request.set_edge_name(edge_name); + request.set_nms_iou_th(nms_iou_threshold); + + ConfiguredNetworkGroup_set_nms_iou_threshold_Reply reply; + ClientContextWithTimeout context; + grpc::Status status = m_stub->ConfiguredNetworkGroup_set_nms_iou_threshold(&context, request, &reply); + CHECK_GRPC_STATUS(status); + assert(reply.status() < HAILO_STATUS_COUNT); + return static_cast(reply.status()); +} + +hailo_status HailoRtRpcClient::ConfiguredNetworkGroup_set_nms_max_bboxes_per_class(const NetworkGroupIdentifier &identifier, + const std::string &edge_name, uint32_t max_bboxes) +{ + ConfiguredNetworkGroup_set_nms_max_bboxes_per_class_Request request; + auto proto_identifier = 
request.mutable_identifier(); + ConfiguredNetworkGroup_convert_identifier_to_proto(identifier, proto_identifier); + request.set_edge_name(edge_name); + request.set_nms_max_bboxes_per_class(max_bboxes); + + ConfiguredNetworkGroup_set_nms_max_bboxes_per_class_Reply reply; + ClientContextWithTimeout context; + grpc::Status status = m_stub->ConfiguredNetworkGroup_set_nms_max_bboxes_per_class(&context, request, &reply); + CHECK_GRPC_STATUS(status); + assert(reply.status() < HAILO_STATUS_COUNT); + return static_cast(reply.status()); +} + Expected> HailoRtRpcClient::ConfiguredNetworkGroup_get_stream_names_from_vstream_name(const NetworkGroupIdentifier &identifier, const std::string &vstream_name) { @@ -908,6 +1434,42 @@ Expected> HailoRtRpcClient::ConfiguredNetworkGroup_get_ return result; } +hailo_status HailoRtRpcClient::ConfiguredNetworkGroup_infer_async(const NetworkGroupIdentifier &identifier, + const std::vector> &cb_idx_to_stream_buffer, + const callback_idx_t infer_request_done_cb, const std::unordered_set &input_streams_names) +{ + ConfiguredNetworkGroup_infer_async_Request request; + ConfiguredNetworkGroup_infer_async_Reply reply; + auto proto_identifier = request.mutable_identifier(); + ConfiguredNetworkGroup_convert_identifier_to_proto(identifier, proto_identifier); + auto proto_transfer_buffers = request.mutable_transfer_requests(); + for (const auto &idx_named_buffer : cb_idx_to_stream_buffer) { + ProtoTransferRequest proto_transfer_request; + proto_transfer_request.set_cb_idx(std::get<0>(idx_named_buffer)); + const auto &stream_name = std::get<1>(idx_named_buffer); + proto_transfer_request.set_stream_name(stream_name); + if (contains(input_streams_names, stream_name)) { + proto_transfer_request.set_direction(HAILO_H2D_STREAM); + proto_transfer_request.set_data(std::get<2>(idx_named_buffer).data(), std::get<2>(idx_named_buffer).size()); + } else { + proto_transfer_request.set_direction(HAILO_D2H_STREAM); + } + 
proto_transfer_request.set_size(static_cast(std::get<2>(idx_named_buffer).size())); + proto_transfer_buffers->Add(std::move(proto_transfer_request)); + } + request.set_infer_request_done_cb_idx(infer_request_done_cb); + + ClientContextWithTimeout context; + grpc::Status status = m_stub->ConfiguredNetworkGroup_infer_async(&context, request, &reply); + assert(reply.status() < HAILO_STATUS_COUNT); + if (reply.status() == HAILO_STREAM_ABORTED_BY_USER) { + return static_cast(reply.status()); + } + CHECK_GRPC_STATUS(status); + CHECK_SUCCESS(static_cast(reply.status())); + return HAILO_SUCCESS; +} + Expected HailoRtRpcClient::InputVStream_is_multi_planar(const VStreamIdentifier &identifier) { InputVStream_is_multi_planar_Request request; diff --git a/hailort/libhailort/src/service/hailort_rpc_client.hpp b/hailort/libhailort/src/service/hailort_rpc_client.hpp index 4b3d70b..7be961d 100644 --- a/hailort/libhailort/src/service/hailort_rpc_client.hpp +++ b/hailort/libhailort/src/service/hailort_rpc_client.hpp @@ -36,6 +36,7 @@ namespace hailort // Higher then default-hrt-timeout so we can differentiate errors static const std::chrono::milliseconds CONTEXT_TIMEOUT(HAILO_DEFAULT_VSTREAM_TIMEOUT_MS + 500); +using callback_idx_t = uint32_t; class ClientContextWithTimeout : public grpc::ClientContext { public: @@ -59,14 +60,16 @@ public: Expected>> VDevice_get_physical_devices(const VDeviceIdentifier &identifier); Expected VDevice_get_default_streams_interface(const VDeviceIdentifier &identifier); Expected> VDevice_configure(const VDeviceIdentifier &identifier, const Hef &hef, uint32_t pid, const NetworkGroupsParamsMap &configure_params={}); + Expected VDevice_get_callback_id(const VDeviceIdentifier &identifier); + hailo_status VDevice_finish_callback_listener(const VDeviceIdentifier &identifier); Expected ConfiguredNetworkGroup_dup_handle(const NetworkGroupIdentifier &identifier, uint32_t pid); hailo_status ConfiguredNetworkGroup_release(const NetworkGroupIdentifier &identifier, 
uint32_t pid); Expected> ConfiguredNetworkGroup_make_input_vstream_params(const NetworkGroupIdentifier &identifier, - bool quantized, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size, + hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size, const std::string &network_name); Expected> ConfiguredNetworkGroup_make_output_vstream_params(const NetworkGroupIdentifier &identifier, - bool quantized, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size, + hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size, const std::string &network_name); Expected ConfiguredNetworkGroup_get_network_group_name(const NetworkGroupIdentifier &identifier); Expected ConfiguredNetworkGroup_name(const NetworkGroupIdentifier &identifier); @@ -74,7 +77,8 @@ public: Expected> ConfiguredNetworkGroup_get_all_stream_infos(const NetworkGroupIdentifier &identifier, const std::string &network_name); Expected ConfiguredNetworkGroup_get_default_stream_interface(const NetworkGroupIdentifier &identifier); Expected>> ConfiguredNetworkGroup_make_output_vstream_params_groups(const NetworkGroupIdentifier &identifier, - bool quantized, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size); + hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size); + hailo_status ConfiguredNetworkGroup_shutdown(const NetworkGroupIdentifier &identifier); Expected>> ConfiguredNetworkGroup_get_output_vstream_groups(const NetworkGroupIdentifier &identifier); Expected> ConfiguredNetworkGroup_get_input_vstream_infos(const NetworkGroupIdentifier &identifier, std::string network_name); Expected> ConfiguredNetworkGroup_get_output_vstream_infos(const NetworkGroupIdentifier &identifier, std::string network_name); @@ -88,8 +92,17 @@ public: Expected ConfiguredNetworkGroup_is_multi_context(const NetworkGroupIdentifier &identifier); Expected ConfiguredNetworkGroup_get_config_params(const NetworkGroupIdentifier 
&identifier); Expected> ConfiguredNetworkGroup_get_sorted_output_names(const NetworkGroupIdentifier &identifier); + Expected ConfiguredNetworkGroup_get_min_buffer_pool_size(const NetworkGroupIdentifier &identifier); + Expected> ConfiguredNetworkGroup_get_layer_info(const NetworkGroupIdentifier &identifier, const std::string &stream_name); + Expected> ConfiguredNetworkGroup_get_ops_metadata(const NetworkGroupIdentifier &identifier); + hailo_status ConfiguredNetworkGroup_set_nms_score_threshold(const NetworkGroupIdentifier &identifier, const std::string &edge_name, float32_t nms_score_th); + hailo_status ConfiguredNetworkGroup_set_nms_iou_threshold(const NetworkGroupIdentifier &identifier, const std::string &edge_name, float32_t iou_th); + hailo_status ConfiguredNetworkGroup_set_nms_max_bboxes_per_class(const NetworkGroupIdentifier &identifier, const std::string &edge_name, uint32_t max_bboxes); Expected> ConfiguredNetworkGroup_get_stream_names_from_vstream_name(const NetworkGroupIdentifier &identifier, const std::string &vstream_name); Expected> ConfiguredNetworkGroup_get_vstream_names_from_stream_name(const NetworkGroupIdentifier &identifier, const std::string &stream_name); + hailo_status ConfiguredNetworkGroup_infer_async(const NetworkGroupIdentifier &identifier, + const std::vector> &cb_idx_to_stream_buffer, + const callback_idx_t infer_request_done_cb, const std::unordered_set &input_streams_names); Expected> InputVStreams_create(const NetworkGroupIdentifier &identifier, const std::map &inputs_params, uint32_t pid); diff --git a/hailort/libhailort/src/service/network_group_client.cpp b/hailort/libhailort/src/service/network_group_client.cpp index b085bba..89b32ec 100644 --- a/hailort/libhailort/src/service/network_group_client.cpp +++ b/hailort/libhailort/src/service/network_group_client.cpp @@ -15,6 +15,7 @@ #include "network_group/network_group_internal.hpp" #include "net_flow/pipeline/vstream_internal.hpp" +#include "net_flow/ops/nms_post_process.hpp" 
#include "rpc_client_utils.hpp" @@ -22,8 +23,10 @@ namespace hailort { ConfiguredNetworkGroupClient::ConfiguredNetworkGroupClient(std::unique_ptr client, NetworkGroupIdentifier &&identifier) : + ConfiguredNetworkGroup(), m_client(std::move(client)), - m_identifier(identifier) + m_identifier(identifier), + m_current_cb_index(0) { auto reply = m_client->ConfiguredNetworkGroup_name(m_identifier); if (!reply) { @@ -31,11 +34,26 @@ ConfiguredNetworkGroupClient::ConfiguredNetworkGroupClient(std::unique_ptr> ConfiguredNetworkGroupClient::duplicate_network_group_client(uint32_t ng_handle, uint32_t vdevice_handle, @@ -192,32 +210,37 @@ hailo_status ConfiguredNetworkGroupClient::wait_for_activation(const std::chrono return HAILO_INVALID_OPERATION; } +hailo_status ConfiguredNetworkGroupClient::shutdown() +{ + return m_client->ConfiguredNetworkGroup_shutdown(m_identifier); +} + Expected>> ConfiguredNetworkGroupClient::get_output_vstream_groups() { return m_client->ConfiguredNetworkGroup_get_output_vstream_groups(m_identifier); } Expected>> ConfiguredNetworkGroupClient::make_output_vstream_params_groups( - bool quantized, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size) + bool /*unused*/, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size) { return m_client->ConfiguredNetworkGroup_make_output_vstream_params_groups(m_identifier, - quantized, format_type, timeout_ms, queue_size); + format_type, timeout_ms, queue_size); } Expected> ConfiguredNetworkGroupClient::make_input_vstream_params( - bool quantized, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size, + bool /*unused*/, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size, const std::string &network_name) { return m_client->ConfiguredNetworkGroup_make_input_vstream_params(m_identifier, - quantized, format_type, timeout_ms, queue_size, network_name); + format_type, timeout_ms, queue_size, network_name); } Expected> 
ConfiguredNetworkGroupClient::make_output_vstream_params( - bool quantized, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size, + bool /*unused*/, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size, const std::string &network_name) { return m_client->ConfiguredNetworkGroup_make_output_vstream_params(m_identifier, - quantized, format_type, timeout_ms, queue_size, network_name); + format_type, timeout_ms, queue_size, network_name); } Expected> ConfiguredNetworkGroupClient::get_all_stream_infos(const std::string &network_name) const @@ -360,4 +383,136 @@ Expected> ConfiguredNetworkGroupClient::create_output return vstreams; } +Expected ConfiguredNetworkGroupClient::get_min_buffer_pool_size() +{ + return m_client->ConfiguredNetworkGroup_get_min_buffer_pool_size(m_identifier); +} + +Expected> ConfiguredNetworkGroupClient::get_layer_info(const std::string &stream_name) +{ + return m_client->ConfiguredNetworkGroup_get_layer_info(m_identifier, stream_name); +} + +Expected> ConfiguredNetworkGroupClient::get_ops_metadata() +{ + return m_client->ConfiguredNetworkGroup_get_ops_metadata(m_identifier); +} + +hailo_status ConfiguredNetworkGroupClient::set_nms_score_threshold(const std::string &edge_name, float32_t nms_score_threshold) +{ + return m_client->ConfiguredNetworkGroup_set_nms_score_threshold(m_identifier, edge_name, nms_score_threshold); +} + +hailo_status ConfiguredNetworkGroupClient::set_nms_iou_threshold(const std::string &edge_name, float32_t iou_threshold) +{ + return m_client->ConfiguredNetworkGroup_set_nms_iou_threshold(m_identifier, edge_name, iou_threshold); +} + +hailo_status ConfiguredNetworkGroupClient::set_nms_max_bboxes_per_class(const std::string &edge_name, uint32_t max_bboxes_per_class) +{ + return m_client->ConfiguredNetworkGroup_set_nms_max_bboxes_per_class(m_identifier, edge_name, max_bboxes_per_class); +} + +hailo_status ConfiguredNetworkGroupClient::execute_callback(const ProtoCallbackIdentifier 
&cb_id) +{ + if (cb_id.cb_type() == CALLBACK_TYPE_TRANSFER) { + execute_transfer_callback(cb_id); + } else if (cb_id.cb_type() == CALLBACK_TYPE_INFER_REQUEST) { + execute_infer_request_callback(cb_id); + } else { + LOGGER__ERROR("Got invalid callback type = {}", cb_id.cb_type()); + return HAILO_INTERNAL_FAILURE; + } + + return HAILO_SUCCESS; +} + +hailo_status ConfiguredNetworkGroupClient::execute_infer_request_callback(const ProtoCallbackIdentifier &cb_id) +{ + std::function cb; + { + std::unique_lock lock(m_mutex); + CHECK(contains(m_infer_request_idx_to_callbacks, cb_id.cb_idx()), HAILO_NOT_FOUND); + cb = m_infer_request_idx_to_callbacks.at(cb_id.cb_idx()); + m_infer_request_idx_to_callbacks.erase(cb_id.cb_idx()); + } + cb(static_cast(cb_id.status())); + + return HAILO_SUCCESS; +} + +hailo_status ConfiguredNetworkGroupClient::execute_transfer_callback(const ProtoCallbackIdentifier &cb_id) +{ + NamedBufferCallbackTuplePtr name_buffer_callback_ptr; + { + std::unique_lock lock(m_mutex); + CHECK(contains(m_idx_to_callbacks, cb_id.cb_idx()), HAILO_NOT_FOUND); + name_buffer_callback_ptr = m_idx_to_callbacks.at(cb_id.cb_idx()); + m_idx_to_callbacks.erase(cb_id.cb_idx()); + } + const auto &stream_name = cb_id.stream_name(); + CHECK((std::get<0>(*name_buffer_callback_ptr.get()) == stream_name), HAILO_INTERNAL_FAILURE, + "Callback identifier does not match stream name {}", stream_name); + if (contains(m_output_streams_names, stream_name)) { + memcpy(std::get<1>(*name_buffer_callback_ptr.get()).data(), cb_id.data().data(), cb_id.data().size()); + } + std::get<2>(*name_buffer_callback_ptr.get())(static_cast(cb_id.status())); + + return HAILO_SUCCESS; +} + +callback_idx_t ConfiguredNetworkGroupClient::get_unique_callback_idx() +{ + return m_current_cb_index.fetch_add(1); +} + +hailo_status ConfiguredNetworkGroupClient::infer_async(const NamedBuffersCallbacks &named_buffers_callbacks, + const std::function &infer_request_done_cb) +{ + std::vector> cb_idx_to_stream_buffer; + 
cb_idx_to_stream_buffer.reserve(named_buffers_callbacks.size()); + { + std::unique_lock lock(m_mutex); + for (const auto &name_buffer_cb : named_buffers_callbacks) { + auto cb_idx = get_unique_callback_idx(); + auto name_buffer_cb_tuple = std::make_tuple(name_buffer_cb.first, name_buffer_cb.second.first, name_buffer_cb.second.second); + auto tuple_ptr = make_shared_nothrow(name_buffer_cb_tuple); + CHECK_NOT_NULL(tuple_ptr, HAILO_OUT_OF_HOST_MEMORY); + + m_idx_to_callbacks.emplace(cb_idx, tuple_ptr); + cb_idx_to_stream_buffer.emplace_back(std::make_tuple(cb_idx, name_buffer_cb.first, name_buffer_cb.second.first)); + } + } + + auto infer_request_callback = [this, infer_request_done_cb](hailo_status status){ + if (status == HAILO_STREAM_ABORTED_BY_USER) { + LOGGER__INFO("Infer request was aborted by user"); + } + else if (status != HAILO_SUCCESS) { + LOGGER__ERROR("Infer request callback failed with status = {}", status); + } + + infer_request_done_cb(status); + decrease_ongoing_callbacks(); + }; + + auto infer_request_cb_idx = 0; + { + std::unique_lock lock(m_mutex); + infer_request_cb_idx = get_unique_callback_idx(); + m_infer_request_idx_to_callbacks.emplace(infer_request_cb_idx, infer_request_callback); + } + + increase_ongoing_callbacks(); + auto status = m_client->ConfiguredNetworkGroup_infer_async(m_identifier, cb_idx_to_stream_buffer, + infer_request_cb_idx, m_input_streams_names); + if (status == HAILO_STREAM_ABORTED_BY_USER) { + LOGGER__INFO("Infer request was aborted by user"); + return status; + } + CHECK_SUCCESS(status); + + return HAILO_SUCCESS; +} + } /* namespace hailort */ \ No newline at end of file diff --git a/hailort/libhailort/src/stream_common/async_stream_base.cpp b/hailort/libhailort/src/stream_common/async_stream_base.cpp index 58278d8..0968aee 100644 --- a/hailort/libhailort/src/stream_common/async_stream_base.cpp +++ b/hailort/libhailort/src/stream_common/async_stream_base.cpp @@ -7,6 +7,7 @@ **/ #include "async_stream_base.hpp" +#include 
"common/os_utils.hpp" namespace hailort { @@ -27,18 +28,24 @@ static const char *get_buffer_mode_api_name(StreamBufferMode mode) } } -AsyncInputStreamBase::AsyncInputStreamBase(const LayerInfo &edge_layer, - hailo_stream_interface_t stream_interface, EventPtr core_op_activated_event, hailo_status &status) : - InputStreamBase(edge_layer, stream_interface, core_op_activated_event, status), +AsyncInputStreamBase::AsyncInputStreamBase(const LayerInfo &edge_layer, EventPtr core_op_activated_event, + hailo_status &status) : + InputStreamBase(edge_layer, core_op_activated_event, status), m_is_stream_activated(false), m_is_aborted(false), m_timeout(DEFAULT_TRANSFER_TIMEOUT), m_buffer_mode(StreamBufferMode::NOT_SET), - m_ongoing_transfers(0), - m_interrupt_callback(ignore_interrupts_callback) -{} + m_ongoing_transfers(0) +{ + // Checking status for base class c'tor + if (HAILO_SUCCESS != status) { + return; + } -hailo_status AsyncInputStreamBase::abort() + status = HAILO_SUCCESS; +} + +hailo_status AsyncInputStreamBase::abort_impl() { { std::lock_guard lock(m_stream_mutex); @@ -48,7 +55,7 @@ hailo_status AsyncInputStreamBase::abort() return HAILO_SUCCESS; } -hailo_status AsyncInputStreamBase::clear_abort() +hailo_status AsyncInputStreamBase::clear_abort_impl() { { std::lock_guard lock(m_stream_mutex); @@ -58,16 +65,6 @@ hailo_status AsyncInputStreamBase::clear_abort() return HAILO_SUCCESS; } -void AsyncInputStreamBase::notify_all() -{ - { - // Acquire mutex to make sure the notify_all will wake the blocking threads on the cv. 
- std::unique_lock lock(m_stream_mutex); - } - - m_has_ready_buffer.notify_all(); -} - hailo_status AsyncInputStreamBase::set_buffer_mode(StreamBufferMode buffer_mode) { CHECK(StreamBufferMode::NOT_SET != buffer_mode, HAILO_INVALID_OPERATION, "Can't set buffer mode to NOT_SET"); @@ -117,14 +114,14 @@ hailo_status AsyncInputStreamBase::flush() }); } -hailo_status AsyncInputStreamBase::write_impl(const MemoryView &user_buffer, std::function should_cancel) +hailo_status AsyncInputStreamBase::write_impl(const MemoryView &user_buffer) { auto status = set_buffer_mode(StreamBufferMode::OWNING); CHECK_SUCCESS(status); std::unique_lock lock(m_stream_mutex); auto is_ready = [this]() { return is_ready_for_transfer() && is_ready_for_dequeue(); }; - status = cv_wait_for(lock, m_timeout, is_ready, should_cancel); + status = cv_wait_for(lock, m_timeout, is_ready); if (HAILO_SUCCESS != status) { // errors logs on cv_wait_for return status; @@ -137,8 +134,7 @@ hailo_status AsyncInputStreamBase::write_impl(const MemoryView &user_buffer, std status = stream_buffer.copy_from(user_buffer); CHECK_SUCCESS(status); - return call_write_async_impl(TransferRequest{ - stream_buffer, + return call_write_async_impl(TransferRequest(std::move(stream_buffer), [this, stream_buffer](hailo_status) { std::unique_lock lock(m_stream_mutex); auto enqueue_status = m_buffer_pool->enqueue(TransferBuffer{stream_buffer}); @@ -146,25 +142,7 @@ hailo_status AsyncInputStreamBase::write_impl(const MemoryView &user_buffer, std LOGGER__ERROR("Failed enqueue stream buffer {}", enqueue_status); } } - }); -} - -hailo_status AsyncInputStreamBase::write_impl(const MemoryView &user_buffer) -{ - const auto SHOULD_CANCEL = []() { return false; }; - return write_impl(user_buffer, SHOULD_CANCEL); -} - -hailo_status AsyncInputStreamBase::register_interrupt_callback(const ProcessingCompleteCallback &callback) -{ - std::unique_lock lock(m_stream_mutex); - m_interrupt_callback = callback; - return HAILO_SUCCESS; -} - -Expected 
AsyncInputStreamBase::get_buffer_frames_size() const -{ - return get_max_ongoing_transfers(); + )); } Expected AsyncInputStreamBase::get_async_max_queue_size() const @@ -192,6 +170,13 @@ hailo_status AsyncInputStreamBase::write_async(TransferRequest &&transfer_reques CHECK_SUCCESS(status); std::unique_lock lock(m_stream_mutex); + + if (m_is_aborted) { + return HAILO_STREAM_ABORTED_BY_USER; + } else if (!m_is_stream_activated) { + return HAILO_STREAM_NOT_ACTIVATED; + } + return call_write_async_impl(std::move(transfer_request)); } @@ -234,12 +219,6 @@ hailo_status AsyncInputStreamBase::deactivate_stream() hailo_status AsyncInputStreamBase::call_write_async_impl(TransferRequest &&transfer_request) { transfer_request.callback = [this, callback=transfer_request.callback](hailo_status callback_status) { - if (HAILO_SUCCESS == callback_status) { - // Calling interrupt callback first (only if successful), since callback() may update the state (and we call - // interrupt_callback before the state is activated). 
- m_interrupt_callback(); - } - callback(callback_status); { @@ -272,18 +251,17 @@ bool AsyncInputStreamBase::is_ready_for_dequeue() const return m_ongoing_transfers < m_buffer_pool->max_queue_size(); } -AsyncOutputStreamBase::AsyncOutputStreamBase(const LayerInfo &edge_layer, hailo_stream_interface_t interface, - EventPtr core_op_activated_event, hailo_status &status) : - OutputStreamBase(edge_layer, interface, std::move(core_op_activated_event), status), +AsyncOutputStreamBase::AsyncOutputStreamBase(const LayerInfo &edge_layer, EventPtr core_op_activated_event, + hailo_status &status) : + OutputStreamBase(edge_layer, std::move(core_op_activated_event), status), m_is_stream_activated(false), m_is_aborted(false), m_timeout(DEFAULT_TRANSFER_TIMEOUT), m_buffer_mode(StreamBufferMode::NOT_SET), - m_ongoing_transfers(0), - m_interrupt_callback(ignore_interrupts_callback) + m_ongoing_transfers(0) {} -hailo_status AsyncOutputStreamBase::abort() +hailo_status AsyncOutputStreamBase::abort_impl() { { std::lock_guard lock(m_stream_mutex); @@ -293,7 +271,7 @@ hailo_status AsyncOutputStreamBase::abort() return HAILO_SUCCESS; } -hailo_status AsyncOutputStreamBase::clear_abort() +hailo_status AsyncOutputStreamBase::clear_abort_impl() { { std::lock_guard lock(m_stream_mutex); @@ -327,18 +305,19 @@ hailo_status AsyncOutputStreamBase::read_async(TransferRequest &&transfer_reques CHECK_SUCCESS(status); std::unique_lock lock(m_stream_mutex); + + if (m_is_aborted) { + return HAILO_STREAM_ABORTED_BY_USER; + } else if (!m_is_stream_activated) { + return HAILO_STREAM_NOT_ACTIVATED; + } + return call_read_async_impl(std::move(transfer_request)); } hailo_status AsyncOutputStreamBase::call_read_async_impl(TransferRequest &&transfer_request) { transfer_request.callback = [this, callback=transfer_request.callback](hailo_status callback_status) { - if (HAILO_SUCCESS == callback_status) { - // Calling interrupt callback first (only if successful), since callback() may update the state (and we 
call - // interrupt_callback before the state is activated). - m_interrupt_callback(); - } - callback(callback_status); { @@ -349,7 +328,6 @@ hailo_status AsyncOutputStreamBase::call_read_async_impl(TransferRequest &&trans m_has_ready_buffer.notify_all(); }; - auto status = read_async_impl(std::move(transfer_request)); if (HAILO_STREAM_ABORTED_BY_USER == status) { return status; @@ -361,13 +339,6 @@ hailo_status AsyncOutputStreamBase::call_read_async_impl(TransferRequest &&trans return HAILO_SUCCESS; } -hailo_status AsyncOutputStreamBase::register_interrupt_callback(const ProcessingCompleteCallback &callback) -{ - std::unique_lock lock(m_stream_mutex); - m_interrupt_callback = callback; - return HAILO_SUCCESS; -} - hailo_status AsyncOutputStreamBase::activate_stream() { std::unique_lock lock(m_stream_mutex); @@ -420,12 +391,8 @@ bool AsyncOutputStreamBase::is_ready_for_transfer() const hailo_status AsyncOutputStreamBase::prepare_all_transfers() { - const auto max_transfers_in_buffer = get_buffer_frames_size(); - CHECK_EXPECTED_AS_STATUS(max_transfers_in_buffer); - - assert(*max_transfers_in_buffer >= m_pending_buffers.size()); - const auto transfers_count = *max_transfers_in_buffer - m_pending_buffers.size(); - for (size_t i = 0; i < transfers_count; i++) { + const auto queue_size = get_max_ongoing_transfers(); + for (size_t i = 0; i < queue_size; i++) { auto status = dequeue_and_launch_transfer(); CHECK_SUCCESS(status); } @@ -474,12 +441,6 @@ std::chrono::milliseconds AsyncOutputStreamBase::get_timeout() const return m_timeout; } -Expected AsyncOutputStreamBase::get_buffer_frames_size() const -{ - return get_max_ongoing_transfers(); -} - - hailo_status AsyncOutputStreamBase::read_impl(MemoryView user_buffer) { auto status = set_buffer_mode(StreamBufferMode::OWNING); @@ -517,7 +478,7 @@ hailo_status AsyncOutputStreamBase::dequeue_and_launch_transfer() auto buffer = m_buffer_pool->dequeue(); CHECK_EXPECTED_AS_STATUS(buffer); - auto callback = [this, 
buffer=buffer.value()](hailo_status status) { + auto callback = [this, buffer=buffer.value()](hailo_status status) { if (HAILO_STREAM_ABORTED_BY_USER == status) { // On deactivation flow, we should get this status. We just ignore the callback here, and in the next // activation we should reset the buffers. @@ -530,7 +491,7 @@ hailo_status AsyncOutputStreamBase::dequeue_and_launch_transfer() } }; - auto status = call_read_async_impl(TransferRequest{buffer.value(), callback}); + auto status = call_read_async_impl(TransferRequest(std::move(buffer.value()), callback)); if (HAILO_STREAM_ABORTED_BY_USER == status) { // The buffer_pool state will reset on next activation. return status; diff --git a/hailort/libhailort/src/stream_common/async_stream_base.hpp b/hailort/libhailort/src/stream_common/async_stream_base.hpp index 20335a5..48640a3 100644 --- a/hailort/libhailort/src/stream_common/async_stream_base.hpp +++ b/hailort/libhailort/src/stream_common/async_stream_base.hpp @@ -14,6 +14,7 @@ #include "stream_common/stream_internal.hpp" #include "stream_common/stream_buffer_pool.hpp" +#include "queued_stream_buffer_pool.hpp" #include "utils/thread_safe_queue.hpp" @@ -22,26 +23,21 @@ namespace hailort class AsyncInputStreamBase : public InputStreamBase { public: - AsyncInputStreamBase(const LayerInfo &edge_layer, - hailo_stream_interface_t stream_interface, EventPtr core_op_activated_event, hailo_status &status); + AsyncInputStreamBase(const LayerInfo &edge_layer, EventPtr core_op_activated_event, + hailo_status &status); virtual hailo_status set_buffer_mode(StreamBufferMode buffer_mode) override; virtual std::chrono::milliseconds get_timeout() const override; virtual hailo_status set_timeout(std::chrono::milliseconds timeout) override; virtual hailo_status flush() override; - virtual hailo_status abort() override; - virtual hailo_status clear_abort() override; + virtual hailo_status abort_impl() override; + virtual hailo_status clear_abort_impl() override; - virtual void 
notify_all() override; - - virtual hailo_status register_interrupt_callback(const ProcessingCompleteCallback &callback) override; - virtual Expected get_buffer_frames_size() const override; virtual Expected get_async_max_queue_size() const override; virtual hailo_status wait_for_async_ready(size_t transfer_size, std::chrono::milliseconds timeout) override; virtual hailo_status write_async(TransferRequest &&transfer_request) override; - virtual hailo_status write_impl(const MemoryView &buffer, std::function should_cancel); virtual hailo_status write_impl(const MemoryView &buffer) override; virtual hailo_status activate_stream() override; @@ -63,16 +59,13 @@ private: bool is_ready_for_transfer() const; bool is_ready_for_dequeue() const; - static void ignore_interrupts_callback() {} - template - hailo_status cv_wait_for(std::unique_lock &lock, std::chrono::milliseconds timeout, Pred &&pred, - std::function should_cancel = [](){ return false; }) + hailo_status cv_wait_for(std::unique_lock &lock, std::chrono::milliseconds timeout, Pred &&pred) { hailo_status status = HAILO_SUCCESS; const auto wait_done = m_has_ready_buffer.wait_for(lock, timeout, - [this, pred, should_cancel, &status] { - if (m_is_aborted || should_cancel()) { + [this, pred, &status] { + if (m_is_aborted) { status = HAILO_STREAM_ABORTED_BY_USER; return true; } @@ -108,20 +101,16 @@ private: // Conditional variable that is use to check if we have some buffer in m_buffer_pool ready to be written to. 
std::condition_variable m_has_ready_buffer; - - ProcessingCompleteCallback m_interrupt_callback; }; class AsyncOutputStreamBase : public OutputStreamBase { public: - AsyncOutputStreamBase(const LayerInfo &edge_layer, hailo_stream_interface_t stream_interface, - EventPtr core_op_activated_event, hailo_status &status); + AsyncOutputStreamBase(const LayerInfo &edge_layer, EventPtr core_op_activated_event, hailo_status &status); virtual hailo_status set_buffer_mode(StreamBufferMode buffer_mode) override; virtual std::chrono::milliseconds get_timeout() const override; virtual hailo_status set_timeout(std::chrono::milliseconds timeout) override; - virtual hailo_status register_interrupt_callback(const ProcessingCompleteCallback &callback) override; virtual hailo_status wait_for_async_ready(size_t transfer_size, std::chrono::milliseconds timeout) override; virtual Expected get_async_max_queue_size() const override; @@ -129,10 +118,8 @@ public: virtual hailo_status read_impl(MemoryView buffer) override; - virtual Expected get_buffer_frames_size() const override; - - virtual hailo_status abort() override; - virtual hailo_status clear_abort() override; + virtual hailo_status abort_impl() override; + virtual hailo_status clear_abort_impl() override; virtual hailo_status activate_stream() override; virtual hailo_status deactivate_stream() override; @@ -157,8 +144,6 @@ private: hailo_status dequeue_and_launch_transfer(); - static void ignore_interrupts_callback() {} - template hailo_status cv_wait_for(std::unique_lock &lock, std::chrono::milliseconds timeout, Pred &&pred) { @@ -205,8 +190,6 @@ private: // Conditional variable that is use to check if we have some pending buffer ready to be read. 
std::condition_variable m_has_ready_buffer; - - ProcessingCompleteCallback m_interrupt_callback; }; diff --git a/hailort/libhailort/src/stream_common/nms_stream.cpp b/hailort/libhailort/src/stream_common/nms_stream.cpp index 988e120..725530b 100644 --- a/hailort/libhailort/src/stream_common/nms_stream.cpp +++ b/hailort/libhailort/src/stream_common/nms_stream.cpp @@ -51,6 +51,7 @@ #include "hef/layer_info.hpp" #include "common/os_utils.hpp" #include "stream_common/queued_stream_buffer_pool.hpp" +#include "utils/profiler/tracer_macros.hpp" namespace hailort { @@ -341,6 +342,11 @@ hailo_stream_interface_t NmsOutputStream::get_interface() const return m_base_stream->get_interface(); } +void NmsOutputStream::set_vdevice_core_op_handle(vdevice_core_op_handle_t core_op_handle) +{ + return m_base_stream->set_vdevice_core_op_handle(core_op_handle); +} + Expected> NmsOutputStream::allocate_buffer_pool() { const size_t queue_size = m_reader_thread.get_max_ongoing_transfers(); @@ -358,6 +364,10 @@ size_t NmsOutputStream::get_max_ongoing_transfers() const hailo_status NmsOutputStream::read_async_impl(TransferRequest &&transfer_request) { + CHECK(1 == transfer_request.transfer_buffers.size(), HAILO_INVALID_OPERATION, + "NMS Reader stream supports only 1 transfer buffer"); + // Currently leave as transfer request - because nms reader uses transfer request queue + // TODO HRT-12239: Chagge when support async read with any aligned void ptr return m_reader_thread.launch_transfer(std::move(transfer_request)); } @@ -371,6 +381,12 @@ hailo_status NmsOutputStream::deactivate_stream_impl() return m_base_stream->deactivate_stream(); } +hailo_status NmsOutputStream::cancel_pending_transfers() +{ + m_reader_thread.cancel_pending_transfers(); + return m_base_stream->cancel_pending_transfers(); +} + NmsReaderThread::NmsReaderThread(std::shared_ptr base_stream, size_t max_queue_size) : m_base_stream(base_stream), m_queue_max_size(max_queue_size), @@ -395,7 +411,9 @@ 
NmsReaderThread::~NmsReaderThread() hailo_status NmsReaderThread::launch_transfer(TransferRequest &&transfer_request) { - CHECK(0 == transfer_request.buffer.offset(), HAILO_INVALID_OPERATION, + CHECK(1 == transfer_request.transfer_buffers.size(), HAILO_INVALID_OPERATION, + "NMS Reader stream supports only 1 transfer buffer"); + CHECK(0 == transfer_request.transfer_buffers[0].offset(), HAILO_INVALID_OPERATION, "NMS stream doesn't support buffer with offset"); { @@ -404,6 +422,16 @@ hailo_status NmsReaderThread::launch_transfer(TransferRequest &&transfer_request return HAILO_QUEUE_IS_FULL; } + if (INVALID_CORE_OP_HANDLE != m_base_stream->get_vdevice_core_op_handle()) { + transfer_request.callback = [original_callback=transfer_request.callback, this](hailo_status status) { + if (HAILO_SUCCESS == status) { + TRACE(FrameEnqueueD2HTrace, m_base_stream->get_device_id(), m_base_stream->get_vdevice_core_op_handle(), + m_base_stream->name()); + } + original_callback(status); + }; + } + m_queue.emplace(std::move(transfer_request)); } m_queue_cond.notify_one(); @@ -441,8 +469,9 @@ void NmsReaderThread::process_transfer_requests() m_queue.pop(); } - assert(0 == transfer_request.buffer.offset()); - auto buffer = transfer_request.buffer.base_buffer(); + assert(1 == transfer_request.transfer_buffers.size()); + assert(0 == transfer_request.transfer_buffers[0].offset()); + auto buffer = transfer_request.transfer_buffers[0].base_buffer(); auto status = NMSStreamReader::read_nms(*m_base_stream, buffer->data(), 0, buffer->size()); if ((HAILO_STREAM_NOT_ACTIVATED == status) || (HAILO_STREAM_ABORTED_BY_USER == status)) { @@ -455,4 +484,14 @@ void NmsReaderThread::process_transfer_requests() } } +void NmsReaderThread::cancel_pending_transfers() +{ + std::unique_lock lock(m_queue_mutex); + while(!m_queue.empty()) { + auto transfer_request = m_queue.front(); + m_queue.pop(); + transfer_request.callback(HAILO_STREAM_ABORTED_BY_USER); + } +} + } /* namespace hailort */ \ No newline at end of 
file diff --git a/hailort/libhailort/src/stream_common/nms_stream.hpp b/hailort/libhailort/src/stream_common/nms_stream.hpp index b6f44ef..4242b28 100644 --- a/hailort/libhailort/src/stream_common/nms_stream.hpp +++ b/hailort/libhailort/src/stream_common/nms_stream.hpp @@ -56,6 +56,8 @@ public: size_t get_max_ongoing_transfers() const; + void cancel_pending_transfers(); + private: void signal_thread_quit(); @@ -84,11 +86,15 @@ public: NmsOutputStream(std::shared_ptr base_stream, const LayerInfo &edge_layer, size_t max_queue_size, EventPtr core_op_activated_event, hailo_status &status) : - AsyncOutputStreamBase(edge_layer, base_stream->get_interface(), std::move(core_op_activated_event), status), + AsyncOutputStreamBase(edge_layer, std::move(core_op_activated_event), status), m_base_stream(base_stream), m_reader_thread(base_stream, max_queue_size) {} + void set_vdevice_core_op_handle(vdevice_core_op_handle_t core_op_handle) override; + + virtual hailo_status cancel_pending_transfers() override; + protected: virtual Expected> allocate_buffer_pool() override; virtual size_t get_max_ongoing_transfers() const override; diff --git a/hailort/libhailort/src/stream_common/remote_process_stream.cpp b/hailort/libhailort/src/stream_common/remote_process_stream.cpp index 5fae0a1..f231a17 100644 --- a/hailort/libhailort/src/stream_common/remote_process_stream.cpp +++ b/hailort/libhailort/src/stream_common/remote_process_stream.cpp @@ -170,8 +170,8 @@ RemoteProcessInputStream::~RemoteProcessInputStream() // continue } - // Calling abort() to make sure the thread will exit - status = abort(); + // Calling abort_impl() to make sure the thread will exit + status = abort_impl(); if (HAILO_SUCCESS != status) { LOGGER__ERROR("Failed to abort stream with {}", status); // continue @@ -198,24 +198,18 @@ hailo_status RemoteProcessInputStream::set_timeout(std::chrono::milliseconds tim return HAILO_SUCCESS; } -hailo_status RemoteProcessInputStream::abort() +hailo_status 
RemoteProcessInputStream::abort_impl() { m_buffer_pool->abort(); return HAILO_SUCCESS; } -hailo_status RemoteProcessInputStream::clear_abort() +hailo_status RemoteProcessInputStream::clear_abort_impl() { m_buffer_pool->clear_abort(); return HAILO_SUCCESS; } -Expected RemoteProcessInputStream::get_buffer_frames_size() const -{ - // Must be called on main process - return make_unexpected(HAILO_INTERNAL_FAILURE); -} - bool RemoteProcessInputStream::is_scheduled() { return m_base_stream->is_scheduled(); @@ -259,6 +253,11 @@ hailo_status RemoteProcessInputStream::deactivate_stream() return m_base_stream->deactivate_stream(); } +hailo_status RemoteProcessInputStream::cancel_pending_transfers() +{ + return m_base_stream->cancel_pending_transfers(); +} + hailo_status RemoteProcessInputStream::write_impl(const MemoryView &buffer) { // Get available buffer @@ -281,8 +280,7 @@ hailo_status RemoteProcessInputStream::write_impl(const MemoryView &buffer) RemoteProcessInputStream::RemoteProcessInputStream(std::shared_ptr base_stream, EventPtr thread_stop_event, hailo_status &status) : - InputStreamBase(base_stream->get_layer_info(), base_stream->get_interface(), - base_stream->get_core_op_activated_event(), status), + InputStreamBase(base_stream->get_layer_info(), base_stream->get_core_op_activated_event(), status), m_base_stream(base_stream), m_timeout(m_base_stream->get_timeout()), m_wait_for_activation(m_base_stream->get_core_op_activated_event(), thread_stop_event) @@ -301,8 +299,8 @@ RemoteProcessInputStream::RemoteProcessInputStream(std::shared_ptrget_buffer_frames_size(); + // Not all streams supports get_async_max_queue_size, fallback to default. + auto queue_size_exp = m_base_stream->get_async_max_queue_size(); const auto queue_size = queue_size_exp ? 
*queue_size_exp : DEFAULT_QUEUE_SIZE; auto buffer_pool = RemoteProcessBufferPool::create(HAILO_H2D_STREAM, base_stream->get_frame_size(), queue_size); @@ -412,8 +410,8 @@ RemoteProcessOutputStream::~RemoteProcessOutputStream() // continue } - // Calling abort() to make sure the thread will exit - status = abort(); + // Calling abort_impl() to make sure the thread will exit + status = abort_impl(); if (HAILO_SUCCESS != status) { LOGGER__ERROR("Failed to abort stream with {}", status); // continue @@ -440,24 +438,18 @@ hailo_status RemoteProcessOutputStream::set_timeout(std::chrono::milliseconds ti return HAILO_SUCCESS; } -hailo_status RemoteProcessOutputStream::abort() +hailo_status RemoteProcessOutputStream::abort_impl() { m_buffer_pool->abort(); return HAILO_SUCCESS; } -hailo_status RemoteProcessOutputStream::clear_abort() +hailo_status RemoteProcessOutputStream::clear_abort_impl() { m_buffer_pool->clear_abort(); return HAILO_SUCCESS; } -Expected RemoteProcessOutputStream::get_buffer_frames_size() const -{ - // Must be called on main process - return make_unexpected(HAILO_INTERNAL_FAILURE); -} - bool RemoteProcessOutputStream::is_scheduled() { return m_base_stream->is_scheduled(); @@ -473,11 +465,9 @@ hailo_status RemoteProcessOutputStream::deactivate_stream() return m_base_stream->deactivate_stream(); } -hailo_status RemoteProcessOutputStream::register_interrupt_callback(const ProcessingCompleteCallback &) +hailo_status RemoteProcessOutputStream::cancel_pending_transfers() { - // register_interrupt_callback is an internal function (used by the scheduler) - // and it shouldn't be called from here. 
- return HAILO_NOT_SUPPORTED; + return m_base_stream->cancel_pending_transfers(); } hailo_status RemoteProcessOutputStream::read_impl(MemoryView buffer) @@ -500,8 +490,7 @@ hailo_status RemoteProcessOutputStream::read_impl(MemoryView buffer) RemoteProcessOutputStream::RemoteProcessOutputStream(std::shared_ptr base_stream, EventPtr thread_stop_event, hailo_status &status) : - OutputStreamBase(base_stream->get_layer_info(), base_stream->get_interface(), - base_stream->get_core_op_activated_event(), status), + OutputStreamBase(base_stream->get_layer_info(), base_stream->get_core_op_activated_event(), status), m_base_stream(base_stream), m_timeout(m_base_stream->get_timeout()), m_wait_for_activation(m_base_stream->get_core_op_activated_event(), thread_stop_event) @@ -519,8 +508,8 @@ RemoteProcessOutputStream::RemoteProcessOutputStream(std::shared_ptrget_buffer_frames_size(); + // Not all streams supports get_async_max_queue_size, fallback to default. + auto queue_size_exp = m_base_stream->get_async_max_queue_size(); auto queue_size = queue_size_exp ? 
*queue_size_exp : DEFAULT_QUEUE_SIZE; auto buffer_pool = RemoteProcessBufferPool::create(HAILO_D2H_STREAM, base_stream->get_frame_size(), queue_size); diff --git a/hailort/libhailort/src/stream_common/remote_process_stream.hpp b/hailort/libhailort/src/stream_common/remote_process_stream.hpp index 14e87e7..2f6ccc7 100644 --- a/hailort/libhailort/src/stream_common/remote_process_stream.hpp +++ b/hailort/libhailort/src/stream_common/remote_process_stream.hpp @@ -119,14 +119,14 @@ public: virtual hailo_stream_interface_t get_interface() const override; virtual std::chrono::milliseconds get_timeout() const override; virtual hailo_status set_timeout(std::chrono::milliseconds timeout) override; - virtual hailo_status abort() override; - virtual hailo_status clear_abort() override; - virtual Expected get_buffer_frames_size() const override; + virtual hailo_status abort_impl() override; + virtual hailo_status clear_abort_impl() override; virtual bool is_scheduled() override; virtual hailo_status flush() override; virtual hailo_status activate_stream() override; virtual hailo_status deactivate_stream() override; + virtual hailo_status cancel_pending_transfers() override; RemoteProcessInputStream(std::shared_ptr base_stream, EventPtr thread_stop_event, @@ -168,15 +168,13 @@ public: virtual hailo_stream_interface_t get_interface() const override; virtual std::chrono::milliseconds get_timeout() const override; virtual hailo_status set_timeout(std::chrono::milliseconds timeout) override; - virtual hailo_status abort() override; - virtual hailo_status clear_abort() override; - virtual Expected get_buffer_frames_size() const override; + virtual hailo_status abort_impl() override; + virtual hailo_status clear_abort_impl() override; virtual bool is_scheduled() override; virtual hailo_status activate_stream() override; virtual hailo_status deactivate_stream() override; - - virtual hailo_status register_interrupt_callback(const ProcessingCompleteCallback &); + virtual hailo_status 
cancel_pending_transfers() override; RemoteProcessOutputStream(std::shared_ptr base_stream, EventPtr thread_stop_event, hailo_status &status); diff --git a/hailort/libhailort/src/stream_common/stream_internal.cpp b/hailort/libhailort/src/stream_common/stream_internal.cpp index 2c34128..eb0de7b 100644 --- a/hailort/libhailort/src/stream_common/stream_internal.cpp +++ b/hailort/libhailort/src/stream_common/stream_internal.cpp @@ -21,17 +21,6 @@ namespace hailort { -static Expected create_dma_able_buffer_from_user_size(void *addr, size_t size) -{ - auto storage = DmaStorage::create_from_user_address(addr, size); - CHECK_EXPECTED(storage); - - auto buffer = make_shared_nothrow(storage.release()); - CHECK_NOT_NULL_AS_EXPECTED(buffer, HAILO_OUT_OF_HOST_MEMORY); - - return buffer; -} - hailo_status InputStreamBase::write(const MemoryView &buffer) { CHECK(buffer.size() == get_frame_size(), HAILO_INVALID_ARGUMENT, @@ -58,15 +47,25 @@ hailo_status InputStreamBase::write_async(BufferPtr buffer, const TransferDoneCa auto wrapped_callback = [buffer, user_callback](hailo_status status) { user_callback(CompletionInfo{status, buffer->data(), buffer->size()}); }; - return write_async(TransferRequest{ buffer, wrapped_callback}); + return write_async(TransferRequest(std::move(buffer), wrapped_callback)); } hailo_status InputStreamBase::write_async(const MemoryView &buffer, const TransferDoneCallback &user_callback) { - auto dma_able_buffer = create_dma_able_buffer_from_user_size(const_cast(buffer.data()), buffer.size()); - CHECK_EXPECTED_AS_STATUS(dma_able_buffer); + CHECK(0 == (reinterpret_cast(buffer.data()) % HailoRTCommon::HW_DATA_ALIGNMENT), HAILO_INVALID_ARGUMENT, + "User address must be aligned to {}", HailoRTCommon::HW_DATA_ALIGNMENT); - return write_async(dma_able_buffer.release(), user_callback); + const auto dma_able_alignment = OsUtils::get_dma_able_alignment(); + // User address is not aligned to page size + if ((0 != (reinterpret_cast(buffer.data()) % 
dma_able_alignment))) { + auto user_buffer = UserBufferStorage::create_storage_from_user_buffer(const_cast(buffer.data()), buffer.size()); + CHECK_EXPECTED_AS_STATUS(user_buffer); + return write_async(user_buffer.release(), user_callback); + } else { + auto dma_able_buffer = DmaStorage::create_dma_able_buffer_from_user_size(const_cast(buffer.data()), buffer.size()); + CHECK_EXPECTED_AS_STATUS(dma_able_buffer); + return write_async(dma_able_buffer.release(), user_callback); + } } hailo_status InputStreamBase::write_async(const void *buffer, size_t size, const TransferDoneCallback &user_callback) @@ -80,6 +79,18 @@ hailo_status InputStreamBase::write_async(TransferRequest &&) return HAILO_NOT_IMPLEMENTED; } +hailo_status InputStreamBase::abort() +{ + LOGGER__ERROR("InputStream::abort is deprecated. One should use ConfiguredNetworkGroup::shutdown()"); + return abort_impl(); +} + +hailo_status InputStreamBase::clear_abort() +{ + LOGGER__ERROR("InputStream::clear_abort() is deprecated. To reuse network after shutdown, reconfigure it"); + return clear_abort_impl(); +} + EventPtr &InputStreamBase::get_core_op_activated_event() { return m_core_op_activated_event; @@ -91,20 +102,17 @@ bool InputStreamBase::is_scheduled() } // TODO - HRT-11739 - remove vdevice related members/functions (get/set_vdevice_core_op_handle) -vdevice_core_op_handle_t InputStreamBase::get_vdevice_core_op_handle() -{ - LOGGER__WARNING("VDevice InputStream::get_vedvice_core_op_handle is not implemented for this class."); - return INVALID_CORE_OP_HANDLE; -} +void InputStreamBase::set_vdevice_core_op_handle(vdevice_core_op_handle_t /*core_op_handle*/) {} -void InputStreamBase::set_vdevice_core_op_handle(vdevice_core_op_handle_t /*core_op_handle*/) +hailo_status InputStreamBase::cancel_pending_transfers() { - LOGGER__WARNING("VDevice InputStream::set_vedvice_core_op_handle is not implemented for this class."); + LOGGER__ERROR("cancel_pending_transfers not implemented for this type of stream"); + return 
HAILO_NOT_IMPLEMENTED; } OutputStreamBase::OutputStreamBase(const LayerInfo &layer_info, const hailo_stream_info_t &stream_info, - const CONTROL_PROTOCOL__nn_stream_config_t &nn_stream_config, const EventPtr &core_op_activated_event) : - m_nn_stream_config(nn_stream_config), m_layer_info(layer_info), m_core_op_activated_event(core_op_activated_event) + const EventPtr &core_op_activated_event) : + m_layer_info(layer_info), m_core_op_activated_event(core_op_activated_event) { m_stream_info = stream_info; m_quant_infos = m_layer_info.quant_infos; @@ -133,7 +141,7 @@ hailo_status OutputStreamBase::read_async(BufferPtr buffer, const TransferDoneCa auto wrapped_callback = [buffer, user_callback](hailo_status status) { user_callback(CompletionInfo{status, const_cast(buffer->data()), buffer->size()}); }; - return read_async(TransferRequest{buffer, wrapped_callback}); + return read_async(TransferRequest(std::move(buffer), wrapped_callback)); } hailo_status OutputStreamBase::read_async(MemoryView buffer, const TransferDoneCallback &user_callback) @@ -142,14 +150,19 @@ hailo_status OutputStreamBase::read_async(MemoryView buffer, const TransferDoneC CHECK(buffer.size() == get_frame_size(), HAILO_INVALID_ARGUMENT, "Read size {} must be frame size {}", buffer.size(), get_frame_size()); - auto wrapped_callback = [buffer, user_callback](hailo_status status) { - user_callback(CompletionInfo{status, const_cast(buffer.data()), buffer.size()}); - }; - - auto dma_able_buffer = create_dma_able_buffer_from_user_size(buffer.data(), buffer.size()); - CHECK_EXPECTED_AS_STATUS(dma_able_buffer); + const auto dma_able_alignment = HailoRTCommon::DMA_ABLE_ALIGNMENT_READ_HW_LIMITATION; + BufferPtr wrapped_buffer = nullptr; + if ((0 != (reinterpret_cast(buffer.data()) % dma_able_alignment))) { + auto user_buffer = UserBufferStorage::create_storage_from_user_buffer(const_cast(buffer.data()), buffer.size()); + CHECK_EXPECTED_AS_STATUS(user_buffer); + wrapped_buffer = user_buffer.release(); + } else 
{ + auto dma_able_buffer = DmaStorage::create_dma_able_buffer_from_user_size(const_cast(buffer.data()), buffer.size()); + CHECK_EXPECTED_AS_STATUS(dma_able_buffer); + wrapped_buffer = dma_able_buffer.release(); + } - return read_async(dma_able_buffer.release(), user_callback); + return read_async(wrapped_buffer, user_callback); } hailo_status OutputStreamBase::read_async(void *buffer, size_t size, const TransferDoneCallback &user_callback) @@ -157,12 +170,30 @@ hailo_status OutputStreamBase::read_async(void *buffer, size_t size, const Trans return read_async(MemoryView(buffer, size), user_callback); } +hailo_status OutputStreamBase::read_unaligned_address_async(const MemoryView &, const TransferDoneCallback &) +{ + LOGGER__ERROR("read_unaligned_address_async not implemented OutputStreamBase"); + return HAILO_NOT_IMPLEMENTED; +} + hailo_status OutputStreamBase::read_async(TransferRequest &&) { LOGGER__ERROR("read_async not implemented for sync API"); return HAILO_NOT_IMPLEMENTED; } +hailo_status OutputStreamBase::abort() +{ + LOGGER__ERROR("OutputStream::abort is deprecated. One should use ConfiguredNetworkGroup::shutdown()"); + return abort_impl(); +} + +hailo_status OutputStreamBase::clear_abort() +{ + LOGGER__ERROR("OutputStream::clear_abort() is deprecated. 
To reuse network after shutdown, reconfigure it"); + return clear_abort_impl(); +} + EventPtr &OutputStreamBase::get_core_op_activated_event() { return m_core_op_activated_event; @@ -173,4 +204,13 @@ bool OutputStreamBase::is_scheduled() return false; } +// TODO - HRT-11739 - remove vdevice related members/functions (get/set_vdevice_core_op_handle) +void OutputStreamBase::set_vdevice_core_op_handle(vdevice_core_op_handle_t) {} + +hailo_status OutputStreamBase::cancel_pending_transfers() +{ + LOGGER__ERROR("cancel_pending_transfers not implemented for this type of stream"); + return HAILO_NOT_IMPLEMENTED; +} + } /* namespace hailort */ diff --git a/hailort/libhailort/src/stream_common/stream_internal.hpp b/hailort/libhailort/src/stream_common/stream_internal.hpp index c8d3663..7e0758d 100644 --- a/hailort/libhailort/src/stream_common/stream_internal.hpp +++ b/hailort/libhailort/src/stream_common/stream_internal.hpp @@ -18,7 +18,6 @@ * |-- MipiInputStream * |-- RemoteProcessInputStream (used for pyhailort to support fork) * |-- VDeviceNativeInputStream - * |-- VDeviceInputStreamMultiplexerWrapper * * OutputStream (External "interface") * |-- OutputStreamBase (Base class) @@ -29,7 +28,6 @@ * |-- EthernetOutputStream * |-- RemoteProcessOutputStream (used for pyhailort to support fork) * |-- VDeviceNativeOutputStream - * |-- VDeviceOutputStreamMultiplexerWrapper **/ #ifndef _STREAM_INTERNAL_HPP_ @@ -83,29 +81,11 @@ public: // Manually set the buffer mode, fails if the mode was already set (and different from buffer_mode) virtual hailo_status set_buffer_mode(StreamBufferMode buffer_mode) = 0; - virtual const CONTROL_PROTOCOL__nn_stream_config_t &get_nn_stream_config() - { - return m_nn_stream_config; - }; - const LayerInfo& get_layer_info() { return m_layer_info; }; - // Use by the scheduler to launch the transfer on the given activated device. - // TODO HRT-11679: remove this. 
- virtual hailo_status launch_transfer(const device_id_t &device_id) - { - (void)device_id; - return HAILO_INVALID_OPERATION; - } - - virtual Expected get_buffer_frames_size() const - { - return make_unexpected(HAILO_INVALID_OPERATION); - } - const std::vector &get_quant_infos() const { return m_quant_infos; @@ -122,32 +102,24 @@ public: virtual hailo_status write_async(TransferRequest &&transfer_request); + virtual hailo_status abort() override final; + virtual hailo_status abort_impl() = 0; + + virtual hailo_status clear_abort() override final; + virtual hailo_status clear_abort_impl() = 0; + virtual EventPtr &get_core_op_activated_event() override; virtual bool is_scheduled() override; virtual hailo_status activate_stream() = 0; virtual hailo_status deactivate_stream() = 0; - using ProcessingCompleteCallback = std::function; - virtual hailo_status register_interrupt_callback(const ProcessingCompleteCallback &) - { - return HAILO_INVALID_OPERATION; - } - - virtual void notify_all() - { - // Do nothing, override on subclass if notify is needed. 
- } - - virtual vdevice_core_op_handle_t get_vdevice_core_op_handle(); - virtual void set_vdevice_core_op_handle(vdevice_core_op_handle_t core_op_handle); - CONTROL_PROTOCOL__nn_stream_config_t m_nn_stream_config; + virtual hailo_status cancel_pending_transfers(); protected: - explicit InputStreamBase(const LayerInfo &layer_info, hailo_stream_interface_t stream_interface, - EventPtr core_op_activated_event, hailo_status &status) : + explicit InputStreamBase(const LayerInfo &layer_info, EventPtr core_op_activated_event, hailo_status &status) : m_layer_info(layer_info), m_core_op_activated_event(std::move(core_op_activated_event)) { @@ -155,23 +127,6 @@ protected: assert(1 == stream_infos.size()); m_stream_info = stream_infos[0]; m_quant_infos = layer_info.quant_infos; - - auto max_periph_bytes_from_hef = HefConfigurator::max_periph_bytes_value(stream_interface); - if (HAILO_SUCCESS != max_periph_bytes_from_hef.status()) { - status = max_periph_bytes_from_hef.status(); - return; - } - const auto max_periph_bytes = MIN(max_periph_bytes_from_hef.value(), layer_info.max_shmifo_size); - const bool hw_padding_supported = HefConfigurator::is_hw_padding_supported(layer_info, max_periph_bytes); - - auto nn_stream_config = HefConfigurator::parse_nn_stream_config(layer_info, - hw_padding_supported && (HAILO_STREAM_INTERFACE_MIPI != stream_interface)); // On MIPI networks, we don't want to use hw padding nn stream config. 
- if(!nn_stream_config) { - LOGGER__ERROR("Failed parse nn stream config"); - status = nn_stream_config.status(); - return; - } - m_nn_stream_config = nn_stream_config.release(); status = HAILO_SUCCESS; } @@ -190,34 +145,16 @@ public: // Manually set the buffer mode, fails if the mode was already set (and different from buffer_mode) virtual hailo_status set_buffer_mode(StreamBufferMode buffer_mode) = 0; - virtual const CONTROL_PROTOCOL__nn_stream_config_t &get_nn_stream_config() - { - return m_nn_stream_config; - }; - const LayerInfo& get_layer_info() { return m_layer_info; }; - virtual Expected get_buffer_frames_size() const - { - return make_unexpected(HAILO_INVALID_OPERATION); - } - const std::vector &get_quant_infos() const override { return m_quant_infos; } - // Use by the scheduler to launch the transfer on the given activated device. - // TODO HRT-11679: remove this. - virtual hailo_status launch_transfer(const device_id_t &device_id) - { - (void)device_id; - return HAILO_INVALID_OPERATION; - } - virtual hailo_status read(MemoryView buffer) override; virtual hailo_status read(void *buffer, size_t size) override; @@ -228,6 +165,13 @@ public: virtual hailo_status read_async(void *buffer, size_t size, const TransferDoneCallback &user_callback) override final; virtual hailo_status read_async(TransferRequest &&transfer_request); + virtual hailo_status read_unaligned_address_async(const MemoryView &buffer, const TransferDoneCallback &user_callback); + + virtual hailo_status abort() override final; + virtual hailo_status abort_impl() = 0; + + virtual hailo_status clear_abort() override final; + virtual hailo_status clear_abort_impl() = 0; virtual EventPtr &get_core_op_activated_event() override; virtual bool is_scheduled() override; @@ -235,44 +179,27 @@ public: virtual hailo_status activate_stream() = 0; virtual hailo_status deactivate_stream() = 0; - using ProcessingCompleteCallback = std::function; - virtual hailo_status register_interrupt_callback(const 
ProcessingCompleteCallback &) - { - return HAILO_INVALID_OPERATION; - } + virtual void set_vdevice_core_op_handle(vdevice_core_op_handle_t core_op_handle); - CONTROL_PROTOCOL__nn_stream_config_t m_nn_stream_config; + virtual inline const char *get_device_id() { return ""; }; + // TODO - HRT-11739 - remove vdevice related members/functions (get/set_vdevice_core_op_handle) + virtual inline vdevice_core_op_handle_t get_vdevice_core_op_handle() { return INVALID_CORE_OP_HANDLE; }; + + virtual hailo_status cancel_pending_transfers(); protected: - explicit OutputStreamBase(const LayerInfo &layer_info, hailo_stream_interface_t stream_interface, - EventPtr core_op_activated_event, hailo_status &status) : + explicit OutputStreamBase(const LayerInfo &layer_info, EventPtr core_op_activated_event, hailo_status &status) : m_layer_info(layer_info), m_core_op_activated_event(std::move(core_op_activated_event)) { const auto &stream_infos = LayerInfoUtils::get_stream_infos_from_layer_info(layer_info); assert(1 == stream_infos.size()); m_stream_info = stream_infos[0]; m_quant_infos = m_layer_info.quant_infos; - - auto max_periph_bytes_from_hef = HefConfigurator::max_periph_bytes_value(stream_interface); - if (HAILO_SUCCESS != max_periph_bytes_from_hef.status()) { - status = max_periph_bytes_from_hef.status(); - return; - } - const auto max_periph_bytes = MIN(max_periph_bytes_from_hef.value(), layer_info.max_shmifo_size); - const bool hw_padding_supported = HefConfigurator::is_hw_padding_supported(layer_info, max_periph_bytes); - - auto nn_stream_config = HefConfigurator::parse_nn_stream_config(m_layer_info, hw_padding_supported); - if(!nn_stream_config) { - LOGGER__ERROR("Failed parse nn stream config"); - status = nn_stream_config.status(); - return; - } - m_nn_stream_config = nn_stream_config.release(); status = HAILO_SUCCESS; } OutputStreamBase(const LayerInfo &layer_info, const hailo_stream_info_t &stream_info, - const CONTROL_PROTOCOL__nn_stream_config_t &nn_stream_config, const 
EventPtr &core_op_activated_event); + const EventPtr &core_op_activated_event); LayerInfo m_layer_info; diff --git a/hailort/libhailort/src/stream_common/transfer_common.cpp b/hailort/libhailort/src/stream_common/transfer_common.cpp index 06abb10..f181ec2 100644 --- a/hailort/libhailort/src/stream_common/transfer_common.cpp +++ b/hailort/libhailort/src/stream_common/transfer_common.cpp @@ -8,6 +8,7 @@ #include "transfer_common.hpp" #include "vdma/memory/mapped_buffer.hpp" +#include "vdma/vdma_device.hpp" namespace hailort { @@ -32,16 +33,16 @@ TransferBuffer::TransferBuffer(BufferPtr base_buffer) : TransferBuffer(base_buffer, base_buffer->size(), 0) {} -Expected TransferBuffer::map_buffer(HailoRTDriver &driver, HailoRTDriver::DmaDirection direction) +Expected TransferBuffer::map_buffer(VdmaDevice &device, HailoRTDriver::DmaDirection direction) { CHECK_AS_EXPECTED(m_base_buffer->storage().type() == BufferStorage::Type::DMA, HAILO_INVALID_ARGUMENT, - "Buffer must be dma-able (provided buffer type {})", static_cast(m_base_buffer->storage().type())); + "Buffer must be dma-able (provided buffer type {})", static_cast(m_base_buffer->storage().type())); // Map if not already mapped - auto is_new_mapping_exp = m_base_buffer->storage().dma_map(driver, to_hailo_dma_direction(direction)); + auto is_new_mapping_exp = m_base_buffer->storage().dma_map(device, to_hailo_dma_direction(direction)); CHECK_EXPECTED(is_new_mapping_exp); - return m_base_buffer->storage().get_dma_mapped_buffer(driver.device_id()); + return m_base_buffer->storage().get_dma_mapped_buffer(device.get_dev_id()); } hailo_status TransferBuffer::copy_to(MemoryView buffer) @@ -71,9 +72,9 @@ hailo_status TransferBuffer::copy_from(const MemoryView buffer) return HAILO_SUCCESS; } -hailo_status TransferBuffer::synchronize(HailoRTDriver &driver, HailoRTDriver::DmaSyncDirection sync_direction) +hailo_status TransferBuffer::synchronize(VdmaDevice &device, HailoRTDriver::DmaSyncDirection sync_direction) { - auto 
mapped_buffer = m_base_buffer->storage().get_dma_mapped_buffer(driver.device_id()); + auto mapped_buffer = m_base_buffer->storage().get_dma_mapped_buffer(device.get_dev_id()); CHECK_EXPECTED_AS_STATUS(mapped_buffer); auto continuous_parts = get_continuous_parts(); diff --git a/hailort/libhailort/src/stream_common/transfer_common.hpp b/hailort/libhailort/src/stream_common/transfer_common.hpp index 77e7366..795b458 100644 --- a/hailort/libhailort/src/stream_common/transfer_common.hpp +++ b/hailort/libhailort/src/stream_common/transfer_common.hpp @@ -18,6 +18,8 @@ namespace hailort { +class VdmaDevice; + // Contains buffer that can be transferred. The buffer can be circular - // It relies at [m_offset, m_base_buffer.size()) and [0, m_base_buffer.size() - m_size). class TransferBuffer final { @@ -31,13 +33,13 @@ public: size_t offset() const { return m_offset; } size_t size() const { return m_size; } - Expected map_buffer(HailoRTDriver &driver, HailoRTDriver::DmaDirection direction); + Expected map_buffer(VdmaDevice &device, HailoRTDriver::DmaDirection direction); hailo_status copy_to(MemoryView buffer); hailo_status copy_from(const MemoryView buffer); // Sync the buffer to the given direction, fails if the buffer is not mapped. - hailo_status synchronize(HailoRTDriver &driver, HailoRTDriver::DmaSyncDirection sync_direction); + hailo_status synchronize(VdmaDevice &device, HailoRTDriver::DmaSyncDirection sync_direction); private: @@ -61,11 +63,41 @@ private: }; // Internal function, wrapper to the user callbacks, accepts the callback status as an argument. 
-using InternalTransferDoneCallback = std::function; +using TransferDoneCallback = std::function; struct TransferRequest { - TransferBuffer buffer; - InternalTransferDoneCallback callback; + std::vector transfer_buffers; + TransferDoneCallback callback; + TransferRequest() = default; + TransferRequest(TransferBuffer &&transfer_buffers_arg, const TransferDoneCallback &callback_arg): + transfer_buffers(), callback(callback_arg) + { + transfer_buffers.emplace_back(std::move(transfer_buffers_arg)); + } + TransferRequest(const TransferBuffer& transfer_buffers_arg, const TransferDoneCallback &callback_arg): + transfer_buffers(), callback(callback_arg) + { + transfer_buffers.emplace_back(std::move(transfer_buffers_arg)); + } + TransferRequest(std::vector &&transfer_buffers_arg, const TransferDoneCallback &callback_arg) : + transfer_buffers(std::move(transfer_buffers_arg)), callback(callback_arg) + {} + + size_t get_total_transfer_size() const { + size_t total_transfer_size = 0; + for (size_t i = 0; i < transfer_buffers.size(); i++) { + total_transfer_size += transfer_buffers[i].size(); + } + return total_transfer_size; + } +}; + +struct InferRequest { + // Transfer for each stream + std::unordered_map transfers; + + // Callback to be called when all transfer finishes + TransferDoneCallback callback; }; } /* namespace hailort */ diff --git a/hailort/libhailort/src/transform/transform.cpp b/hailort/libhailort/src/transform/transform.cpp index ed12efb..b977c46 100644 --- a/hailort/libhailort/src/transform/transform.cpp +++ b/hailort/libhailort/src/transform/transform.cpp @@ -29,16 +29,9 @@ namespace hailort { #define RGB_FEATURES (3) +#define F8CR_MIN_FEATURES_FOR_TRANSFORMATION (8) -bool TransformContextUtils::should_quantize_by_flags(const hailo_stream_direction_t stream_direction, - const hailo_format_flags_t &src_format_flags, const hailo_format_flags_t &dst_format_flags) -{ - return (HAILO_H2D_STREAM == stream_direction) ? 
- (!(HAILO_FORMAT_FLAGS_QUANTIZED & src_format_flags) && (HAILO_FORMAT_FLAGS_QUANTIZED & dst_format_flags)) : - ((HAILO_FORMAT_FLAGS_QUANTIZED & src_format_flags) && !(HAILO_FORMAT_FLAGS_QUANTIZED & dst_format_flags)); -} - Expected TransformContextUtils::should_quantize_by_type(const hailo_stream_direction_t stream_direction, const hailo_format_type_t &src_format_type, const hailo_format_type_t &dst_format_type) { @@ -70,26 +63,9 @@ Expected TransformContextUtils::should_quantize_by_type(const hailo_stream } Expected TransformContextUtils::should_quantize(const hailo_stream_direction_t stream_direction, - const hailo_format_t &src_format, const hailo_format_t &dst_format, const std::vector &quant_infos) + const hailo_format_t &src_format, const hailo_format_t &dst_format) { - auto should_quantize_by_flags = TransformContextUtils::should_quantize_by_flags(stream_direction, src_format.flags, dst_format.flags); - auto should_quantize_by_type = TransformContextUtils::should_quantize_by_type(stream_direction, src_format.type, dst_format.type); - CHECK_EXPECTED(should_quantize_by_type); - - if (should_quantize_by_type.value() != should_quantize_by_flags) { - auto direction_str = (HAILO_H2D_STREAM == stream_direction) ? "H2D" : "D2H"; - auto quantization_by_type_needed_str = (should_quantize_by_type.value()) ? "" : "not "; - LOGGER__WARNING( - "{} stream is marked as quantized={}, but according to format types (src={}, dst={}), quantization is {}needed. 
Usage of HAILO_FORMAT_FLAGS_QUANTIZED is deprecated and will be ignored.", - direction_str, !should_quantize_by_flags, HailoRTCommon::get_format_type_str(src_format.type), HailoRTCommon::get_format_type_str(dst_format.type), - quantization_by_type_needed_str); - } - - if (HAILO_H2D_STREAM == stream_direction) { - return (should_quantize_by_type.value() && !((are_all_quant_infos_identity(quant_infos)) && (src_format.type == dst_format.type))); - } else { - return should_quantize_by_type; - } + return TransformContextUtils::should_quantize_by_type(stream_direction, src_format.type, dst_format.type); } bool TransformContextUtils::should_transpose(const hailo_format_flags_t &src_flags, const hailo_format_flags_t &dst_flags) @@ -114,6 +90,12 @@ bool TransformContextUtils::should_reorder(const hailo_3d_image_shape_t &src_ima switch (src_format.order) { // Orders that are supported both on host and hw sides, and where transformation is still needed when shapes are equals case HAILO_FORMAT_ORDER_F8CR: + // In F8CR - if amount of features is less (or equal) than F8CR_MIN_FEATURES_FOR_TRANSFORMATION (8) - dont transform + if (F8CR_MIN_FEATURES_FOR_TRANSFORMATION >= src_image_shape.features) { + return false; + } else { + return true; + } case HAILO_FORMAT_ORDER_HAILO_NMS: return true; default: @@ -121,6 +103,14 @@ bool TransformContextUtils::should_reorder(const hailo_3d_image_shape_t &src_ima } } +bool TransformContextUtils::should_pad_periph(const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format) +{ + // Check if hw frame size is aligned to 8 for periph transfer + const auto shape_size = dst_image_shape.height * dst_image_shape.width * dst_image_shape.features * + HailoRTCommon::get_data_bytes(dst_format.type); + return (0 != (shape_size % HailoRTCommon::HW_DATA_ALIGNMENT)); +} + Expected TransformContextUtils::is_transformation_required(const hailo_stream_direction_t stream_direction, const hailo_3d_image_shape_t &src_image_shape, const 
hailo_format_t &src_format, const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, const std::vector &quant_infos) @@ -133,11 +123,12 @@ Expected TransformContextUtils::is_transformation_required(const hailo_str assert((HAILO_FORMAT_ORDER_AUTO != src_format.order) && (HAILO_FORMAT_ORDER_AUTO != dst_format.order)); assert((HAILO_FORMAT_TYPE_AUTO != src_format.type) && (HAILO_FORMAT_TYPE_AUTO != dst_format.type)); - auto should_quantize_exp = should_quantize(stream_direction, src_format, dst_format, quant_infos); + auto should_quantize_exp = should_quantize(stream_direction, src_format, dst_format); CHECK_EXPECTED(should_quantize_exp); return (*should_quantize_exp || should_transpose(src_format.flags, dst_format.flags) || - should_reorder(src_image_shape, src_format, dst_image_shape, dst_format)); + should_reorder(src_image_shape, src_format, dst_image_shape, dst_format) || + should_pad_periph(dst_image_shape, dst_format)); } std::string TransformContextUtils::make_quantization_description(hailo_format_type_t src_type, @@ -175,16 +166,6 @@ std::string TransformContextUtils::make_transpose_description(hailo_3d_image_sha return transpose_description.str(); } -bool TransformContextUtils::are_all_quant_infos_identity(const std::vector &quant_infos) -{ - for (const auto &quant_info : quant_infos) { - if (!Quantization::is_identity_qp(quant_info)) { - return false; - } - } - return true; -} - template void cast_elements_inplace(T *dst_ptr, uint32_t frame_size) { @@ -610,7 +591,7 @@ void transform__h2d_F8CR(const T *src_ptr, hailo_3d_image_shape_t *src_image_sha /* Validate arguments */ ASSERT(NULL != src_ptr); ASSERT(NULL != dst_ptr); - ASSERT(0 == (dst_image_shape->features % HW_DATA_ALIGNMENT)); + ASSERT(0 == (dst_image_shape->features % HailoRTCommon::HW_DATA_ALIGNMENT)); uint32_t src_row_size = src_image_shape->width * src_image_shape->features; uint32_t dst_row_size = dst_image_shape->width * dst_image_shape->features; @@ -623,15 +604,15 @@ 
void transform__h2d_F8CR(const T *src_ptr, hailo_3d_image_shape_t *src_image_sha for (uint32_t c = 0; c < src_image_shape->width; c++) { for (uint32_t f = 0; f < src_image_shape->features; f+=8) { src_offset = r * src_row_size + c * src_image_shape->features + f; - dst_offset = r * dst_row_size + c * HW_DATA_ALIGNMENT + f * dst_image_shape->width; - if (f + HW_DATA_ALIGNMENT <= src_image_shape->features) { + dst_offset = r * dst_row_size + c * HailoRTCommon::HW_DATA_ALIGNMENT + f * dst_image_shape->width; + if (f + HailoRTCommon::HW_DATA_ALIGNMENT <= src_image_shape->features) { /* take 8 full features for each column and write them */ - memcpy(dst_ptr + dst_offset, src_ptr + src_offset, HW_DATA_ALIGNMENT * sizeof(T)); + memcpy(dst_ptr + dst_offset, src_ptr + src_offset, HailoRTCommon::HW_DATA_ALIGNMENT * sizeof(T)); } else { /* take the last 8 or less features, pad features to 8 and write */ - auto last_features = (src_features % HW_DATA_ALIGNMENT); - auto remainder = (HW_DATA_ALIGNMENT - last_features); + auto last_features = (src_features % HailoRTCommon::HW_DATA_ALIGNMENT); + auto remainder = (HailoRTCommon::HW_DATA_ALIGNMENT - last_features); memcpy(dst_ptr + dst_offset, src_ptr + src_offset, last_features * sizeof(T)); dst_offset += last_features; memset(dst_ptr + dst_offset, 0, remainder * sizeof(T)); @@ -658,15 +639,16 @@ void transform__d2h_F8CR(const T *src_ptr, hailo_3d_image_shape_t *src_image_sha for (uint32_t r = 0; r < dst_image_shape->height ; r++) { for (uint32_t c = 0; c < dst_image_shape->width; c++) { for (uint32_t f = 0; f < dst_image_shape->features; f+=8) { - src_offset = r * src_row_size + c * HW_DATA_ALIGNMENT + f * src_image_shape->width; + src_offset = r * src_row_size + c * static_cast(HailoRTCommon::HW_DATA_ALIGNMENT) + + f * src_image_shape->width; dst_offset = r * dst_row_size + c * dst_image_shape->features + f; - if (f + HW_DATA_ALIGNMENT <= dst_image_shape->features) { + if (f + HailoRTCommon::HW_DATA_ALIGNMENT <= 
dst_image_shape->features) { /* copy the first dst_image_features (which are aligned to 8)! */ - memcpy(dst_ptr + dst_offset, src_ptr + src_offset, HW_DATA_ALIGNMENT * sizeof(T)); + memcpy(dst_ptr + dst_offset, src_ptr + src_offset, HailoRTCommon::HW_DATA_ALIGNMENT * sizeof(T)); } else { /* copy the last 8 or less features, remove pad */ - memcpy(dst_ptr + dst_offset, src_ptr + src_offset, (dst_features % HW_DATA_ALIGNMENT) * sizeof(T)); + memcpy(dst_ptr + dst_offset, src_ptr + src_offset, (dst_features % HailoRTCommon::HW_DATA_ALIGNMENT) * sizeof(T)); } } } @@ -741,8 +723,8 @@ hailo_status transform__h2d_NCHW_to_NHCW( "NCHW_to_NHCW Transform height src/dst should be the same"); CHECK(src_image_shape->width <= dst_image_shape->width, HAILO_INVALID_ARGUMENT, "NCHW_to_NHCW Transform src width should be smaller/equal than dst width"); - CHECK(((dst_image_shape->width * sizeof(T)) % HW_DATA_ALIGNMENT) == 0, HAILO_INVALID_ARGUMENT, - "NCHW_to_NHCW Transform dst width must be aligned to {}", HW_DATA_ALIGNMENT); + CHECK(((dst_image_shape->width * sizeof(T)) % HailoRTCommon::HW_DATA_ALIGNMENT) == 0, HAILO_INVALID_ARGUMENT, + "NCHW_to_NHCW Transform dst width must be aligned to {}", HailoRTCommon::HW_DATA_ALIGNMENT); size_t width_size = src_image_shape->width; size_t pad_size = (dst_image_shape->width - src_image_shape->width); @@ -824,8 +806,8 @@ hailo_status transform__h2d_YUY2_to_YUY2(const T *src_ptr, T *dst_ptr, uint32_t auto shape_size_in_bytes = shape_size * sizeof(T); - CHECK((shape_size_in_bytes % HW_DATA_ALIGNMENT) == 0, HAILO_INVALID_ARGUMENT, - "YUY2_to_YUY2 Transform shape_size must be aligned to {}", HW_DATA_ALIGNMENT); + CHECK((shape_size_in_bytes % HailoRTCommon::HW_DATA_ALIGNMENT) == 0, HAILO_INVALID_ARGUMENT, + "YUY2_to_YUY2 Transform shape_size must be aligned to {}", HailoRTCommon::HW_DATA_ALIGNMENT); std::copy_n(src_ptr, shape_size, dst_ptr); @@ -841,7 +823,7 @@ hailo_status transform__h2d_RGB4_to_NHWC(const T *src_ptr, const hailo_3d_image_ ASSERT(NULL 
!= dst_ptr); const auto row_size = src_image_shape.width * src_image_shape.features; - const auto src_row_size = HailoRTCommon::align_to(row_size, RGB4_ALIGNMENT); + const auto src_row_size = HailoRTCommon::align_to(row_size, static_cast(RGB4_ALIGNMENT)); const auto dst_row_size = dst_image_shape.width * dst_image_shape.features; const auto pad_size = (dst_image_shape.width - src_image_shape.width) * dst_image_shape.features; @@ -870,7 +852,7 @@ hailo_status transform__h2d_RGB4_to_NHCW(const T *src_ptr, const hailo_3d_image_ ASSERT(NULL != dst_ptr); const auto row_size = src_image_shape.width * src_image_shape.features; - const auto src_row_size = HailoRTCommon::align_to(row_size, RGB4_ALIGNMENT); + const auto src_row_size = HailoRTCommon::align_to(row_size, static_cast(RGB4_ALIGNMENT)); const auto dst_row_size = dst_image_shape.width * dst_image_shape.features; const auto pad_size = dst_image_shape.width - src_image_shape.width; @@ -1050,7 +1032,7 @@ hailo_status reorder_input_stream(const void *src_ptr, hailo_3d_image_shape_t sr if (((HAILO_FORMAT_ORDER_FCR == src_format.order) || (HAILO_FORMAT_ORDER_NHWC == src_format.order)) && (HAILO_FORMAT_ORDER_FCR == dst_format.order)) { //Check that there is alignment for 8 bytes - assert(0 == ((HailoRTCommon::get_data_bytes(dst_format.type) * dst_image_shape.features) % HW_DATA_ALIGNMENT)); + assert(0 == ((HailoRTCommon::get_data_bytes(dst_format.type) * dst_image_shape.features) % HailoRTCommon::HW_DATA_ALIGNMENT)); switch (dst_format.type) { case HAILO_FORMAT_TYPE_UINT8: transform__h2d_FCR((uint8_t*)src_ptr, &src_image_shape, (uint8_t*)dst_ptr, &dst_image_shape); @@ -1394,7 +1376,7 @@ hailo_status InputTransformContext::transform_inner(const void *src_ptr, void *q hailo_3d_image_shape_t transposed_image_shape = m_src_image_shape; hailo_format_t quantized_src_format = m_src_format; - if (!(m_should_quantize || m_should_transpose || m_should_reorder)) { + if (!(m_should_quantize || m_should_transpose || m_should_reorder 
|| m_should_pad_periph)) { /* If transform was created without any actual use - just copy src_ptr to dst_ptr */ LOGGER__WARN("Transformer was created, but not needed and can be removed. copies src buffer to dst buffer"); auto frame_size = HailoRTCommon::get_frame_size(m_dst_image_shape, m_dst_format); @@ -1457,7 +1439,7 @@ hailo_status FrameOutputTransformContext::transform_inner(const void *src_ptr, v void *orig_dst_ptr = nullptr; void *orig_src_ptr = nullptr; - if (!(m_should_quantize || m_should_transpose || m_should_reorder)) { + if (!(m_should_quantize || m_should_transpose || m_should_reorder || m_should_pad_periph)) { /* If transform context was created without any actual use - just copy src_ptr to dst_ptr */ LOGGER__WARN("Transform context was created, but not needed and can be removed. copies src buffer to dst buffer"); auto frame_size = HailoRTCommon::get_frame_size(m_dst_image_shape, m_dst_format); @@ -1566,9 +1548,9 @@ hailo_status validate_input_transform_params(hailo_3d_image_shape_t src_image_sh if ((HAILO_FORMAT_ORDER_FCR == src_format.order) && (HAILO_FORMAT_ORDER_FCR == dst_format.order)) { //Check that there is alignment for 8 bytes - if (0 != ((HailoRTCommon::get_data_bytes(dst_format.type) * dst_image_shape.features) % HW_DATA_ALIGNMENT)) { + if (0 != ((HailoRTCommon::get_data_bytes(dst_format.type) * dst_image_shape.features) % HailoRTCommon::HW_DATA_ALIGNMENT)) { LOGGER__ERROR("HW features must be aligned to {}. 
passed hw features - {}", - HW_DATA_ALIGNMENT, dst_image_shape.features); + HailoRTCommon::HW_DATA_ALIGNMENT, dst_image_shape.features); return HAILO_INVALID_ARGUMENT; } } else if ((HAILO_FORMAT_ORDER_BAYER_RGB == src_format.order) && @@ -1586,8 +1568,8 @@ hailo_status validate_input_transform_params(hailo_3d_image_shape_t src_image_sh } else if ((HAILO_FORMAT_ORDER_YUY2 == src_format.order) && (HAILO_FORMAT_ORDER_YUY2 == dst_format.order)) { auto shape_size_in_bytes = HailoRTCommon::get_shape_size(src_image_shape) * HailoRTCommon::get_data_bytes(src_format.type); - CHECK(shape_size_in_bytes % HW_DATA_ALIGNMENT == 0, HAILO_INVALID_ARGUMENT, - "YUY2_to_YUY2 Transform shape_size must be aligned to {}", HW_DATA_ALIGNMENT); + CHECK(shape_size_in_bytes % HailoRTCommon::HW_DATA_ALIGNMENT == 0, HAILO_INVALID_ARGUMENT, + "YUY2_to_YUY2 Transform shape_size must be aligned to {}", HailoRTCommon::HW_DATA_ALIGNMENT); } return HAILO_SUCCESS; @@ -1685,11 +1667,10 @@ Expected> InputTransformContext::create(c const auto internal_src_format = HailoRTDefaults::expand_auto_format(src_format, dst_format); const auto src_frame_size = HailoRTCommon::get_frame_size(src_image_shape, internal_src_format); - const auto dst_frame_size = HailoRTCommon::get_frame_size(dst_image_shape, dst_format); + const auto dst_frame_size = HailoRTCommon::get_periph_frame_size(dst_image_shape, dst_format); Buffer quant_buffer; - auto should_quantize = TransformContextUtils::should_quantize(HAILO_H2D_STREAM, src_format, dst_format, - dst_quant_infos); + auto should_quantize = TransformContextUtils::should_quantize(HAILO_H2D_STREAM, src_format, dst_format); CHECK_EXPECTED(should_quantize); if (should_quantize.value()) { auto expected_quant_buffer = Buffer::create(src_frame_size, 0); @@ -1707,10 +1688,11 @@ Expected> InputTransformContext::create(c } auto should_reorder = TransformContextUtils::should_reorder(src_image_shape, src_format, dst_image_shape, dst_format); + auto should_pad_periph = 
TransformContextUtils::should_pad_periph(dst_image_shape, dst_format); std::unique_ptr transform_context(new (std::nothrow) InputTransformContext(src_frame_size, src_image_shape, internal_src_format, dst_frame_size, dst_image_shape, dst_format, dst_quant_infos, std::move(quant_buffer), - std::move(transpose_buffer), *should_quantize, should_transpose, should_reorder)); + std::move(transpose_buffer), *should_quantize, should_transpose, should_reorder, should_pad_periph)); CHECK_AS_EXPECTED(nullptr != transform_context, HAILO_OUT_OF_HOST_MEMORY); return transform_context; @@ -1753,7 +1735,8 @@ Expected> InputTransformContext::create(I InputTransformContext::InputTransformContext(size_t src_frame_size, const hailo_3d_image_shape_t &src_image_shape, const hailo_format_t &src_format, size_t dst_frame_size, const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, const std::vector &dst_quant_infos, Buffer &&quant_buffer, - Buffer &&transpose_buffer,const bool should_quantize, const bool should_transpose, const bool should_reorder) : + Buffer &&transpose_buffer,const bool should_quantize, const bool should_transpose, const bool should_reorder, + const bool should_pad_periph) : m_src_frame_size(src_frame_size), m_src_image_shape(src_image_shape), m_src_format(src_format), @@ -1764,6 +1747,7 @@ InputTransformContext::InputTransformContext(size_t src_frame_size, const hailo_ m_should_quantize(should_quantize), m_should_transpose(should_transpose), m_should_reorder(should_reorder), + m_should_pad_periph(should_pad_periph), m_quant_buffer(std::move(quant_buffer)), m_transpose_buffer(std::move(transpose_buffer)) {} @@ -1876,7 +1860,7 @@ Expected> OutputTransformContext::create OutputTransformContext::OutputTransformContext(size_t src_frame_size, const hailo_format_t &src_format, size_t dst_frame_size, const hailo_format_t &dst_format, const std::vector &dst_quant_infos, const bool should_quantize, - const bool should_transpose, const bool should_reorder) 
: + const bool should_transpose, const bool should_reorder, const bool should_pad_periph) : m_src_frame_size(src_frame_size), m_src_format(src_format), m_dst_frame_size(dst_frame_size), @@ -1884,15 +1868,16 @@ OutputTransformContext::OutputTransformContext(size_t src_frame_size, const hail m_dst_quant_infos(dst_quant_infos), m_should_quantize(should_quantize), m_should_transpose(should_transpose), - m_should_reorder(should_reorder) + m_should_reorder(should_reorder), + m_should_pad_periph(should_pad_periph) {} FrameOutputTransformContext::FrameOutputTransformContext(size_t src_frame_size, const hailo_3d_image_shape_t &src_image_shape, const hailo_format_t &src_format, size_t dst_frame_size, const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, const std::vector &dst_quant_infos, Buffer&& transpose_buffer, - const bool should_quantize, const bool should_transpose, const bool should_reorder) : + const bool should_quantize, const bool should_transpose, const bool should_reorder, const bool should_pad_periph) : OutputTransformContext(src_frame_size, src_format, dst_frame_size, dst_format, dst_quant_infos, should_quantize, - should_transpose, should_reorder), m_src_image_shape(src_image_shape), m_dst_image_shape(dst_image_shape), + should_transpose, should_reorder, should_pad_periph), m_src_image_shape(src_image_shape), m_dst_image_shape(dst_image_shape), m_transpose_buffer(std::move(transpose_buffer)) { // TODO: Add verification that quant infos size equals to features count (HRT-11052) @@ -1946,11 +1931,10 @@ Expected> FrameOutputTransformContext::c { const auto internal_dst_format = HailoRTDefaults::expand_auto_format(dst_format, src_format); - const auto src_frame_size = HailoRTCommon::get_frame_size(src_image_shape, src_format); + const auto src_frame_size = HailoRTCommon::get_periph_frame_size(src_image_shape, src_format); const auto dst_frame_size = HailoRTCommon::get_frame_size(dst_image_shape, internal_dst_format); - auto 
should_quantize = TransformContextUtils::should_quantize(HAILO_D2H_STREAM, src_format, dst_format, - dst_quant_infos); + auto should_quantize = TransformContextUtils::should_quantize(HAILO_D2H_STREAM, src_format, dst_format); CHECK_EXPECTED(should_quantize); Buffer transpose_buffer; @@ -1962,10 +1946,11 @@ Expected> FrameOutputTransformContext::c } auto should_reorder = TransformContextUtils::should_reorder(src_image_shape, src_format, dst_image_shape, dst_format); + auto should_pad_periph = TransformContextUtils::should_pad_periph(dst_image_shape, dst_format); std::unique_ptr frame_transform_context = std::make_unique(src_frame_size, src_image_shape, src_format, dst_frame_size, dst_image_shape, internal_dst_format, dst_quant_infos, std::move(transpose_buffer), - *should_quantize, should_transpose, should_reorder); + *should_quantize, should_transpose, should_reorder, should_pad_periph); CHECK_AS_EXPECTED(nullptr != frame_transform_context, HAILO_OUT_OF_HOST_MEMORY); @@ -1976,7 +1961,7 @@ NMSOutputTransformContext::NMSOutputTransformContext(size_t src_frame_size, cons size_t dst_frame_size, const hailo_format_t &dst_format, const std::vector &dst_quant_infos, const hailo_nms_info_t &nms_info, Buffer &&quant_buffer, const bool should_quantize, const bool should_transpose) : OutputTransformContext(src_frame_size, src_format, dst_frame_size, dst_format, dst_quant_infos, should_quantize ,should_transpose, - true), m_nms_info(nms_info), m_chunk_offsets(nms_info.chunks_per_frame, 0), m_quant_buffer(std::move(quant_buffer)) + true, false), m_nms_info(nms_info), m_chunk_offsets(nms_info.chunks_per_frame, 0), m_quant_buffer(std::move(quant_buffer)) {} Expected> NMSOutputTransformContext::create(const hailo_format_t &src_format, @@ -1998,7 +1983,7 @@ Expected> NMSOutputTransformContext::cre auto dst_frame_size = HailoRTCommon::get_nms_host_frame_size(nms_info, internal_dst_format); Buffer quant_buffer; - auto should_quantize = 
TransformContextUtils::should_quantize(HAILO_D2H_STREAM, src_format, dst_format, dst_quant_infos); + auto should_quantize = TransformContextUtils::should_quantize(HAILO_D2H_STREAM, src_format, dst_format); CHECK_EXPECTED(should_quantize); if (*should_quantize) { dst_frame_size = HailoRTCommon::get_nms_host_frame_size(nms_info, internal_dst_format); diff --git a/hailort/libhailort/src/transform/transform_internal.hpp b/hailort/libhailort/src/transform/transform_internal.hpp index 4738807..3f254a6 100644 --- a/hailort/libhailort/src/transform/transform_internal.hpp +++ b/hailort/libhailort/src/transform/transform_internal.hpp @@ -28,8 +28,6 @@ namespace hailort { -#define HW_DATA_ALIGNMENT (8) - class HAILORTAPI TransformContextUtils final { public: @@ -38,16 +36,16 @@ public: const hailo_format_t &src_format, const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, const std::vector &quant_info); static Expected should_quantize(const hailo_stream_direction_t stream_direction, - const hailo_format_t &src_format, const hailo_format_t &dst_format, const std::vector &quant_info); + const hailo_format_t &src_format, const hailo_format_t &dst_format); static bool should_transpose(const hailo_format_flags_t &src_flags, const hailo_format_flags_t &dst_flags); static bool should_reorder(const hailo_3d_image_shape_t &src_image_shape, const hailo_format_t &src_format, const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format); + static bool should_pad_periph(const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format); static std::string make_quantization_description(hailo_format_type_t src_type, hailo_format_type_t dst_type, const std::vector &quant_info); static std::string make_reorder_description(hailo_format_order_t src_order, hailo_3d_image_shape_t src_shape, hailo_format_order_t dst_order, hailo_3d_image_shape_t dst_shape); static std::string make_transpose_description(hailo_3d_image_shape_t 
original_shape, hailo_3d_image_shape_t transposed_shape); - static bool are_all_quant_infos_identity(const std::vector &quant_infos); template static hailo_status transform__d2h_NHCW_to_NCHW( @@ -63,8 +61,8 @@ public: "NCHW_to_NHCW Transform height src/dst should be the same"); CHECK(dst_image_shape->width <= src_image_shape->width, HAILO_INVALID_ARGUMENT, "NCHW_to_NHCW Transform dst width should be smaller/equal than src width"); - CHECK(((src_image_shape->width * sizeof(T)) % HW_DATA_ALIGNMENT) == 0, HAILO_INVALID_ARGUMENT, - "NCHW_to_NHCW Transform src width must be aligned to {}", HW_DATA_ALIGNMENT); + CHECK(((src_image_shape->width * sizeof(T)) % HailoRTCommon::HW_DATA_ALIGNMENT) == 0, HAILO_INVALID_ARGUMENT, + "NCHW_to_NHCW Transform src width must be aligned to {}", HailoRTCommon::HW_DATA_ALIGNMENT); size_t width_size = dst_image_shape->width; for (uint32_t r = 0; r < src_image_shape->height; r++) { @@ -84,8 +82,6 @@ public: return HAILO_SUCCESS; } private: - static bool should_quantize_by_flags(const hailo_stream_direction_t stream_direction, - const hailo_format_flags_t &src_format_flags, const hailo_format_flags_t &dst_format_flags); static Expected should_quantize_by_type(const hailo_stream_direction_t stream_direction, const hailo_format_type_t &src_format_type, const hailo_format_type_t &dst_format_type); }; @@ -136,7 +132,7 @@ public: FrameOutputTransformContext(size_t src_frame_size, const hailo_3d_image_shape_t &src_image_shape, const hailo_format_t &src_format, size_t dst_frame_size, const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, const std::vector &dst_quant_info, Buffer&& transpose_buffer, - const bool should_quantize, const bool should_transpose, const bool should_reorder); + const bool should_quantize, const bool should_transpose, const bool should_reorder, const bool should_pad_periph); hailo_status transform_inner(const void *src_ptr, void *dst_ptr, MemoryView transpose_buffer); diff --git 
a/hailort/libhailort/src/utils/CMakeLists.txt b/hailort/libhailort/src/utils/CMakeLists.txt index 70cbfc1..066e16e 100644 --- a/hailort/libhailort/src/utils/CMakeLists.txt +++ b/hailort/libhailort/src/utils/CMakeLists.txt @@ -7,6 +7,7 @@ set(SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/buffer_storage.cpp ${CMAKE_CURRENT_SOURCE_DIR}/buffer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sensor_config_utils.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/soc_utils/partial_cluster_reader.cpp ) if(HAILO_BUILD_PROFILER) diff --git a/hailort/libhailort/src/utils/buffer_storage.cpp b/hailort/libhailort/src/utils/buffer_storage.cpp index 2f94cb4..fbdde3d 100644 --- a/hailort/libhailort/src/utils/buffer_storage.cpp +++ b/hailort/libhailort/src/utils/buffer_storage.cpp @@ -201,7 +201,7 @@ Expected HeapStorage::dma_map(Device &, hailo_dma_buffer_direction_t) return make_unexpected(HAILO_INVALID_OPERATION); } -Expected HeapStorage::dma_map(HailoRTDriver &, hailo_dma_buffer_direction_t) +Expected HeapStorage::dma_map(VdmaDevice &, hailo_dma_buffer_direction_t) { LOGGER__ERROR("Heap allocated buffers can't be mapped to DMA"); return make_unexpected(HAILO_INVALID_OPERATION); @@ -258,15 +258,34 @@ Expected DmaStorage::create_from_user_address(void *user_address, return create(user_address, size, data_direction, physical_devices.release()); } +Expected> DmaStorage::create_dma_able_buffer_from_user_size(void *addr, size_t size) +{ + auto storage = create_from_user_address(addr, size); + CHECK_EXPECTED(storage); + + auto buffer = make_shared_nothrow(storage.release()); + CHECK_NOT_NULL_AS_EXPECTED(buffer, HAILO_OUT_OF_HOST_MEMORY); + + return buffer; +} + Expected DmaStorage::create(void *user_address, size_t size, hailo_dma_buffer_direction_t data_direction, std::vector> &&physical_devices) { - // TODO: HRT-10283 support sharing low memory buffers for DART and similar systems. 
- auto dma_able_buffer = vdma::DmaAbleBuffer::create(size, user_address); - CHECK_EXPECTED(dma_able_buffer); + vdma::DmaAbleBufferPtr dma_able_buffer_ptr = nullptr; + if (nullptr == user_address) { + // TODO: HRT-10283 support sharing low memory buffers for DART and similar systems. + auto dma_able_buffer = vdma::DmaAbleBuffer::create_by_allocation(size); + CHECK_EXPECTED(dma_able_buffer); + dma_able_buffer_ptr = dma_able_buffer.release(); + } else { + auto dma_able_buffer = vdma::DmaAbleBuffer::create_from_user_address(user_address, size); + CHECK_EXPECTED(dma_able_buffer); + dma_able_buffer_ptr = dma_able_buffer.release(); + } - auto result = make_shared_nothrow(dma_able_buffer.release()); + auto result = make_shared_nothrow(std::move(dma_able_buffer_ptr)); CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY); for (auto &device : physical_devices) { @@ -284,6 +303,19 @@ DmaStorage::DmaStorage(vdma::DmaAbleBufferPtr &&dma_able_buffer) : m_mappings() {} +DmaStorage::~DmaStorage() +{ + // TODO: deleter callback holds a reference to a device, which is bad since this BufferStorage could outlive + // the device. We need to doc that it isn't allowed. 
Later on, I think devices should use shared_ptrs + // and then the mapping will inc the reference count (HRT-12361) + for (const auto &device_mapping_pair : m_mappings) { + const auto &mapping = device_mapping_pair.second; + if (nullptr != mapping.second) { + mapping.second(); + } + } +} + size_t DmaStorage::size() const { return m_dma_able_buffer->size(); @@ -304,30 +336,42 @@ Expected DmaStorage::dma_map(Device &device, hailo_dma_buffer_direction_t const auto device_type = device.get_type(); CHECK_AS_EXPECTED(((Device::Type::INTEGRATED == device_type) || (Device::Type::PCIE == device_type)), HAILO_INVALID_ARGUMENT, "Invalid device type (expected integrated/pcie, received {})", device_type); - VdmaDevice *vdma_device = reinterpret_cast(&device); - - return dma_map(vdma_device->get_driver(), data_direction); + return dma_map(*reinterpret_cast(&device), data_direction); } -Expected DmaStorage::dma_map(HailoRTDriver &driver, hailo_dma_buffer_direction_t data_direction) +// TODO: change data_direction to hailo_stream_direction_t (HRT-12391) +Expected DmaStorage::dma_map(VdmaDevice &device, hailo_dma_buffer_direction_t data_direction) { CHECK_AS_EXPECTED(data_direction <= HAILO_DMA_BUFFER_DIRECTION_BOTH, HAILO_INVALID_ARGUMENT, "Invalid data direction {}", data_direction); - const auto &device_id = driver.device_id(); + const auto device_id = device.get_dev_id(); auto find_result = m_mappings.find(device_id); if (find_result != m_mappings.end()) { - // The buffer has been mapped => don't map it again + // The buffer has been mapped in this object => don't map it again return Expected(false); // not a new mapping } - // The buffer hasn't been mapped => map it now - auto mapped_buffer = vdma::MappedBuffer::create_shared(driver, m_dma_able_buffer, - static_cast(data_direction)); - CHECK_EXPECTED(mapped_buffer); - - m_mappings.emplace(device_id, mapped_buffer.value()); - return Expected(true); // new mapping + const auto direction = (data_direction == 
HAILO_DMA_BUFFER_DIRECTION_H2D) ? HAILO_H2D_STREAM : HAILO_D2H_STREAM; + + auto mapping_result = device.try_dma_map(m_dma_able_buffer, direction); + CHECK_EXPECTED(mapping_result); + + const auto is_new_mapping = mapping_result->second; + if (is_new_mapping) { + const auto deleter = [&device, address = m_dma_able_buffer->user_address(), direction]() { + // Best effort + auto status = device.dma_unmap(address, direction); + if (HAILO_SUCCESS != status) { + LOGGER__ERROR("Failed to un-map buffer {} from device {} in direction {}", + address, device.get_dev_id(), direction); + } + }; + m_mappings.emplace(device_id, std::make_pair(mapping_result->first, deleter)); + } else { + m_mappings.emplace(device_id, std::make_pair(mapping_result->first, nullptr)); + } + return Expected(is_new_mapping); } Expected DmaStorage::get_dma_mapped_buffer(const std::string &device_id) @@ -339,7 +383,63 @@ Expected DmaStorage::get_dma_mapped_buffer(const std::str return make_unexpected(HAILO_NOT_FOUND); } - return Expected(mapped_buffer->second); + return Expected(mapped_buffer->second.first); +} + +Expected UserBufferStorage::create(void *user_address, const size_t size) +{ + auto result = make_shared_nothrow(user_address, size); + CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY); + + return result; +} + +UserBufferStorage::UserBufferStorage(void * user_address, const size_t size) : + BufferStorage(Type::USER_BUFFER), + m_user_address(user_address), + m_size(size) +{} + +size_t UserBufferStorage::size() const +{ + return m_size; +} + +void *UserBufferStorage::user_address() +{ + return const_cast(m_user_address); +} + +Expected UserBufferStorage::release() noexcept +{ + return make_unexpected(HAILO_NOT_IMPLEMENTED); +} + +Expected UserBufferStorage::dma_map(Device &/* device */, hailo_dma_buffer_direction_t /* data_direction */) +{ + return make_unexpected(HAILO_NOT_IMPLEMENTED); +} + +// TODO: change data_direction to hailo_stream_direction_t (HRT-12391) +Expected 
UserBufferStorage::dma_map(VdmaDevice &/* device */, hailo_dma_buffer_direction_t /* data_direction */) +{ + return make_unexpected(HAILO_NOT_IMPLEMENTED); +} + +Expected UserBufferStorage::get_dma_mapped_buffer(const std::string &/* device_id */) +{ + return make_unexpected(HAILO_NOT_IMPLEMENTED); +} + +Expected> UserBufferStorage::create_storage_from_user_buffer(void *addr, size_t size) +{ + auto storage = UserBufferStorage::create(addr, size); + CHECK_EXPECTED(storage); + + auto buffer = make_shared_nothrow(storage.release()); + CHECK_NOT_NULL_AS_EXPECTED(buffer, HAILO_OUT_OF_HOST_MEMORY); + + return buffer; } } /* namespace hailort */ diff --git a/hailort/libhailort/src/utils/hailort_common.cpp b/hailort/libhailort/src/utils/hailort_common.cpp index c036b39..d908e87 100644 --- a/hailort/libhailort/src/utils/hailort_common.cpp +++ b/hailort/libhailort/src/utils/hailort_common.cpp @@ -19,6 +19,8 @@ const uint32_t HailoRTCommon::MASK_PARAMS; const uint32_t HailoRTCommon::MAX_DEFUSED_LAYER_COUNT; const size_t HailoRTCommon::HW_DATA_ALIGNMENT; const uint32_t HailoRTCommon::MAX_NMS_BURST_SIZE; +const size_t HailoRTCommon::DMA_ABLE_ALIGNMENT_WRITE_HW_LIMITATION; +const size_t HailoRTCommon::DMA_ABLE_ALIGNMENT_READ_HW_LIMITATION; Expected HailoRTCommon::to_device_id(const std::string &device_id) { diff --git a/hailort/libhailort/src/utils/hailort_logger.cpp b/hailort/libhailort/src/utils/hailort_logger.cpp index 5cf8515..57fb592 100644 --- a/hailort/libhailort/src/utils/hailort_logger.cpp +++ b/hailort/libhailort/src/utils/hailort_logger.cpp @@ -229,7 +229,7 @@ void HailoRTLogger::set_levels(spdlog::level::level_enum console_level, spdlog:: bool flush_every_print = should_flush_every_print(HAILORT_LOGGER_FLUSH_EVERY_PRINT_ENV_VAR); if (flush_every_print){ - m_hailort_logger->flush_on(spdlog::level::debug); + m_hailort_logger->flush_on(spdlog::level::trace); std::cerr << "HailoRT warning: Flushing log file on every print. May reduce HailoRT performance!" 
<< std::endl; } else { m_hailort_logger->flush_on(flush_level); diff --git a/hailort/libhailort/src/utils/profiler/handler.hpp b/hailort/libhailort/src/utils/profiler/handler.hpp index 5728562..406a811 100644 --- a/hailort/libhailort/src/utils/profiler/handler.hpp +++ b/hailort/libhailort/src/utils/profiler/handler.hpp @@ -40,16 +40,6 @@ struct InitProfilerProtoTrace : Trace InitProfilerProtoTrace () : Trace("init_profiler_proto") {} }; -struct CoreOpIdleTrace : Trace -{ - CoreOpIdleTrace(const device_id_t &device_id, scheduler_core_op_handle_t core_op_handle) - : Trace("core_op_idle"), device_id(device_id), core_op_handle(core_op_handle) - {} - - device_id_t device_id; - scheduler_core_op_handle_t core_op_handle; -}; - struct AddDeviceTrace : Trace { AddDeviceTrace(const device_id_t &device_id, const std::string &device_arch) @@ -62,33 +52,30 @@ struct AddDeviceTrace : Trace struct MonitorStartTrace : Trace { - MonitorStartTrace(uint32_t device_count) - : Trace("scheduler_start"), device_count(device_count) + MonitorStartTrace() + : Trace("scheduler_start") {} - uint32_t device_count = 0; }; struct AddCoreOpTrace : Trace { - AddCoreOpTrace(const device_id_t &device_id, const std::string &core_op_name, uint64_t timeout, uint32_t threshold, - scheduler_core_op_handle_t handle, bool is_nms, int batch_size) - : Trace("add_core_op"), device_id(device_id), core_op_name(core_op_name), timeout(timeout), threshold(threshold), - core_op_handle(handle), is_nms(is_nms), batch_size(batch_size) + AddCoreOpTrace(const std::string &core_op_name, uint64_t timeout, uint32_t threshold, + scheduler_core_op_handle_t handle, int batch_size) + : Trace("add_core_op"), core_op_name(core_op_name), timeout(timeout), threshold(threshold), + core_op_handle(handle), batch_size(batch_size) {} - device_id_t device_id; std::string core_op_name; uint64_t timeout = 0; uint32_t threshold = 0; scheduler_core_op_handle_t core_op_handle = INVALID_CORE_OP_HANDLE; - bool is_nms = false; int batch_size = 
0; }; -struct CreateCoreOpInputStreamsTrace : Trace +struct AddStreamH2DTrace : Trace { - CreateCoreOpInputStreamsTrace(const device_id_t &device_id, const std::string &core_op_name, const std::string &stream_name, uint32_t queue_size, + AddStreamH2DTrace(const device_id_t &device_id, const std::string &core_op_name, const std::string &stream_name, uint32_t queue_size, scheduler_core_op_handle_t core_op_handle) : Trace("create_input_stream"), device_id(device_id), core_op_name(core_op_name), stream_name(stream_name), queue_size(queue_size), core_op_handle(core_op_handle) @@ -101,9 +88,9 @@ struct CreateCoreOpInputStreamsTrace : Trace scheduler_core_op_handle_t core_op_handle; }; -struct CreateCoreOpOutputStreamsTrace : Trace +struct AddStreamD2HTrace : Trace { - CreateCoreOpOutputStreamsTrace(const device_id_t &device_id, const std::string &core_op_name, const std::string &stream_name, uint32_t queue_size, + AddStreamD2HTrace(const device_id_t &device_id, const std::string &core_op_name, const std::string &stream_name, uint32_t queue_size, scheduler_core_op_handle_t core_op_handle) : Trace("create_output_stream"), device_id(device_id), core_op_name(core_op_name), stream_name(stream_name), queue_size(queue_size), core_op_handle(core_op_handle) @@ -116,9 +103,9 @@ struct CreateCoreOpOutputStreamsTrace : Trace scheduler_core_op_handle_t core_op_handle; }; -struct WriteFrameTrace : Trace +struct FrameEnqueueH2DTrace : Trace { - WriteFrameTrace(scheduler_core_op_handle_t core_op_handle, const std::string &queue_name) + FrameEnqueueH2DTrace(scheduler_core_op_handle_t core_op_handle, const std::string &queue_name) : Trace("write_frame"), core_op_handle(core_op_handle), queue_name(queue_name) {} @@ -126,9 +113,9 @@ struct WriteFrameTrace : Trace std::string queue_name; }; -struct InputVdmaDequeueTrace : Trace +struct FrameDequeueH2DTrace : Trace { - InputVdmaDequeueTrace(const device_id_t &device_id, scheduler_core_op_handle_t core_op_handle, const std::string &queue_name) 
+ FrameDequeueH2DTrace(const device_id_t &device_id, scheduler_core_op_handle_t core_op_handle, const std::string &queue_name) : Trace("input_vdma_dequeue"), device_id(device_id), core_op_handle(core_op_handle), queue_name(queue_name) {} @@ -137,9 +124,9 @@ struct InputVdmaDequeueTrace : Trace std::string queue_name; }; -struct ReadFrameTrace : Trace +struct FrameDequeueD2HTrace : Trace { - ReadFrameTrace(scheduler_core_op_handle_t core_op_handle, const std::string &queue_name) + FrameDequeueD2HTrace(scheduler_core_op_handle_t core_op_handle, const std::string &queue_name) : Trace("read_frame"), core_op_handle(core_op_handle), queue_name(queue_name) {} @@ -147,9 +134,9 @@ struct ReadFrameTrace : Trace std::string queue_name; }; -struct OutputVdmaEnqueueTrace : Trace +struct FrameEnqueueD2HTrace : Trace { - OutputVdmaEnqueueTrace(const device_id_t &device_id, scheduler_core_op_handle_t core_op_handle, const std::string &queue_name) + FrameEnqueueD2HTrace(const device_id_t &device_id, scheduler_core_op_handle_t core_op_handle, const std::string &queue_name) : Trace("output_vdma_enqueue"), device_id(device_id), core_op_handle(core_op_handle), queue_name(queue_name) {} @@ -213,9 +200,9 @@ struct OracleDecisionTrace : Trace bool over_timeout; }; -struct DumpProfilerState : Trace +struct DumpProfilerStateTrace : Trace { - DumpProfilerState() : Trace("dump_profiler_state") {} + DumpProfilerStateTrace() : Trace("dump_profiler_state") {} }; class Handler @@ -225,21 +212,20 @@ public: virtual void handle_trace(const InitTrace&) {}; virtual void handle_trace(const AddCoreOpTrace&) {}; - virtual void handle_trace(const CreateCoreOpInputStreamsTrace&) {}; - virtual void handle_trace(const CreateCoreOpOutputStreamsTrace&) {}; - virtual void handle_trace(const WriteFrameTrace&) {}; - virtual void handle_trace(const InputVdmaDequeueTrace&) {}; - virtual void handle_trace(const ReadFrameTrace&) {}; - virtual void handle_trace(const OutputVdmaEnqueueTrace&) {}; + virtual void 
handle_trace(const AddStreamH2DTrace&) {}; + virtual void handle_trace(const AddStreamD2HTrace&) {}; + virtual void handle_trace(const FrameEnqueueH2DTrace&) {}; + virtual void handle_trace(const FrameDequeueH2DTrace&) {}; + virtual void handle_trace(const FrameDequeueD2HTrace&) {}; + virtual void handle_trace(const FrameEnqueueD2HTrace&) {}; virtual void handle_trace(const SwitchCoreOpTrace&) {}; virtual void handle_trace(const MonitorStartTrace&) {}; - virtual void handle_trace(const CoreOpIdleTrace&) {}; virtual void handle_trace(const AddDeviceTrace&) {}; virtual void handle_trace(const SetCoreOpTimeoutTrace&) {}; virtual void handle_trace(const SetCoreOpThresholdTrace&) {}; virtual void handle_trace(const SetCoreOpPriorityTrace&) {}; virtual void handle_trace(const OracleDecisionTrace&) {}; - virtual void handle_trace(const DumpProfilerState&) {}; + virtual void handle_trace(const DumpProfilerStateTrace&) {}; virtual void handle_trace(const InitProfilerProtoTrace&) {}; }; diff --git a/hailort/libhailort/src/utils/profiler/monitor_handler.cpp b/hailort/libhailort/src/utils/profiler/monitor_handler.cpp index 698d2a2..25d6c72 100644 --- a/hailort/libhailort/src/utils/profiler/monitor_handler.cpp +++ b/hailort/libhailort/src/utils/profiler/monitor_handler.cpp @@ -37,7 +37,7 @@ void MonitorHandler::clear_monitor() { void MonitorHandler::handle_trace(const MonitorStartTrace &trace) { - m_device_count = trace.device_count; + (void)trace; start_mon(); } @@ -45,7 +45,6 @@ void MonitorHandler::handle_trace(const AddCoreOpTrace &trace) { m_core_ops_info[trace.core_op_handle].utilization = 0; m_core_ops_info[trace.core_op_handle].core_op_name = trace.core_op_name; - m_core_ops_info[trace.core_op_handle].is_nms = trace.is_nms; } void MonitorHandler::handle_trace(const AddDeviceTrace &trace) @@ -56,14 +55,14 @@ void MonitorHandler::handle_trace(const AddDeviceTrace &trace) void MonitorHandler::handle_trace(const SwitchCoreOpTrace &trace) { + // TODO: 'if' should be removed, 
this is temporary solution since this trace is called out of the scheduler or vdevice. + if (!m_is_monitor_currently_working) { return; } assert(contains(m_devices_info, trace.device_id)); m_devices_info.at(trace.device_id).current_core_op_handle = trace.core_op_handle; } -void MonitorHandler::handle_trace(const CreateCoreOpInputStreamsTrace &trace) +void MonitorHandler::handle_trace(const AddStreamH2DTrace &trace) { - // TODO- HRT-10371 'if' should be removed, this is temporary solution since this trace is called out of the scheduler. - if (!m_is_monitor_currently_working) { return; } auto core_op_handle = get_core_op_handle_by_name(trace.core_op_name); assert(contains(m_core_ops_info, core_op_handle)); assert(contains(m_devices_info, trace.device_id)); @@ -74,10 +73,8 @@ void MonitorHandler::handle_trace(const CreateCoreOpInputStreamsTrace &trace) m_devices_info.at(trace.device_id).requested_transferred_frames_h2d[core_op_handle]->insert(trace.stream_name); } -void MonitorHandler::handle_trace(const CreateCoreOpOutputStreamsTrace &trace) +void MonitorHandler::handle_trace(const AddStreamD2HTrace &trace) { - // TODO- HRT-10371 'if' should be removed, this is temporary solution since this trace is called out of the scheduler. 
- if (!m_is_monitor_currently_working) { return; } auto core_op_handle = get_core_op_handle_by_name(trace.core_op_name); assert(contains(m_core_ops_info, core_op_handle)); assert(contains(m_devices_info, trace.device_id)); @@ -88,7 +85,7 @@ void MonitorHandler::handle_trace(const CreateCoreOpOutputStreamsTrace &trace) m_devices_info.at(trace.device_id).finished_transferred_frames_d2h[core_op_handle]->insert(trace.stream_name); } -void MonitorHandler::handle_trace(const WriteFrameTrace &trace) +void MonitorHandler::handle_trace(const FrameEnqueueH2DTrace &trace) { assert(contains(m_core_ops_info, trace.core_op_handle)); assert(contains(m_core_ops_info[trace.core_op_handle].input_streams_info, trace.queue_name)); @@ -97,7 +94,7 @@ void MonitorHandler::handle_trace(const WriteFrameTrace &trace) queue.pending_frames_count_acc->add_data_point(queue.pending_frames_count->load()); } -void MonitorHandler::handle_trace(const ReadFrameTrace &trace) +void MonitorHandler::handle_trace(const FrameDequeueD2HTrace &trace) { assert(contains(m_core_ops_info, trace.core_op_handle)); assert(contains(m_core_ops_info[trace.core_op_handle].output_streams_info, trace.queue_name)); @@ -107,8 +104,10 @@ void MonitorHandler::handle_trace(const ReadFrameTrace &trace) queue.total_frames_count->fetch_add(1); } -void MonitorHandler::handle_trace(const OutputVdmaEnqueueTrace &trace) +void MonitorHandler::handle_trace(const FrameEnqueueD2HTrace &trace) { + // TODO: 'if' should be removed, this is temporary solution since this trace is called out of the scheduler or vdevice. 
+ if (!m_is_monitor_currently_working) { return; } assert(contains(m_core_ops_info, trace.core_op_handle)); assert(contains(m_core_ops_info[trace.core_op_handle].output_streams_info, trace.queue_name)); @@ -128,8 +127,10 @@ void MonitorHandler::handle_trace(const OutputVdmaEnqueueTrace &trace) } } -void MonitorHandler::handle_trace(const InputVdmaDequeueTrace &trace) +void MonitorHandler::handle_trace(const FrameDequeueH2DTrace &trace) { + // TODO: 'if' should be removed, this is temporary solution since this trace is called out of the scheduler or vdevice. + if (!m_is_monitor_currently_working) { return; } assert(contains(m_core_ops_info, trace.core_op_handle)); assert(contains(m_core_ops_info[trace.core_op_handle].input_streams_info, trace.queue_name)); assert(contains(m_devices_info, trace.device_id)); @@ -292,7 +293,7 @@ void MonitorHandler::log_monitor_frames_infos(ProtoMon &mon) auto stream_frames_info = net_frames_info->add_streams_frames_infos(); stream_frames_info->set_stream_name(stream.first); stream_frames_info->set_stream_direction(PROTO__STREAM_DIRECTION__HOST_TO_DEVICE); - stream_frames_info->set_buffer_frames_size(static_cast(stream.second.queue_size * m_device_count)); + stream_frames_info->set_buffer_frames_size(static_cast(stream.second.queue_size * m_devices_info.size())); stream_frames_info->set_pending_frames_count(static_cast(stream.second.pending_frames_count->load())); auto expected_min_val = stream.second.pending_frames_count_acc->min(); @@ -324,36 +325,32 @@ void MonitorHandler::log_monitor_frames_infos(ProtoMon &mon) auto stream_frames_info = net_frames_info->add_streams_frames_infos(); stream_frames_info->set_stream_name(stream.first); stream_frames_info->set_stream_direction(PROTO__STREAM_DIRECTION__DEVICE_TO_HOST); - if (m_core_ops_info[core_op_handle].is_nms) { - stream_frames_info->set_pending_frames_count(SCHEDULER_MON_NAN_VAL); - stream_frames_info->set_buffer_frames_size(SCHEDULER_MON_NAN_VAL); + + 
stream_frames_info->set_pending_frames_count(static_cast(stream.second.pending_frames_count->load())); + stream_frames_info->set_buffer_frames_size(static_cast(stream.second.queue_size * m_devices_info.size())); + + auto expected_min_val = stream.second.pending_frames_count_acc->min(); + if (expected_min_val.status() == HAILO_SUCCESS) { + stream_frames_info->set_min_pending_frames_count(static_cast(expected_min_val.release())); + } else { + stream_frames_info->set_min_pending_frames_count(-1); + } + + auto expected_max_val = stream.second.pending_frames_count_acc->max(); + if (expected_max_val.status() == HAILO_SUCCESS) { + stream_frames_info->set_max_pending_frames_count(static_cast(expected_max_val.release())); + } else { + stream_frames_info->set_max_pending_frames_count(-1); + } + + auto expected_avg_val = stream.second.pending_frames_count_acc->mean(); + if (expected_avg_val.status() == HAILO_SUCCESS) { + stream_frames_info->set_avg_pending_frames_count(expected_avg_val.release()); } else { - stream_frames_info->set_pending_frames_count(static_cast(stream.second.pending_frames_count->load())); - stream_frames_info->set_buffer_frames_size(static_cast(stream.second.queue_size * m_device_count)); - - auto expected_min_val = stream.second.pending_frames_count_acc->min(); - if (expected_min_val.status() == HAILO_SUCCESS) { - stream_frames_info->set_min_pending_frames_count(static_cast(expected_min_val.release())); - } else { - stream_frames_info->set_min_pending_frames_count(-1); - } - - auto expected_max_val = stream.second.pending_frames_count_acc->max(); - if (expected_max_val.status() == HAILO_SUCCESS) { - stream_frames_info->set_max_pending_frames_count(static_cast(expected_max_val.release())); - } else { - stream_frames_info->set_max_pending_frames_count(-1); - } - - auto expected_avg_val = stream.second.pending_frames_count_acc->mean(); - if (expected_avg_val.status() == HAILO_SUCCESS) { - 
stream_frames_info->set_avg_pending_frames_count(expected_avg_val.release()); - } else { - stream_frames_info->set_avg_pending_frames_count(-1); - } - - stream.second.pending_frames_count_acc->get_and_clear(); + stream_frames_info->set_avg_pending_frames_count(-1); } + + stream.second.pending_frames_count_acc->get_and_clear(); } } } diff --git a/hailort/libhailort/src/utils/profiler/monitor_handler.hpp b/hailort/libhailort/src/utils/profiler/monitor_handler.hpp index 8ee18ef..a62498d 100644 --- a/hailort/libhailort/src/utils/profiler/monitor_handler.hpp +++ b/hailort/libhailort/src/utils/profiler/monitor_handler.hpp @@ -51,6 +51,79 @@ namespace hailort using stream_name = std::string; +class SchedulerCounter +{ +public: + SchedulerCounter() : m_map() + {} + + void insert(const stream_name_t &name) + { + assert(!contains(m_map, name)); + m_map[name] = 0; + } + + uint32_t operator[](const stream_name_t &name) const + { + assert(contains(m_map, name)); + return m_map.at(name); + } + + void increase(const stream_name_t &name) + { + assert(contains(m_map, name)); + m_map[name]++; + } + + void decrease(const stream_name_t &name) + { + assert(contains(m_map, name)); + assert(m_map[name] > 0); + m_map[name]--; + } + + uint32_t get_min_value() const + { + return get_min_value_of_unordered_map(m_map); + } + + uint32_t get_max_value() const + { + return get_max_value_of_unordered_map(m_map); + } + + bool all_values_bigger_or_equal(uint32_t value) const + { + for (const auto &pair : m_map) { + if (value > pair.second) { + return false; + } + } + return true; + } + + bool empty() const + { + for (const auto &pair : m_map) { + if (0 != pair.second) { + return false; + } + } + return true; + } + + void reset() + { + for (auto &pair : m_map) { + pair.second = 0; + } + } + +private: + std::unordered_map m_map; +}; + + struct DeviceInfo { DeviceInfo(const device_id_t &device_id, const std::string &device_arch) : device_id(device_id), device_arch(device_arch), 
device_has_drained_everything(true), @@ -78,7 +151,6 @@ struct CoreOpInfo { std::unordered_map input_streams_info; std::unordered_map output_streams_info; std::string core_op_name; - bool is_nms; double utilization; }; @@ -93,12 +165,12 @@ public: void clear_monitor(); virtual void handle_trace(const AddCoreOpTrace&) override; - virtual void handle_trace(const CreateCoreOpInputStreamsTrace&) override; - virtual void handle_trace(const CreateCoreOpOutputStreamsTrace&) override; - virtual void handle_trace(const WriteFrameTrace&) override; - virtual void handle_trace(const ReadFrameTrace&) override; - virtual void handle_trace(const InputVdmaDequeueTrace&) override; - virtual void handle_trace(const OutputVdmaEnqueueTrace&) override; + virtual void handle_trace(const AddStreamH2DTrace&) override; + virtual void handle_trace(const AddStreamD2HTrace&) override; + virtual void handle_trace(const FrameEnqueueH2DTrace&) override; + virtual void handle_trace(const FrameDequeueD2HTrace&) override; + virtual void handle_trace(const FrameDequeueH2DTrace&) override; + virtual void handle_trace(const FrameEnqueueD2HTrace&) override; virtual void handle_trace(const SwitchCoreOpTrace&) override; virtual void handle_trace(const MonitorStartTrace&) override; virtual void handle_trace(const AddDeviceTrace&) override; @@ -122,7 +194,6 @@ private: scheduler_core_op_handle_t get_core_op_handle_by_name(const std::string &name); bool m_is_monitor_currently_working = false; - uint32_t m_device_count; std::thread m_mon_thread; EventPtr m_mon_shutdown_event; #if defined(__GNUC__) diff --git a/hailort/libhailort/src/utils/profiler/profiler_utils.hpp b/hailort/libhailort/src/utils/profiler/profiler_utils.hpp index a71037c..de28bd7 100644 --- a/hailort/libhailort/src/utils/profiler/profiler_utils.hpp +++ b/hailort/libhailort/src/utils/profiler/profiler_utils.hpp @@ -26,6 +26,7 @@ struct ProfilerTime { uint32_t day; uint32_t hour; uint32_t min; + int64_t time_since_epoch; }; #if 
defined(__linux__) @@ -86,6 +87,8 @@ ProfilerTime get_curr_time() curr_time.year = t_time.tm_year + 1900; curr_time.hour = t_time.tm_hour; curr_time.min = t_time.tm_min; + curr_time.time_since_epoch = std::chrono::duration_cast + (std::chrono::high_resolution_clock::now().time_since_epoch()).count(); return curr_time; } diff --git a/hailort/libhailort/src/utils/profiler/scheduler_profiler_handler.cpp b/hailort/libhailort/src/utils/profiler/scheduler_profiler_handler.cpp index f81da4d..bc4f171 100644 --- a/hailort/libhailort/src/utils/profiler/scheduler_profiler_handler.cpp +++ b/hailort/libhailort/src/utils/profiler/scheduler_profiler_handler.cpp @@ -39,7 +39,7 @@ namespace hailort SchedulerProfilerHandler::SchedulerProfilerHandler(int64_t &start_time) #ifndef __ANDROID__ : m_file_sink(HailoRTLogger::create_file_sink(HailoRTLogger::get_log_path(SCHEDULER_PROFILER_LOGGER_PATH), SCHEDULER_PROFILER_LOGGER_FILENAME, false)), - m_first_write(true), m_start_time(start_time) + m_first_write(true) #endif { #ifndef __ANDROID__ @@ -151,7 +151,7 @@ void SchedulerProfilerHandler::handle_trace(const InitProfilerProtoTrace &trace) init->mutable_time()->set_hour(curr_time.hour); init->mutable_time()->set_min(curr_time.min); init->set_time_stamp(trace.timestamp); - init->set_time_stamp_since_epoch(m_start_time); + init->set_time_stamp_since_epoch(curr_time.time_since_epoch); } void SchedulerProfilerHandler::handle_trace(const AddCoreOpTrace &trace) @@ -159,11 +159,11 @@ void SchedulerProfilerHandler::handle_trace(const AddCoreOpTrace &trace) log(JSON({ {"action", json_to_string(trace.name)}, {"timestamp", json_to_string(trace.timestamp)}, - {"device_id", json_to_string(trace.device_id)}, {"core_op_name", json_to_string(trace.core_op_name)}, {"core_op_handle", json_to_string(trace.core_op_handle)}, {"timeout", json_to_string((uint64_t)trace.timeout)}, - {"threshold", json_to_string((uint64_t)trace.threshold)} + {"threshold", json_to_string((uint64_t)trace.threshold)}, + 
{"max_batch_size", json_to_string((uint64_t)trace.batch_size)} })); std::lock_guard lock(m_proto_lock); @@ -171,6 +171,7 @@ void SchedulerProfilerHandler::handle_trace(const AddCoreOpTrace &trace) added_trace->mutable_added_core_op()->set_time_stamp(trace.timestamp); added_trace->mutable_added_core_op()->set_core_op_handle(trace.core_op_handle); added_trace->mutable_added_core_op()->set_core_op_name(trace.core_op_name); + added_trace->mutable_added_core_op()->set_max_batch_size(trace.batch_size); } void SchedulerProfilerHandler::handle_trace(const AddDeviceTrace &trace) @@ -182,7 +183,7 @@ void SchedulerProfilerHandler::handle_trace(const AddDeviceTrace &trace) added_trace->mutable_added_device()->set_time_stamp(trace.timestamp); } -void SchedulerProfilerHandler::handle_trace(const CreateCoreOpInputStreamsTrace &trace) +void SchedulerProfilerHandler::handle_trace(const AddStreamH2DTrace &trace) { log(JSON({ {"action", json_to_string(trace.name)}, @@ -203,7 +204,7 @@ void SchedulerProfilerHandler::handle_trace(const CreateCoreOpInputStreamsTrace added_trace->mutable_added_stream()->set_time_stamp(trace.timestamp); } -void SchedulerProfilerHandler::handle_trace(const CreateCoreOpOutputStreamsTrace &trace) +void SchedulerProfilerHandler::handle_trace(const AddStreamD2HTrace &trace) { log(JSON({ {"action", json_to_string(trace.name)}, @@ -224,7 +225,7 @@ void SchedulerProfilerHandler::handle_trace(const CreateCoreOpOutputStreamsTrace added_trace->mutable_added_stream()->set_time_stamp(trace.timestamp); } -void SchedulerProfilerHandler::handle_trace(const WriteFrameTrace &trace) +void SchedulerProfilerHandler::handle_trace(const FrameEnqueueH2DTrace &trace) { log(JSON({ {"action", json_to_string(trace.name)}, @@ -241,7 +242,7 @@ void SchedulerProfilerHandler::handle_trace(const WriteFrameTrace &trace) added_trace->mutable_frame_enqueue()->set_time_stamp(trace.timestamp); } -void SchedulerProfilerHandler::handle_trace(const InputVdmaDequeueTrace &trace) +void 
SchedulerProfilerHandler::handle_trace(const FrameDequeueH2DTrace &trace) { log(JSON({ {"action", json_to_string(trace.name)}, @@ -260,7 +261,7 @@ void SchedulerProfilerHandler::handle_trace(const InputVdmaDequeueTrace &trace) added_trace->mutable_frame_dequeue()->set_time_stamp(trace.timestamp); } -void SchedulerProfilerHandler::handle_trace(const ReadFrameTrace &trace) +void SchedulerProfilerHandler::handle_trace(const FrameDequeueD2HTrace &trace) { log(JSON({ {"action", json_to_string(trace.name)}, @@ -277,7 +278,7 @@ void SchedulerProfilerHandler::handle_trace(const ReadFrameTrace &trace) added_trace->mutable_frame_dequeue()->set_time_stamp(trace.timestamp); } -void SchedulerProfilerHandler::handle_trace(const OutputVdmaEnqueueTrace &trace) +void SchedulerProfilerHandler::handle_trace(const FrameEnqueueD2HTrace &trace) { log(JSON({ {"action", json_to_string(trace.name)}, @@ -371,7 +372,7 @@ void SchedulerProfilerHandler::handle_trace(const OracleDecisionTrace &trace) added_trace->mutable_switch_core_op_decision()->set_over_timeout(trace.over_timeout); } -void SchedulerProfilerHandler::handle_trace(const DumpProfilerState &trace) +void SchedulerProfilerHandler::handle_trace(const DumpProfilerStateTrace &trace) { (void)trace; serialize_and_dump_proto(); diff --git a/hailort/libhailort/src/utils/profiler/scheduler_profiler_handler.hpp b/hailort/libhailort/src/utils/profiler/scheduler_profiler_handler.hpp index b5a1699..358d06f 100644 --- a/hailort/libhailort/src/utils/profiler/scheduler_profiler_handler.hpp +++ b/hailort/libhailort/src/utils/profiler/scheduler_profiler_handler.hpp @@ -17,6 +17,7 @@ #else #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wconversion" +#pragma GCC diagnostic ignored "-Wunused-parameter" #endif #include "tracer_profiler.pb.h" #if defined(_MSC_VER) @@ -39,19 +40,19 @@ public: ~SchedulerProfilerHandler(); virtual void handle_trace(const AddCoreOpTrace&) override; - virtual void handle_trace(const 
CreateCoreOpInputStreamsTrace&) override; - virtual void handle_trace(const CreateCoreOpOutputStreamsTrace&) override; - virtual void handle_trace(const WriteFrameTrace&) override; - virtual void handle_trace(const InputVdmaDequeueTrace&) override; - virtual void handle_trace(const ReadFrameTrace&) override; - virtual void handle_trace(const OutputVdmaEnqueueTrace&) override; + virtual void handle_trace(const AddStreamH2DTrace&) override; + virtual void handle_trace(const AddStreamD2HTrace&) override; + virtual void handle_trace(const FrameEnqueueH2DTrace&) override; + virtual void handle_trace(const FrameDequeueH2DTrace&) override; + virtual void handle_trace(const FrameDequeueD2HTrace&) override; + virtual void handle_trace(const FrameEnqueueD2HTrace&) override; virtual void handle_trace(const SwitchCoreOpTrace&) override; virtual void handle_trace(const AddDeviceTrace&) override; virtual void handle_trace(const SetCoreOpTimeoutTrace&) override; virtual void handle_trace(const SetCoreOpThresholdTrace&) override; virtual void handle_trace(const SetCoreOpPriorityTrace&) override; virtual void handle_trace(const OracleDecisionTrace&) override; - virtual void handle_trace(const DumpProfilerState&) override; + virtual void handle_trace(const DumpProfilerStateTrace&) override; virtual void handle_trace(const InitProfilerProtoTrace&) override; private: @@ -64,7 +65,6 @@ private: std::atomic m_first_write; ProtoProfiler m_profiler_trace_proto; std::mutex m_proto_lock; - int64_t m_start_time; }; } diff --git a/hailort/libhailort/src/utils/profiler/tracer.hpp b/hailort/libhailort/src/utils/profiler/tracer.hpp index 35036aa..6388e2d 100644 --- a/hailort/libhailort/src/utils/profiler/tracer.hpp +++ b/hailort/libhailort/src/utils/profiler/tracer.hpp @@ -19,24 +19,26 @@ namespace hailort class Tracer { public: + Tracer(); template static void trace(Args... 
trace_args) { auto &tracer = get_instance(); - tracer.execute_trace(trace_args...); + tracer->execute_trace(trace_args...); } -private: - Tracer(); - void init_monitor_handler(); - void init_scheduler_profiler_handler(); - - static Tracer& get_instance() + static std::unique_ptr &get_instance() { - static Tracer tracer; + static std::unique_ptr tracer = nullptr; + if (nullptr == tracer) { + tracer = make_unique_nothrow(); + } return tracer; } +private: + void init_monitor_handler(); + void init_scheduler_profiler_handler(); template void execute_trace(Args... trace_args) { diff --git a/hailort/libhailort/src/utils/soc_utils/partial_cluster_reader.cpp b/hailort/libhailort/src/utils/soc_utils/partial_cluster_reader.cpp new file mode 100644 index 0000000..a417936 --- /dev/null +++ b/hailort/libhailort/src/utils/soc_utils/partial_cluster_reader.cpp @@ -0,0 +1,78 @@ +/** + * Copyright (c) 2020-2023 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file partial_cluster_reader.cpp + * @brief class to read and parse file to determine which clusters are enabled. 
+ **/ + +#include "hailo/hailort_common.hpp" + +#include "partial_cluster_reader.hpp" + +#include +#include + +namespace hailort +{ + +Expected PartialClusterReader::get_arch_default_bitmap(const hailo_device_architecture_t dev_arch) +{ + switch(dev_arch) { + // Currently only supported architecture for this function is HAILO15M - but in future can add more + case HAILO_ARCH_HAILO15M: + return static_cast(PARTIAL_CLUSTERS_LAYOUT_BITMAP__HAILO15M_DEFAULT); + default: + LOGGER__ERROR("Error, Given architecture {} doesnt support partial cluster layout", + HailoRTCommon::get_device_arch_str(dev_arch)); + return make_unexpected(HAILO_INTERNAL_FAILURE); + } +} + +bool PartialClusterReader::validate_arch_partial_clusters_bitmap(const hailo_device_architecture_t dev_arch, + const uint32_t bitmap) +{ + switch(dev_arch) { + // Currently only supported architecture for this function is HAILO15M - but in future can add more + case HAILO_ARCH_HAILO15M: + return (std::find(HAILO15M__PARTIAL_CLUSTERS_LAYOUT_BITMAP_ARRAY.begin(), + HAILO15M__PARTIAL_CLUSTERS_LAYOUT_BITMAP_ARRAY.end(), bitmap) != + HAILO15M__PARTIAL_CLUSTERS_LAYOUT_BITMAP_ARRAY.end()); + default: + LOGGER__ERROR("Error, Given architecture {} doesnt support partial cluster layout", + HailoRTCommon::get_device_arch_str(dev_arch)); + return false; + } +} + +Expected PartialClusterReader::get_partial_clusters_layout_bitmap(const hailo_device_architecture_t dev_arch) +{ + std::ifstream layout_bitmap_file; + layout_bitmap_file.open(PARTIAL_CLUSTER_READER_CLUSTER_LAYOUT_FILE_PATH, std::ios::binary); + if (!layout_bitmap_file.is_open()) { + LOGGER__WARNING("partial cluster layout bitmap file not found, Enabling all clusters by default"); + return get_arch_default_bitmap(dev_arch); + } + + uint32_t partial_clusters_layout_bitmap = 0; + layout_bitmap_file.read(reinterpret_cast(&partial_clusters_layout_bitmap), + sizeof(partial_clusters_layout_bitmap)); + + // Fuse file represents clusters that are enabled with 0 in bit value 
and clusters that are disabled with 1 + // We also ignore all the MSB's that dont represent clusters. + // Therefore, after reading the uint32 layout - we mask with the default bitmap and bitwise flip the + // relevant bits so that 1 will represent enabled clusters + const auto arch_bitmap_mask_exp = get_arch_default_bitmap(dev_arch); + CHECK_EXPECTED(arch_bitmap_mask_exp); + partial_clusters_layout_bitmap = (~partial_clusters_layout_bitmap & arch_bitmap_mask_exp.value()); + layout_bitmap_file.close(); + + CHECK_AS_EXPECTED(validate_arch_partial_clusters_bitmap(dev_arch, partial_clusters_layout_bitmap), + HAILO_INTERNAL_FAILURE, "Error, Invalid partial clusters bitmap value given {}", + partial_clusters_layout_bitmap); + + return partial_clusters_layout_bitmap; +} + +} /* namespace hailort */ diff --git a/hailort/libhailort/src/utils/soc_utils/partial_cluster_reader.hpp b/hailort/libhailort/src/utils/soc_utils/partial_cluster_reader.hpp new file mode 100644 index 0000000..4926272 --- /dev/null +++ b/hailort/libhailort/src/utils/soc_utils/partial_cluster_reader.hpp @@ -0,0 +1,50 @@ +/** + * Copyright (c) 2020-2023 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file partial_cluster_reader.hpp + * @brief class to read and parse file to determine which clusters are enabled. 
+ **/ + +#ifndef _HAILO_PARTIAL_CLUSTER_READER_HPP_ +#define _HAILO_PARTIAL_CLUSTER_READER_HPP_ + +#include "hailo/hailort.h" +#include "hailo/expected.hpp" + +#include "common/logger_macros.hpp" +#include "common/utils.hpp" + +#include + +namespace hailort +{ + +// valid partial cluster layouts for Hailo15M +#define PARTIAL_CLUSTERS_LAYOUT_BITMAP__HAILO15M_0 ((0x1 << 1) | (0x1 << 2) | (0x1 << 3)) +#define PARTIAL_CLUSTERS_LAYOUT_BITMAP__HAILO15M_1 ((0x1 << 0) | (0x1 << 2) | (0x1 << 3)) +#define PARTIAL_CLUSTERS_LAYOUT_BITMAP__HAILO15M_2 ((0x1 << 0) | (0x1 << 1) | (0x1 << 4)) +// Default is all clusters are enabled +#define PARTIAL_CLUSTERS_LAYOUT_BITMAP__HAILO15M_DEFAULT ((0x1 << 0) | (0x1 << 1) | (0x1 << 2) | (0x1 << 3) | (0x1 << 4)) + +constexpr const char* PARTIAL_CLUSTER_READER_CLUSTER_LAYOUT_FILE_PATH = "/sys/devices/platform/fuse"; + +// Array that has all the valid layouts for Hailo15M +static constexpr std::array HAILO15M__PARTIAL_CLUSTERS_LAYOUT_BITMAP_ARRAY = { + PARTIAL_CLUSTERS_LAYOUT_BITMAP__HAILO15M_0, PARTIAL_CLUSTERS_LAYOUT_BITMAP__HAILO15M_1, + PARTIAL_CLUSTERS_LAYOUT_BITMAP__HAILO15M_2, PARTIAL_CLUSTERS_LAYOUT_BITMAP__HAILO15M_DEFAULT +}; + +class PartialClusterReader { +public: + static Expected get_partial_clusters_layout_bitmap(const hailo_device_architecture_t dev_arch); +private: + static Expected get_arch_default_bitmap(const hailo_device_architecture_t dev_arch); + static bool validate_arch_partial_clusters_bitmap(const hailo_device_architecture_t dev_arch, const uint32_t bitmap); +}; + + +} /* namespace hailort */ + +#endif /* _HAILO_SENSOR_CONFIG_UTILS_HPP_ */ \ No newline at end of file diff --git a/hailort/libhailort/src/utils/thread_safe_queue.hpp b/hailort/libhailort/src/utils/thread_safe_queue.hpp index b736d15..f7dfe6f 100644 --- a/hailort/libhailort/src/utils/thread_safe_queue.hpp +++ b/hailort/libhailort/src/utils/thread_safe_queue.hpp @@ -115,26 +115,11 @@ public: m_items_enqueued_sema(items_enqueued_sema), 
m_items_dequeued_sema_or_shutdown(items_dequeued_sema, shutdown_event), m_items_dequeued_sema(items_dequeued_sema), - m_default_timeout(default_timeout), - m_size(max_size), - m_enqueues_count(0), - m_callback_mutex() + m_default_timeout(default_timeout) {} virtual ~SpscQueue() = default; - SpscQueue(SpscQueue &&other) : - m_inner(std::move(other.m_inner)), - m_items_enqueued_sema_or_shutdown(std::move(other.m_items_enqueued_sema_or_shutdown)), - m_items_enqueued_sema(std::move(other.m_items_enqueued_sema)), - m_items_dequeued_sema_or_shutdown(std::move(other.m_items_dequeued_sema_or_shutdown)), - m_items_dequeued_sema(std::move(other.m_items_dequeued_sema)), - m_default_timeout(std::move(other.m_default_timeout)), - m_size(std::move(other.m_size)), - m_enqueues_count(std::move(other.m_enqueues_count.load())), - m_cant_enqueue_callback(std::move(other.m_cant_enqueue_callback)), - m_can_enqueue_callback(std::move(other.m_can_enqueue_callback)), - m_callback_mutex() - {} + SpscQueue(SpscQueue &&other) = default; static Expected create(size_t max_size, const EventPtr& shutdown_event, std::chrono::milliseconds default_timeout = std::chrono::milliseconds(1000)) @@ -213,14 +198,6 @@ public: assert(success); AE_UNUSED(success); - { - std::unique_lock lock(m_callback_mutex); - if ((m_size == m_enqueues_count) && m_can_enqueue_callback) { - m_can_enqueue_callback(); - } - m_enqueues_count--; - } - const auto signal_result = m_items_dequeued_sema_or_shutdown.signal(); if (HAILO_SUCCESS != signal_result) { return make_unexpected(signal_result); @@ -254,14 +231,6 @@ public: assert(success); AE_UNUSED(success); - { - std::unique_lock lock(m_callback_mutex); - m_enqueues_count++; - if ((m_size == m_enqueues_count) && m_cant_enqueue_callback) { - m_cant_enqueue_callback(); - } - } - return m_items_enqueued_sema_or_shutdown.signal(); } @@ -298,14 +267,6 @@ public: assert(success); AE_UNUSED(success); - { - std::unique_lock lock(m_callback_mutex); - m_enqueues_count++; - if 
((m_size == m_enqueues_count) && m_cant_enqueue_callback) { - m_cant_enqueue_callback(); - } - } - return m_items_enqueued_sema_or_shutdown.signal(); } @@ -320,6 +281,11 @@ public: return m_inner.size_approx(); } + bool is_queue_full() + { + return (m_inner.size_approx() == m_inner.max_capacity()); + } + hailo_status clear() AE_NO_TSAN { auto status = HAILO_SUCCESS; @@ -334,16 +300,6 @@ public: return status; } - void set_on_cant_enqueue_callback(std::function callback) - { - m_cant_enqueue_callback = callback; - } - - void set_on_can_enqueue_callback(std::function callback) - { - m_can_enqueue_callback = callback; - } - private: ReaderWriterQueue m_inner; WaitOrShutdown m_items_enqueued_sema_or_shutdown; @@ -351,12 +307,6 @@ private: WaitOrShutdown m_items_dequeued_sema_or_shutdown; SemaphorePtr m_items_dequeued_sema; std::chrono::milliseconds m_default_timeout; - - const size_t m_size; - std::atomic_uint32_t m_enqueues_count; - std::function m_cant_enqueue_callback; - std::function m_can_enqueue_callback; - std::mutex m_callback_mutex; }; } /* namespace hailort */ diff --git a/hailort/libhailort/src/vdevice/CMakeLists.txt b/hailort/libhailort/src/vdevice/CMakeLists.txt index 2fd62f0..c3c6cb7 100644 --- a/hailort/libhailort/src/vdevice/CMakeLists.txt +++ b/hailort/libhailort/src/vdevice/CMakeLists.txt @@ -4,15 +4,14 @@ set(SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/vdevice.cpp ${CMAKE_CURRENT_SOURCE_DIR}/vdevice_core_op.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/pipeline_multiplexer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/vdevice_native_stream.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/vdevice_stream_multiplexer_wrapper.cpp ${CMAKE_CURRENT_SOURCE_DIR}/callback_reorder_queue.cpp ${CMAKE_CURRENT_SOURCE_DIR}/scheduler/scheduler.cpp ${CMAKE_CURRENT_SOURCE_DIR}/scheduler/scheduler_oracle.cpp ${CMAKE_CURRENT_SOURCE_DIR}/scheduler/scheduled_core_op_state.cpp ${CMAKE_CURRENT_SOURCE_DIR}/scheduler/scheduled_stream.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/scheduler/infer_request_accumulator.cpp ) 
set(HAILORT_CPP_SOURCES ${HAILORT_CPP_SOURCES} ${SRC_FILES} PARENT_SCOPE) diff --git a/hailort/libhailort/src/vdevice/callback_reorder_queue.cpp b/hailort/libhailort/src/vdevice/callback_reorder_queue.cpp index d2b1b42..730ff43 100644 --- a/hailort/libhailort/src/vdevice/callback_reorder_queue.cpp +++ b/hailort/libhailort/src/vdevice/callback_reorder_queue.cpp @@ -11,7 +11,7 @@ namespace hailort { -InternalTransferDoneCallback CallbackReorderQueue::wrap_callback(const InternalTransferDoneCallback &original) +TransferDoneCallback CallbackReorderQueue::wrap_callback(const TransferDoneCallback &original) { std::lock_guard lock_guard(m_queue_mutex); const uint64_t current_callback_index = m_registered_callbacks++; diff --git a/hailort/libhailort/src/vdevice/callback_reorder_queue.hpp b/hailort/libhailort/src/vdevice/callback_reorder_queue.hpp index 7f672b0..e6f0108 100644 --- a/hailort/libhailort/src/vdevice/callback_reorder_queue.hpp +++ b/hailort/libhailort/src/vdevice/callback_reorder_queue.hpp @@ -28,7 +28,7 @@ public: {} // Wraps the given original callback so it will be called in the same wrap_callback order. - InternalTransferDoneCallback wrap_callback(const InternalTransferDoneCallback &original); + TransferDoneCallback wrap_callback(const TransferDoneCallback &original); // If some wrapped callback wasn't registered to some async API (for example because the queue is full), we need to // remove the counters we added in `wrap_callback` (otherwise, next callback will wait forever). diff --git a/hailort/libhailort/src/vdevice/pipeline_multiplexer.cpp b/hailort/libhailort/src/vdevice/pipeline_multiplexer.cpp deleted file mode 100644 index c60476c..0000000 --- a/hailort/libhailort/src/vdevice/pipeline_multiplexer.cpp +++ /dev/null @@ -1,456 +0,0 @@ -/** - * Copyright (c) 2022 Hailo Technologies Ltd. All rights reserved. 
- * Distributed under the MIT license (https://opensource.org/licenses/MIT) - **/ -/** - * @file pipeline_multiplexer.cpp - * @brief: Pipeline Multiplexer - **/ - -#include "hailo/hailort_common.hpp" -#include "hailo/vstream.hpp" - -#include "common/utils.hpp" - -#include "vdevice/pipeline_multiplexer.hpp" - - -namespace hailort -{ - -PipelineMultiplexer::PipelineMultiplexer() : - m_should_core_op_stop(), - m_input_streams_count(0), - m_output_streams_count(0), - m_next_to_write(0), - m_order_queue(), - m_currently_writing(INVALID_CORE_OP_HANDLE), - m_written_streams_count(0), - m_read_streams_count(0), - m_next_to_read_after_drain(INVALID_CORE_OP_HANDLE) -{ - assert(is_multiplexer_supported()); -} - -bool PipelineMultiplexer::is_multiplexer_supported() -{ - auto disable_multiplexer_env = std::getenv(DISABLE_MULTIPLEXER_ENV_VAR); - if ((nullptr != disable_multiplexer_env) && (strnlen(disable_multiplexer_env, 2) == 1) && (strncmp(disable_multiplexer_env, "1", 1) == 0)) { - LOGGER__WARNING("Usage of '{}' env variable is deprecated.", DISABLE_MULTIPLEXER_ENV_VAR); - return false; - } - return true; -} - -hailo_status PipelineMultiplexer::add_core_op_instance(multiplexer_core_op_handle_t core_op_handle, CoreOp &core_op) -{ - std::unique_lock lock(m_writing_mutex); - std::unique_lock read_lock(m_reading_mutex); - assert(!contains(m_should_core_op_stop, core_op_handle)); - - auto is_first_instance = (0 == instances_count()); - - auto stream_infos = core_op.get_all_stream_infos(); - CHECK_EXPECTED_AS_STATUS(stream_infos); - - for (const auto &stream_info : stream_infos.value()) { - m_should_core_op_stop[core_op_handle][stream_info.name] = false; - if (is_first_instance) { - // To be filled only on first instance - if (HAILO_H2D_STREAM == stream_info.direction) { - m_input_streams_count++; - } else { - m_output_streams_count++; - m_is_stream_reading[stream_info.name] = false; - } - } - } - - m_write_barriers[core_op_handle] = make_shared_nothrow(m_input_streams_count); - 
CHECK(nullptr != m_write_barriers[core_op_handle], HAILO_OUT_OF_HOST_MEMORY); - m_is_waiting_to_write[core_op_handle] = false; - - return HAILO_SUCCESS; -} - -void PipelineMultiplexer::set_output_vstreams_names(multiplexer_core_op_handle_t core_op_handle, const std::vector &output_vstreams) -{ - std::unique_lock lock(m_writing_mutex); - for (const auto &output_vstream : output_vstreams) { - m_can_output_vstream_read[core_op_handle][output_vstream.name()] = true; - } - m_can_core_op_read[core_op_handle] = true; -} - -bool PipelineMultiplexer::has_more_than_one_core_op_instance() const -{ - return instances_count() > 1; -} - -size_t PipelineMultiplexer::instances_count() const -{ - return m_should_core_op_stop.size(); -} - -bool PipelineMultiplexer::should_core_op_stop(multiplexer_core_op_handle_t core_op_handle) -{ - for (const auto &name_flag_pair : m_should_core_op_stop[core_op_handle]) { - if (name_flag_pair.second) { - return true; - } - } - - return false; -} - -hailo_status PipelineMultiplexer::wait_for_write(multiplexer_core_op_handle_t core_op_handle) -{ - std::shared_ptr barrier; - { - std::unique_lock lock(m_writing_mutex); - assert(contains(m_write_barriers, core_op_handle)); - barrier = m_write_barriers[core_op_handle]; - } - // TODO: This has no timeout - // TODO: HRT-8634 - barrier->arrive_and_wait(); - { - std::unique_lock lock(m_writing_mutex); - assert(contains(m_should_core_op_stop, core_op_handle)); - assert(contains(m_is_waiting_to_write, core_op_handle)); - - m_is_waiting_to_write[core_op_handle] = true; - hailo_status status = HAILO_SUCCESS; - m_writing_cv.wait(lock, [this, core_op_handle, &status] { - if (!has_more_than_one_core_op_instance()) { - return true; - } - - if (should_core_op_stop(core_op_handle)) { - status = HAILO_STREAM_ABORTED_BY_USER; - return true; // return true so that the wait will finish - } - - if (m_currently_writing == core_op_handle) { - return true; - } - - if (!can_core_op_read(core_op_handle)) { - return false; - } 
- - if (INVALID_CORE_OP_HANDLE == m_currently_writing) { - if ((m_next_to_write != core_op_handle) && m_is_waiting_to_write[m_next_to_write] && can_core_op_read(m_next_to_write)) { - return false; - } - - return true; - } - - return false; - }); - m_is_waiting_to_write[core_op_handle] = false; - - if (HAILO_STREAM_ABORTED_BY_USER == status) { - return status; - } - CHECK_SUCCESS(status); - - if (INVALID_CORE_OP_HANDLE == m_currently_writing) { - m_currently_writing = core_op_handle; - m_next_to_write = m_currently_writing; - } - } - m_writing_cv.notify_all(); - - return HAILO_SUCCESS; -} - -bool PipelineMultiplexer::can_core_op_read(multiplexer_core_op_handle_t core_op_handle) -{ - if (should_core_op_stop(core_op_handle)) { - return false; - } - - if (!contains(m_can_core_op_read, core_op_handle)) { - return true; - } - - return m_can_core_op_read[core_op_handle]; -} - -hailo_status PipelineMultiplexer::signal_write_finish(multiplexer_core_op_handle_t core_op_handle, bool did_write_fail) -{ - { - std::unique_lock lock(m_writing_mutex); - m_written_streams_count++; - if (m_written_streams_count == m_input_streams_count) { - m_written_streams_count = 0; - m_currently_writing = INVALID_CORE_OP_HANDLE; - m_next_to_write++; - m_next_to_write %= static_cast(instances_count()); - - if (!did_write_fail) { - std::unique_lock reading_lock(m_reading_mutex); - m_order_queue.push_back(core_op_handle); - } - m_reading_cv.notify_all(); - } - } - - m_writing_cv.notify_all(); - return HAILO_SUCCESS; -} - -Expected PipelineMultiplexer::wait_for_read(multiplexer_core_op_handle_t core_op_handle, const std::string &stream_name, - const std::chrono::milliseconds &timeout) -{ - uint32_t drain_frames = 0; - - { - std::unique_lock lock(m_reading_mutex); - - assert(contains(m_should_core_op_stop, core_op_handle)); - assert(contains(m_is_stream_reading, stream_name)); - - hailo_status status = HAILO_SUCCESS; - auto wait_res = m_reading_cv.wait_for(lock, timeout, [this, core_op_handle, 
stream_name, &drain_frames, &status] { - if (m_should_core_op_stop[core_op_handle][stream_name]) { - status = HAILO_STREAM_ABORTED_BY_USER; - return true; // return true so that the wait will finish - } - if (m_is_stream_reading[stream_name]) { - return false; - } - - if (m_next_to_read_after_drain == core_op_handle) { - drain_frames = m_num_frames_to_drain[stream_name]; - return true; - } - - if (m_order_queue.empty()) { - return false; - } - - if (m_order_queue.front() != core_op_handle) { - if (!should_core_op_stop(m_order_queue.front())) { - return false; - } - - // This means the NG that is currently writing was aborted so we have to wait for it to finish processing its frames - if ((INVALID_CORE_OP_HANDLE != m_currently_writing) && (m_currently_writing != core_op_handle)) { - return false; - } - - uint32_t max_drain_count = get_frame_count_to_drain(core_op_handle); - if (0 == max_drain_count) { - return false; - } - - drain_frames = drain_aborted_in_order_queue(core_op_handle, stream_name, max_drain_count); - } - - return true; - }); - CHECK_AS_EXPECTED(wait_res, HAILO_TIMEOUT, "{} (D2H) failed with status={}, timeout={}ms", stream_name, HAILO_TIMEOUT, timeout.count()); - if (HAILO_STREAM_ABORTED_BY_USER == status) { - return make_unexpected(status); - } - CHECK_SUCCESS_AS_EXPECTED(status); - - m_is_stream_reading[stream_name] = true; - } - - m_reading_cv.notify_all(); - return drain_frames; -} - -uint32_t PipelineMultiplexer::get_frame_count_to_drain(multiplexer_core_op_handle_t core_op_handle) -{ - uint32_t drain_count = 0; - for (const auto &handle : m_order_queue) { - if (!should_core_op_stop(handle)) { - if (handle == core_op_handle) { - // Current instance is in the front after draining - break; - } else { - // Someone else should drain these frames, the current instance won't be in front after draining - return 0; - } - } - - drain_count++; - } - - return drain_count; -} - -uint32_t 
PipelineMultiplexer::drain_aborted_in_order_queue(multiplexer_core_op_handle_t core_op_handle, const std::string &stream_name, - uint32_t max_drain_count) -{ - // In case of multiple outputs where one or more already read the frame we need to drain one less frame - for (auto &name_flag_pair : m_is_stream_reading) { - if (name_flag_pair.second) { - m_num_frames_to_drain[name_flag_pair.first] = max_drain_count - 1; - } else { - m_num_frames_to_drain[name_flag_pair.first] = max_drain_count; - } - } - - m_next_to_read_after_drain = core_op_handle; - m_read_streams_count = 0; - for (uint32_t i = 0; i < max_drain_count; i++) { - for (auto &name_flag_pair : m_is_stream_reading) { - name_flag_pair.second = false; - } - m_order_queue.pop_front(); - } - - return m_num_frames_to_drain[stream_name]; -} - -hailo_status PipelineMultiplexer::signal_read_finish() -{ - std::unique_lock lock(m_reading_mutex); - - m_read_streams_count++; - if (m_read_streams_count == m_output_streams_count) { - m_read_streams_count = 0; - m_order_queue.pop_front(); - for (auto &name_flag_pair : m_is_stream_reading) { - name_flag_pair.second = false; - } - - m_next_to_read_after_drain = INVALID_CORE_OP_HANDLE; - - lock.unlock(); - m_reading_cv.notify_all(); - } - - return HAILO_SUCCESS; -} - -hailo_status PipelineMultiplexer::enable_stream(multiplexer_core_op_handle_t core_op_handle, const std::string &stream_name) -{ - { - std::unique_lock write_lock(m_writing_mutex); - std::unique_lock read_lock(m_reading_mutex); - assert(contains(m_should_core_op_stop, core_op_handle)); - assert(contains(m_should_core_op_stop[core_op_handle], stream_name)); - - if (!m_should_core_op_stop[core_op_handle][stream_name]) { - return HAILO_SUCCESS; - } - - m_should_core_op_stop[core_op_handle][stream_name] = false; - - // TODO: should we 'enable' barrier? 
- } - - m_writing_cv.notify_all(); - m_reading_cv.notify_all(); - - return HAILO_SUCCESS; -} - -hailo_status PipelineMultiplexer::disable_stream(multiplexer_core_op_handle_t core_op_handle, const std::string &stream_name) -{ - { - std::unique_lock write_lock(m_writing_mutex); - std::unique_lock read_lock(m_reading_mutex); - assert(contains(m_should_core_op_stop, core_op_handle)); - assert(contains(m_should_core_op_stop[core_op_handle], stream_name)); - - if (m_should_core_op_stop[core_op_handle][stream_name]) { - return HAILO_SUCCESS; - } - - m_should_core_op_stop[core_op_handle][stream_name] = true; - - assert(contains(m_write_barriers, core_op_handle)); - m_write_barriers[core_op_handle]->terminate(); - } - - m_writing_cv.notify_all(); - m_reading_cv.notify_all(); - - return HAILO_SUCCESS; -} - -void PipelineMultiplexer::RunOnceForStream::add_instance() -{ - std::unique_lock lock(m_mutex); - m_calls_count[static_cast(m_calls_count.size())] = 0; -} - -void PipelineMultiplexer::RunOnceForStream::set_callback(std::function callback) -{ - std::unique_lock lock(m_mutex); - m_callback = callback; -} - -hailo_status PipelineMultiplexer::RunOnceForStream::run(multiplexer_core_op_handle_t core_op_handle) -{ - std::unique_lock lock(m_mutex); - assert(contains(m_calls_count, core_op_handle)); - - m_calls_count[core_op_handle]++; - for (auto &handle_flag_pair : m_calls_count) { - if (0 == handle_flag_pair.second) { - return HAILO_SUCCESS; - } - } - - for (auto &handle_flag_pair : m_calls_count) { - handle_flag_pair.second--; - } - - return m_callback(); -} - -hailo_status PipelineMultiplexer::register_run_once_for_stream(const std::string &stream_name, run_once_for_stream_handle_t handle, - std::function callback) -{ - std::unique_lock lock(m_register_run_once_mutex); - if (!contains(m_run_once_db[stream_name], handle)) { - m_run_once_db[stream_name][handle] = make_shared_nothrow(); - CHECK(nullptr != m_run_once_db[stream_name][handle], HAILO_OUT_OF_HOST_MEMORY); - - 
m_run_once_db[stream_name][handle]->set_callback(callback); - } - - m_run_once_db[stream_name][handle]->add_instance(); - - return HAILO_SUCCESS; -} - -hailo_status PipelineMultiplexer::run_once_for_stream(const std::string &stream_name, run_once_for_stream_handle_t run_once_handle, - multiplexer_core_op_handle_t core_op_handle) -{ - return m_run_once_db[stream_name][run_once_handle]->run(core_op_handle); -} - -void PipelineMultiplexer::set_can_output_vstream_read(multiplexer_core_op_handle_t core_op_handle, const std::string &vstream_name, bool can_read) -{ - { - std::unique_lock lock(m_writing_mutex); - assert(contains(m_can_output_vstream_read, core_op_handle)); - assert(contains(m_can_output_vstream_read[core_op_handle], vstream_name)); - assert(contains(m_can_core_op_read, core_op_handle)); - - m_can_output_vstream_read[core_op_handle][vstream_name] = can_read; - - if (can_read != m_can_core_op_read[core_op_handle]) { - m_can_core_op_read[core_op_handle] = true; - for (const auto &name_bool_pair : m_can_output_vstream_read[core_op_handle]) { - if (!name_bool_pair.second) { - m_can_core_op_read[core_op_handle] = false; - break; - } - } - } - } - m_writing_cv.notify_all(); -} - -} /* namespace hailort */ diff --git a/hailort/libhailort/src/vdevice/pipeline_multiplexer.hpp b/hailort/libhailort/src/vdevice/pipeline_multiplexer.hpp deleted file mode 100644 index e9223aa..0000000 --- a/hailort/libhailort/src/vdevice/pipeline_multiplexer.hpp +++ /dev/null @@ -1,121 +0,0 @@ -/** - * Copyright (c) 2022 Hailo Technologies Ltd. All rights reserved. - * Distributed under the MIT license (https://opensource.org/licenses/MIT) - **/ -/** - * @file pipeline_multiplexer.hpp - * @brief The pipeline multiplexer is a synchronization mechanism that allows communication - * between different pipelines that use the same low-level streams. 
- **/ - -#ifndef _HAILO_PIPELINE_MULTIPLEXER_HPP_ -#define _HAILO_PIPELINE_MULTIPLEXER_HPP_ - -#include "hailo/event.hpp" - -#include "common/barrier.hpp" - -#include "vdevice/scheduler/scheduler.hpp" - -#include -#include - - -namespace hailort -{ - -#define DISABLE_MULTIPLEXER_ENV_VAR "HAILO_DISABLE_MULTIPLEXER_INTERNAL" - -using multiplexer_core_op_handle_t = uint32_t; -using run_once_for_stream_handle_t = uint32_t; - -class PipelineMultiplexer -{ -public: - PipelineMultiplexer(); - - virtual ~PipelineMultiplexer() = default; - PipelineMultiplexer(const PipelineMultiplexer &other) = delete; - PipelineMultiplexer &operator=(const PipelineMultiplexer &other) = delete; - PipelineMultiplexer &operator=(PipelineMultiplexer &&other) = delete; - PipelineMultiplexer(PipelineMultiplexer &&other) = delete; - - hailo_status add_core_op_instance(multiplexer_core_op_handle_t core_op_handle, CoreOp &core_op); - void set_output_vstreams_names(multiplexer_core_op_handle_t core_op_handle, const std::vector &output_vstreams); - bool has_more_than_one_core_op_instance() const; - size_t instances_count() const; - hailo_status wait_for_write(multiplexer_core_op_handle_t core_op_handle); - hailo_status signal_write_finish(multiplexer_core_op_handle_t core_op_handle, bool did_write_fail); - Expected wait_for_read(multiplexer_core_op_handle_t core_op_handle, const std::string &stream_name, - const std::chrono::milliseconds &timeout); - hailo_status signal_read_finish(); - hailo_status enable_stream(multiplexer_core_op_handle_t core_op_handle, const std::string &stream_name); - hailo_status disable_stream(multiplexer_core_op_handle_t core_op_handle, const std::string &stream_name); - - hailo_status register_run_once_for_stream(const std::string &stream_name, run_once_for_stream_handle_t handle, std::function callback); - hailo_status run_once_for_stream(const std::string &stream_name, run_once_for_stream_handle_t run_once_handle, - multiplexer_core_op_handle_t core_op_handle); - - void 
set_can_output_vstream_read(multiplexer_core_op_handle_t core_op_handle, const std::string &vstream_name, bool can_read); - - static bool is_multiplexer_supported(); - -private: - - bool should_core_op_stop(multiplexer_core_op_handle_t core_op_handle); - - std::unordered_map> m_should_core_op_stop; - std::unordered_map m_is_waiting_to_write; - - uint32_t m_input_streams_count; - uint32_t m_output_streams_count; - - multiplexer_core_op_handle_t m_next_to_write; - std::unordered_map> m_write_barriers; - std::deque m_order_queue; - std::mutex m_writing_mutex; - std::condition_variable m_writing_cv; - multiplexer_core_op_handle_t m_currently_writing; - std::atomic_uint32_t m_written_streams_count; - - std::unordered_map m_is_stream_reading; - std::mutex m_reading_mutex; - std::condition_variable m_reading_cv; - std::atomic_uint32_t m_read_streams_count; - std::unordered_map m_num_frames_to_drain; - multiplexer_core_op_handle_t m_next_to_read_after_drain; - - std::unordered_map> m_can_output_vstream_read; - std::unordered_map m_can_core_op_read; - - bool can_core_op_read(multiplexer_core_op_handle_t core_op_handle); - uint32_t get_frame_count_to_drain(multiplexer_core_op_handle_t core_op_handle); - uint32_t drain_aborted_in_order_queue(multiplexer_core_op_handle_t core_op_handle, const std::string &stream_name, uint32_t max_drain_count); - - class RunOnceForStream final - { - public: - RunOnceForStream() {}; - - private: - void add_instance(); - void set_callback(std::function callback); - hailo_status run(multiplexer_core_op_handle_t core_op_handle); - - std::unordered_map m_calls_count; - std::function m_callback; - std::mutex m_mutex; - - friend class PipelineMultiplexer; - }; - - // The run once map stores for each stream (by name), a map of RunOnceForStream which the user can register to. - // run_once_for_stream_handle_t is the handle which the user can access to his specific callback (for example, abort stream function). 
- // This is used for flushing, aborting and clear aborting streams. - std::unordered_map>> m_run_once_db; - std::mutex m_register_run_once_mutex; -}; - -} /* namespace hailort */ - -#endif /* _HAILO_PIPELINE_MULTIPLEXER_HPP_ */ diff --git a/hailort/libhailort/src/vdevice/scheduler/infer_request_accumulator.cpp b/hailort/libhailort/src/vdevice/scheduler/infer_request_accumulator.cpp new file mode 100644 index 0000000..a2d0eaa --- /dev/null +++ b/hailort/libhailort/src/vdevice/scheduler/infer_request_accumulator.cpp @@ -0,0 +1,99 @@ +/** + * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) +**/ +/** + * @file infer_request_accumulator.cpp + **/ + +#include "infer_request_accumulator.hpp" + +namespace hailort +{ + +InferRequestAccumulator::InferRequestAccumulator(size_t streams_count, size_t max_queue_size, + std::function frame_accumulated) : + m_streams_count(streams_count), + m_max_queue_size(max_queue_size), + m_frame_accumulated(frame_accumulated), + m_shutdown(false), + m_ongoing_infer_requests(0) +{} + +hailo_status InferRequestAccumulator::add_transfer_request(const std::string &stream_name, TransferRequest &&request) +{ + std::lock_guard lock(m_mutex); + + if (m_shutdown) { + return HAILO_STREAM_NOT_ACTIVATED; + } + + // Insert the transfer to next available infer request + auto infer_request = get_infer_request(stream_name); + if (!infer_request) { + return infer_request.status(); + } + infer_request->get().emplace(stream_name, std::move(request)); + + // If first infer request was finished, call m_frame_accumulated on it + if (m_partial_infer_requests.front().size() == m_streams_count) { + + m_ongoing_infer_requests++; + m_frame_accumulated(InferRequest{ + std::move(m_partial_infer_requests.front()), + [this](hailo_status) { + { + std::lock_guard lock(m_mutex); + m_ongoing_infer_requests--; + } + m_cv.notify_all(); + } + }); + m_partial_infer_requests.pop_front(); + 
} + + return HAILO_SUCCESS; +} + +hailo_status InferRequestAccumulator::shutdown(std::chrono::milliseconds timeout) +{ + std::unique_lock lock(m_mutex); + + assert(!m_shutdown); + m_shutdown = true; + + // Wait until m_ongoing_infer_requests==0 + auto done = m_cv.wait_for(lock, timeout, [this]() { return m_ongoing_infer_requests == 0; }); + CHECK(done, HAILO_TIMEOUT, "Failed shutdown, ongoing infer requests - {}", m_ongoing_infer_requests); + + // Now cancel all partial request + for (auto &partial_request : m_partial_infer_requests) { + for (auto &stream_transfer_request : partial_request) { + stream_transfer_request.second.callback(HAILO_STREAM_ABORTED_BY_USER); + } + } + m_partial_infer_requests.clear(); + + return HAILO_SUCCESS; +} + +ExpectedRef InferRequestAccumulator::get_infer_request( + const std::string &stream_name) +{ + // Try find infer request that doesn't contain transfer for stream name. + for (auto &partial_infer_request : m_partial_infer_requests) { + if (!contains(partial_infer_request, stream_name)) { + return std::ref(partial_infer_request); + } + } + + // Create new infer request (only if there is place in the queue) + if (m_partial_infer_requests.size() >= m_max_queue_size) { + return make_unexpected(HAILO_QUEUE_IS_FULL); + } + + m_partial_infer_requests.emplace_back(); + return std::ref(m_partial_infer_requests.back()); +} + +} /* namespace hailort */ diff --git a/hailort/libhailort/src/vdevice/scheduler/infer_request_accumulator.hpp b/hailort/libhailort/src/vdevice/scheduler/infer_request_accumulator.hpp new file mode 100644 index 0000000..5efe857 --- /dev/null +++ b/hailort/libhailort/src/vdevice/scheduler/infer_request_accumulator.hpp @@ -0,0 +1,61 @@ +/** + * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. 
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT) +**/ +/** + * @file infer_request_accumulator.hpp + * @brief Class that accept frame request from all streams inside some core op, and accumulate them into a single + * infer request. + **/ + +#ifndef _HAILO_INFER_REQUEST_ACCUMULATOR_HPP_ +#define _HAILO_INFER_REQUEST_ACCUMULATOR_HPP_ + +#include "stream_common/transfer_common.hpp" + +#include +#include +#include + +namespace hailort +{ + +class InferRequestAccumulator final { +public: + InferRequestAccumulator(size_t streams_count, size_t max_queue_size, + std::function frame_accumulated); + + hailo_status add_transfer_request(const std::string &stream_name, TransferRequest &&request); + + // All new add_transfer_request call will fail. Waits until all accumulated infer requests are done, cancel all + // partial requests. + hailo_status shutdown(std::chrono::milliseconds timeout); + + size_t queue_size() const { return m_max_queue_size; } + +private: + + using PartialInferRequest = std::unordered_map; + + // Find an infer request that can contain transfer request for the given stream name. + ExpectedRef get_infer_request(const std::string &stream_name); + + const size_t m_streams_count; + const size_t m_max_queue_size; + std::function m_frame_accumulated; + bool m_shutdown; + + // Increasing this counter when we frame_accumulated is called, and decrease it in the callback. + size_t m_ongoing_infer_requests; + + std::mutex m_mutex; + std::condition_variable m_cv; + + // A partial infer request contains TransferRequest from subset of the core op streams. + // When a partial infer request is completed (all streams are filled), the m_frame_accumulated is called. 
+ std::list m_partial_infer_requests; +}; + +} /* namespace hailort */ + +#endif /* _HAILO_INFER_REQUEST_ACCUMULATOR_HPP_ */ diff --git a/hailort/libhailort/src/vdevice/scheduler/scheduled_core_op_state.cpp b/hailort/libhailort/src/vdevice/scheduler/scheduled_core_op_state.cpp index 4fc0fcb..66a35ee 100644 --- a/hailort/libhailort/src/vdevice/scheduler/scheduled_core_op_state.cpp +++ b/hailort/libhailort/src/vdevice/scheduler/scheduled_core_op_state.cpp @@ -7,53 +7,46 @@ * @brief: Scheduled CoreOp **/ -#include "vdevice/vdevice_stream_multiplexer_wrapper.hpp" #include "vdevice/scheduler/scheduler_oracle.hpp" #include "vdevice/scheduler/scheduled_core_op_state.hpp" -#include "hef/hef_internal.hpp" +#include "vdevice/vdevice_core_op.hpp" namespace hailort { -ScheduledCoreOp::ScheduledCoreOp(std::shared_ptr core_op, std::chrono::milliseconds timeout, - uint16_t max_batch_size, bool use_dynamic_batch_flow, StreamInfoVector &stream_infos) : +ScheduledCoreOp::ScheduledCoreOp(std::shared_ptr core_op, std::chrono::milliseconds timeout, + uint16_t max_batch_size, uint32_t max_ongoing_frames_per_device, bool use_dynamic_batch_flow) : m_core_op(core_op), m_last_run_time_stamp(std::chrono::steady_clock::now()), m_timeout(std::move(timeout)), - m_frame_was_sent(false), m_max_batch_size(max_batch_size), + m_max_ongoing_frames_per_device(max_ongoing_frames_per_device), m_use_dynamic_batch_flow(use_dynamic_batch_flow), + m_instances_count(1), + m_requested_infer_requests(0), + m_min_threshold(DEFAULT_SCHEDULER_MIN_THRESHOLD), m_priority(HAILO_SCHEDULER_PRIORITY_NORMAL), - m_last_device_id(INVALID_DEVICE_ID), - m_inputs_names(), - m_outputs_names() -{ - // Prepare empty counters for the added core-op - for (const auto &stream_info : stream_infos) { - m_min_threshold_per_stream[stream_info.name] = DEFAULT_SCHEDULER_MIN_THRESHOLD; - m_is_stream_enabled[stream_info.name] = true; - m_pending_frames.insert(stream_info.name); - if (HAILO_H2D_STREAM == stream_info.direction) { - 
m_inputs_names.push_back(stream_info.name); - } else { - m_outputs_names.push_back(stream_info.name); - } - } -} + m_last_device_id(INVALID_DEVICE_ID) +{} -Expected> ScheduledCoreOp::create(std::shared_ptr added_core_op, StreamInfoVector &stream_infos) +Expected> ScheduledCoreOp::create(std::shared_ptr added_core_op, + StreamInfoVector &stream_infos) { auto timeout = DEFAULT_SCHEDULER_TIMEOUT; auto batch_size_expected = added_core_op->get_stream_batch_size(stream_infos[0].name); CHECK_EXPECTED(batch_size_expected); - auto max_batch_size = batch_size_expected.release(); + const auto max_batch_size = batch_size_expected.release(); + + auto max_queue_size_per_device_expected = added_core_op->get_async_max_queue_size_per_device(); + CHECK_EXPECTED(max_queue_size_per_device_expected); + const auto max_queue_size_per_device = max_queue_size_per_device_expected.release(); // DEFAULT_BATCH_SIZE and SINGLE_CONTEXT_BATCH_SIZE support streaming and therfore we are not using dynamic batch flow auto use_dynamic_batch_flow = added_core_op->get_supported_features().multi_context && (max_batch_size > SINGLE_CONTEXT_BATCH_SIZE); - auto res = make_shared_nothrow(added_core_op, timeout, max_batch_size, use_dynamic_batch_flow, - stream_infos); + auto res = make_shared_nothrow(added_core_op, timeout, max_batch_size, + static_cast(max_queue_size_per_device), use_dynamic_batch_flow); CHECK_NOT_NULL_AS_EXPECTED(res, HAILO_OUT_OF_HOST_MEMORY); return res; @@ -61,33 +54,7 @@ Expected> ScheduledCoreOp::create(std::shared_p uint32_t ScheduledCoreOp::get_max_ongoing_frames_per_device() const { - return std::min(get_min_input_buffers_count(), get_min_output_buffers_count()); -} - -uint16_t ScheduledCoreOp::get_min_input_buffers_count() const -{ - auto input_streams = m_core_op->get_input_streams(); - uint16_t buffers_count = UINT16_MAX; - for (auto &input_stream : input_streams) { - InputStreamBase &vdevice_input = static_cast(input_stream.get()); - if (auto pending_frames_size = 
vdevice_input.get_buffer_frames_size()) { - buffers_count = std::min(buffers_count, static_cast(pending_frames_size.value())); - } - } - return buffers_count; -} - -uint16_t ScheduledCoreOp::get_min_output_buffers_count() const -{ - auto output_streams = m_core_op->get_output_streams(); - uint16_t buffers_count = UINT16_MAX; - for (auto &output_stream : output_streams) { - OutputStreamBase &vdevice_input = static_cast(output_stream.get()); - if (auto pending_frames_size = vdevice_input.get_buffer_frames_size()) { - buffers_count = std::min(buffers_count, static_cast(pending_frames_size.value())); - } - } - return buffers_count; + return m_max_ongoing_frames_per_device; } bool ScheduledCoreOp::use_dynamic_batch_flow() const @@ -95,34 +62,20 @@ bool ScheduledCoreOp::use_dynamic_batch_flow() const return m_use_dynamic_batch_flow; } -hailo_status ScheduledCoreOp::set_timeout(const std::chrono::milliseconds &timeout, const stream_name_t &stream_name) +hailo_status ScheduledCoreOp::set_timeout(const std::chrono::milliseconds &timeout) { - CHECK(!m_frame_was_sent, HAILO_INVALID_OPERATION, - "Setting scheduler timeout is allowed only before sending / receiving frames on the core-op."); m_timeout = timeout; - - auto name = (stream_name.empty()) ? 
m_core_op->name() : stream_name; - LOGGER__INFO("Setting scheduler timeout of {} to {}ms", name, timeout.count()); - + LOGGER__INFO("Setting scheduler timeout of {} to {}ms", m_core_op->name(), timeout.count()); return HAILO_SUCCESS; } -hailo_status ScheduledCoreOp::set_threshold(uint32_t threshold, const stream_name_t &stream_name) +hailo_status ScheduledCoreOp::set_threshold(uint32_t threshold) { CHECK(!use_dynamic_batch_flow() || (threshold <= m_max_batch_size), HAILO_INVALID_ARGUMENT, "Threshold must be equal or lower than the maximum batch size!"); - CHECK(!m_frame_was_sent, HAILO_INVALID_OPERATION, - "Setting scheduler threshold is allowed only before sending / receiving frames on the core-op."); - - // TODO: Support setting threshold per stream. currently stream_name is always empty and de-facto we set threshold for the whole NG - for (auto &threshold_per_stream_pair : m_min_threshold_per_stream) { - threshold_per_stream_pair.second = threshold; - } - - auto name = (stream_name.empty()) ? 
m_core_op->name() : stream_name; - LOGGER__INFO("Setting scheduler threshold of {} to {} frames", name, threshold); - + m_min_threshold = threshold; + LOGGER__INFO("Setting scheduler threshold of {} to {} frames", m_core_op->name(), threshold); return HAILO_SUCCESS; } @@ -136,6 +89,16 @@ void ScheduledCoreOp::set_priority(core_op_priority_t priority) m_priority = priority; } +bool ScheduledCoreOp::is_over_threshold() const +{ + return m_requested_infer_requests.load() >= m_min_threshold; +} + +bool ScheduledCoreOp::is_over_timeout() const +{ + return m_timeout <= (std::chrono::steady_clock::now() - m_last_run_time_stamp); +} + device_id_t ScheduledCoreOp::get_last_device() { return m_last_device_id; @@ -151,9 +114,11 @@ std::shared_ptr ScheduledCoreOp::get_core_op() return m_core_op; } -void ScheduledCoreOp::mark_frame_sent() +std::shared_ptr ScheduledCoreOp::get_vdma_core_op(const device_id_t &device_id) { - m_frame_was_sent = true; + auto vdma_core_op = m_core_op->get_core_op_by_device_id(device_id); + assert(vdma_core_op); + return vdma_core_op.release(); } std::chrono::time_point ScheduledCoreOp::get_last_run_timestamp() @@ -166,17 +131,14 @@ void ScheduledCoreOp::set_last_run_timestamp(const std::chrono::time_point ScheduledCoreOp::get_timeout(const stream_name_t &stream_name) +std::chrono::milliseconds ScheduledCoreOp::get_timeout() { - CHECK_AS_EXPECTED(stream_name.empty(), HAILO_INVALID_OPERATION, "timeout per network is not supported"); - auto timeout = m_timeout; - return timeout; + return m_timeout; } -Expected ScheduledCoreOp::get_threshold(const stream_name_t &stream_name) +uint32_t ScheduledCoreOp::get_threshold() { - CHECK_AS_EXPECTED(contains(m_min_threshold_per_stream, stream_name), HAILO_NOT_FOUND); - return m_min_threshold_per_stream[stream_name].load(); + return (m_min_threshold != DEFAULT_SCHEDULER_MIN_THRESHOLD) ? 
m_min_threshold : 1; } uint16_t ScheduledCoreOp::get_max_batch_size() const @@ -187,60 +149,28 @@ uint16_t ScheduledCoreOp::get_max_batch_size() const uint16_t ScheduledCoreOp::get_burst_size() const { // When the user don't explicitly pass batch size, in order to preserve performance from previous scheduler version, - // we don't want to stop streaming until we transferred at least get_min_output_buffers_count() frames (This was + // we don't want to stop streaming until we transferred at least m_max_ongoing_frames_per_device frames (This was // the behaviour in previous scheduler versions). - return m_core_op->is_default_batch_size() ? get_min_output_buffers_count() : get_max_batch_size(); -} - -SchedulerCounter &ScheduledCoreOp::pending_frames() -{ - return m_pending_frames; -} - -uint32_t ScheduledCoreOp::get_min_input_pending_frames() const -{ - uint32_t min_count = std::numeric_limits::max(); - for (const auto &input_name : m_inputs_names) { - min_count = std::min(min_count, m_pending_frames[input_name]); - } - return min_count; -} - -bool ScheduledCoreOp::is_stream_enabled(const stream_name_t &stream_name) const -{ - return m_is_stream_enabled.at(stream_name); + return m_core_op->is_default_batch_size() ? 
+ static_cast(m_max_ongoing_frames_per_device) : + get_max_batch_size(); } -void ScheduledCoreOp::enable_stream(const stream_name_t &stream_name) +void ScheduledCoreOp::add_instance() { - m_is_stream_enabled.at(stream_name) = true; + m_instances_count++; } -void ScheduledCoreOp::disable_stream(const stream_name_t &stream_name) +void ScheduledCoreOp::remove_instance() { - m_is_stream_enabled.at(stream_name) = false; + assert(m_instances_count > 0); + m_instances_count--; } -bool ScheduledCoreOp::any_stream_disabled() const +size_t ScheduledCoreOp::instances_count() const { - auto is_disabled = [](const std::pair &is_enabled) { return !is_enabled.second; }; - return std::any_of(m_is_stream_enabled.begin(), m_is_stream_enabled.end(), is_disabled); + return m_instances_count; } -bool ScheduledCoreOp::all_stream_disabled() const -{ - auto is_disabled = [](const std::pair &is_enabled) { return !is_enabled.second; }; - return std::all_of(m_is_stream_enabled.begin(), m_is_stream_enabled.end(), is_disabled); -} - -const std::vector &ScheduledCoreOp::get_inputs_names() -{ - return m_inputs_names; -} - -const std::vector &ScheduledCoreOp::get_outputs_names() -{ - return m_outputs_names; -} } /* namespace hailort */ diff --git a/hailort/libhailort/src/vdevice/scheduler/scheduled_core_op_state.hpp b/hailort/libhailort/src/vdevice/scheduler/scheduled_core_op_state.hpp index d68a97a..e11272f 100644 --- a/hailort/libhailort/src/vdevice/scheduler/scheduled_core_op_state.hpp +++ b/hailort/libhailort/src/vdevice/scheduler/scheduled_core_op_state.hpp @@ -18,8 +18,6 @@ #include "core_op/core_op.hpp" -#include "vdevice/scheduler/scheduler_counter.hpp" - #include #include @@ -27,19 +25,19 @@ namespace hailort { -#define DEFAULT_SCHEDULER_TIMEOUT (std::chrono::milliseconds(0)) -#define DEFAULT_SCHEDULER_MIN_THRESHOLD (0) constexpr const char *INVALID_DEVICE_ID = ""; using core_op_priority_t = uint8_t; constexpr const uint16_t SINGLE_CONTEXT_BATCH_SIZE = 1; +class VDeviceCoreOp; + class 
ScheduledCoreOp { public: - static Expected> create(std::shared_ptr added_core_op, + static Expected> create(std::shared_ptr added_core_op, StreamInfoVector &stream_infos); virtual ~ScheduledCoreOp() = default; @@ -49,13 +47,10 @@ public: ScheduledCoreOp(ScheduledCoreOp &&other) noexcept = delete; std::shared_ptr get_core_op(); - const std::vector &get_outputs_names(); - const std::vector &get_inputs_names(); - uint32_t get_max_ongoing_frames_per_device() const; + std::shared_ptr get_vdma_core_op(const device_id_t &device_id); - uint16_t get_min_input_buffers_count() const; - uint16_t get_min_output_buffers_count() const; + uint32_t get_max_ongoing_frames_per_device() const; uint16_t get_max_batch_size() const; uint16_t get_burst_size() const; @@ -64,50 +59,44 @@ public: device_id_t get_last_device(); void set_last_device(const device_id_t &device_id); - Expected get_timeout(const stream_name_t &stream_name = ""); - hailo_status set_timeout(const std::chrono::milliseconds &timeout, const stream_name_t &stream_name = ""); - Expected get_threshold(const stream_name_t &stream_name); - hailo_status set_threshold(uint32_t threshold, const stream_name_t &stream_name = ""); + std::chrono::milliseconds get_timeout(); + hailo_status set_timeout(const std::chrono::milliseconds &timeout); + uint32_t get_threshold(); + hailo_status set_threshold(uint32_t threshold); core_op_priority_t get_priority(); void set_priority(core_op_priority_t priority); + bool is_over_threshold() const; + bool is_over_timeout() const; + std::chrono::time_point get_last_run_timestamp(); void set_last_run_timestamp(const std::chrono::time_point ×tamp); - void mark_frame_sent(); - - SchedulerCounter &pending_frames(); - uint32_t get_min_input_pending_frames() const; + std::atomic_uint32_t &requested_infer_requests() { return m_requested_infer_requests; } - bool is_stream_enabled(const stream_name_t &stream_name) const; - void enable_stream(const stream_name_t &stream_name); - void disable_stream(const 
stream_name_t &stream_name); - bool any_stream_disabled() const; - bool all_stream_disabled() const; + void add_instance(); + void remove_instance(); + size_t instances_count() const; - ScheduledCoreOp(std::shared_ptr core_op, std::chrono::milliseconds timeout, - uint16_t max_batch_size, bool use_dynamic_batch_flow, StreamInfoVector &stream_infos); + ScheduledCoreOp(std::shared_ptr core_op, std::chrono::milliseconds timeout, + uint16_t max_batch_size, uint32_t max_ongoing_frames_per_device, bool use_dynamic_batch_flow); private: - std::shared_ptr m_core_op; + std::shared_ptr m_core_op; std::chrono::time_point m_last_run_time_stamp; std::chrono::milliseconds m_timeout; - std::atomic_bool m_frame_was_sent; - uint16_t m_max_batch_size; - bool m_use_dynamic_batch_flow; + const uint16_t m_max_batch_size; + const uint32_t m_max_ongoing_frames_per_device; + const bool m_use_dynamic_batch_flow; + size_t m_instances_count; - // For each stream, amount of frames pending (for launch_transfer call) - SchedulerCounter m_pending_frames; + std::atomic_uint32_t m_requested_infer_requests; - std::unordered_map m_min_threshold_per_stream; - std::unordered_map m_is_stream_enabled; + uint32_t m_min_threshold; core_op_priority_t m_priority; device_id_t m_last_device_id; - - std::vector m_inputs_names; - std::vector m_outputs_names; }; diff --git a/hailort/libhailort/src/vdevice/scheduler/scheduled_stream.cpp b/hailort/libhailort/src/vdevice/scheduler/scheduled_stream.cpp index e522cc5..a745f9d 100644 --- a/hailort/libhailort/src/vdevice/scheduler/scheduled_stream.cpp +++ b/hailort/libhailort/src/vdevice/scheduler/scheduled_stream.cpp @@ -24,13 +24,9 @@ Expected> ScheduledInputStream::create( std::map> &&streams, const LayerInfo &layer_info, const scheduler_core_op_handle_t &core_op_handle, - CoreOpsSchedulerWeakPtr core_ops_scheduler, - EventPtr core_op_activated_event) + EventPtr core_op_activated_event, + std::shared_ptr infer_requests_accumulator) { - auto max_queue_size_per_stream 
= streams.begin()->second.get().get_buffer_frames_size(); - CHECK_EXPECTED(max_queue_size_per_stream); - const auto max_queue_size = max_queue_size_per_stream.value() * streams.size(); - // In all cases, the buffer mode of the low level streams is always NOT_OWNING (the buffer is owned either by // ScheduledInputStream or by the user) for (auto &stream : streams) { @@ -39,47 +35,14 @@ Expected> ScheduledInputStream::create( } auto status = HAILO_UNINITIALIZED; - auto local_vdevice_stream = make_unique_nothrow(std::move(streams), - core_op_handle, std::move(core_op_activated_event), layer_info, - core_ops_scheduler, max_queue_size, status); + auto local_vdevice_stream = make_unique_nothrow(std::move(streams), core_op_handle, + std::move(core_op_activated_event), layer_info, std::move(infer_requests_accumulator), status); CHECK_NOT_NULL_AS_EXPECTED(local_vdevice_stream, HAILO_OUT_OF_HOST_MEMORY); CHECK_SUCCESS_AS_EXPECTED(status); return local_vdevice_stream; } -hailo_status ScheduledInputStream::launch_transfer(const device_id_t &device_id) -{ - auto core_ops_scheduler = m_core_ops_scheduler.lock(); - CHECK(core_ops_scheduler, HAILO_INTERNAL_FAILURE, "core_op_scheduler was destructed"); - - auto pending_buffer = m_transfer_requests.dequeue(); - CHECK_EXPECTED_AS_STATUS(pending_buffer); - - auto reorder_queue_callback = m_callback_reorder_queue.wrap_callback(pending_buffer->callback); - pending_buffer->callback = reorder_queue_callback; - - // Wrap callback with scheduler signal read finish. 
- pending_buffer->callback = [this, device_id, callback=reorder_queue_callback](hailo_status status) { - if (HAILO_SUCCESS == status) { - auto scheduler = m_core_ops_scheduler.lock(); - assert(scheduler); - scheduler->signal_frame_transferred(m_core_op_handle, name(), device_id, HAILO_H2D_STREAM); - } - - callback(status); - }; - - assert(contains(m_streams, device_id)); - auto status = m_streams.at(device_id).get().write_async(pending_buffer.release()); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("write_async on device {} failed with {}", device_id, status); - // The pending_buffer was already registered so we must call the callback to give the error back to the user. - reorder_queue_callback(status); - } - return status; -} - hailo_stream_interface_t ScheduledInputStream::get_interface() const { // All interface values of m_streams should be the same @@ -88,77 +51,42 @@ hailo_stream_interface_t ScheduledInputStream::get_interface() const Expected> ScheduledInputStream::allocate_buffer_pool() { - if (m_streams.size() == 1) { - // On single device, we use the stream allocate_buffer_pool for best optimization (The buffer can be circular - // dma buffer) - auto &async_stream = dynamic_cast(m_streams.begin()->second.get()); - return async_stream.allocate_buffer_pool(); - } else { - auto queued_pool = QueuedStreamBufferPool::create(m_transfer_requests.max_size(), get_frame_size(), - BufferStorageParams::create_dma()); - CHECK_EXPECTED(queued_pool); - - return std::unique_ptr(queued_pool.release()); - } + auto queued_pool = QueuedStreamBufferPool::create(m_infer_requests_accumulator->queue_size(), get_frame_size(), + BufferStorageParams::create_dma()); + CHECK_EXPECTED(queued_pool); + return std::unique_ptr(queued_pool.release()); } size_t ScheduledInputStream::get_max_ongoing_transfers() const { - return m_transfer_requests.max_size(); + return m_infer_requests_accumulator->queue_size(); } hailo_status ScheduledInputStream::write_async_impl(TransferRequest 
&&transfer_request) { - auto core_ops_scheduler = m_core_ops_scheduler.lock(); - CHECK(core_ops_scheduler, HAILO_INTERNAL_FAILURE, "core_op_scheduler was destructed"); + TRACE(FrameEnqueueH2DTrace, m_core_op_handle, name()); - auto status = m_transfer_requests.enqueue(std::move(transfer_request)); - if (HAILO_QUEUE_IS_FULL == status) { - return status; - } - CHECK_SUCCESS(status); - - status = core_ops_scheduler->signal_frame_pending(m_core_op_handle, name(), HAILO_H2D_STREAM); - if (HAILO_STREAM_ABORTED_BY_USER == status) { - return status; + transfer_request.callback = m_callback_reorder_queue.wrap_callback(transfer_request.callback); + auto status = m_infer_requests_accumulator->add_transfer_request(name(), std::move(transfer_request)); + if (HAILO_SUCCESS != status) { + m_callback_reorder_queue.cancel_last_callback(); + if (HAILO_QUEUE_IS_FULL == status) { + return status; + } + CHECK_SUCCESS(status); } - CHECK_SUCCESS(status); return HAILO_SUCCESS; } -hailo_status ScheduledInputStream::abort() -{ - auto core_ops_scheduler = m_core_ops_scheduler.lock(); - CHECK(core_ops_scheduler, HAILO_INTERNAL_FAILURE, "core_op_scheduler was destructed"); - - core_ops_scheduler->disable_stream(m_core_op_handle, name()); - - return AsyncInputStreamBase::abort(); -} - -hailo_status ScheduledInputStream::clear_abort() -{ - auto core_ops_scheduler = m_core_ops_scheduler.lock(); - CHECK(core_ops_scheduler, HAILO_INTERNAL_FAILURE); - - core_ops_scheduler->enable_stream(m_core_op_handle, name()); - - return AsyncInputStreamBase::clear_abort(); -} - /** Output stream **/ Expected> ScheduledOutputStream::create( std::map> &&streams, const scheduler_core_op_handle_t &core_op_handle, const LayerInfo &layer_info, EventPtr core_op_activated_event, - CoreOpsSchedulerWeakPtr core_ops_scheduler) + std::shared_ptr infer_requests_accumulator) { - auto max_queue_size_per_stream = streams.begin()->second.get().get_buffer_frames_size(); - CHECK_EXPECTED(max_queue_size_per_stream); - const auto 
max_queue_size = max_queue_size_per_stream.value() * streams.size(); - // In all cases, the buffer mode of the low level streams is always NOT_OWNING (the buffer is owned either by // ScheduledOutputStream or by the user) for (auto &stream : streams) { @@ -169,50 +97,13 @@ Expected> ScheduledOutputStream::create( auto status = HAILO_UNINITIALIZED; auto stream = make_unique_nothrow(std::move(streams), core_op_handle, - layer_info, std::move(core_op_activated_event), core_ops_scheduler, max_queue_size, status); + layer_info, std::move(core_op_activated_event), std::move(infer_requests_accumulator), status); CHECK_NOT_NULL_AS_EXPECTED(stream, HAILO_OUT_OF_HOST_MEMORY); CHECK_SUCCESS_AS_EXPECTED(status); return stream; } -hailo_status ScheduledOutputStream::launch_transfer(const device_id_t &device_id) -{ - auto core_ops_scheduler = m_core_ops_scheduler.lock(); - CHECK(core_ops_scheduler, HAILO_INTERNAL_FAILURE, "core_op_scheduler was destructed"); - - auto pending_buffer = m_transfer_requests.dequeue(); - CHECK_EXPECTED_AS_STATUS(pending_buffer); - - // Wrap callback with reorder queue. - auto reorder_queue_callback = m_callback_reorder_queue.wrap_callback(pending_buffer->callback); - - // Wrap callback with scheduler signal read finish. - pending_buffer->callback = [this, device_id, callback=reorder_queue_callback](hailo_status status) { - if (HAILO_SUCCESS == status) { - auto scheduler = m_core_ops_scheduler.lock(); - assert(scheduler); - scheduler->signal_frame_transferred(m_core_op_handle, name(), device_id, HAILO_D2H_STREAM); - - if (buffer_mode() == StreamBufferMode::NOT_OWNING) { - // On OWNING mode this trace is called after read_impl is called. 
- TRACE(ReadFrameTrace, m_core_op_handle, name()); - } - } - - callback(status); - }; - - assert(contains(m_streams, device_id)); - auto status = m_streams.at(device_id).get().read_async(pending_buffer.release()); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("read_async on device {} failed with {}", device_id, status); - // The pending_buffer was already registered so we must call the callback to give the error back to the user. - reorder_queue_callback(status); - } - return status; -} - hailo_stream_interface_t ScheduledOutputStream::get_interface() const { // All interface values of m_streams should be the same @@ -221,64 +112,30 @@ hailo_stream_interface_t ScheduledOutputStream::get_interface() const Expected> ScheduledOutputStream::allocate_buffer_pool() { - if (m_streams.size() == 1) { - // On single device, we use the stream allocate_buffer_pool for best optimization (The buffer can be circular - // dma buffer) - auto &async_stream = dynamic_cast(m_streams.begin()->second.get()); - return async_stream.allocate_buffer_pool(); - } else { - auto queued_pool = QueuedStreamBufferPool::create(m_transfer_requests.max_size(), get_frame_size(), - BufferStorageParams::create_dma()); - CHECK_EXPECTED(queued_pool); - - return std::unique_ptr(queued_pool.release()); - } + auto queued_pool = QueuedStreamBufferPool::create(m_infer_requests_accumulator->queue_size(), get_frame_size(), + BufferStorageParams::create_dma()); + CHECK_EXPECTED(queued_pool); + return std::unique_ptr(queued_pool.release()); } size_t ScheduledOutputStream::get_max_ongoing_transfers() const { - return m_transfer_requests.max_size(); + return m_infer_requests_accumulator->queue_size(); } - hailo_status ScheduledOutputStream::read_async_impl(TransferRequest &&transfer_request) { - auto core_ops_scheduler = m_core_ops_scheduler.lock(); - CHECK(core_ops_scheduler, HAILO_INTERNAL_FAILURE, "core_op_scheduler was destructed"); - - auto status = m_transfer_requests.enqueue(std::move(transfer_request)); - 
if (HAILO_QUEUE_IS_FULL == status) { - return status; - } - CHECK_SUCCESS(status); - - status = core_ops_scheduler->signal_frame_pending(m_core_op_handle, name(), HAILO_D2H_STREAM); - if (HAILO_STREAM_ABORTED_BY_USER == status) { - return status; + transfer_request.callback = m_callback_reorder_queue.wrap_callback(transfer_request.callback); + auto status = m_infer_requests_accumulator->add_transfer_request(name(), std::move(transfer_request)); + if (HAILO_SUCCESS != status) { + m_callback_reorder_queue.cancel_last_callback(); + if (HAILO_QUEUE_IS_FULL == status) { + return status; + } + CHECK_SUCCESS(status); } - CHECK_SUCCESS(status); return HAILO_SUCCESS; } -hailo_status ScheduledOutputStream::abort() -{ - auto core_ops_scheduler = m_core_ops_scheduler.lock(); - CHECK(core_ops_scheduler, HAILO_INTERNAL_FAILURE, "core_op_scheduler was destructed"); - - core_ops_scheduler->disable_stream(m_core_op_handle, name()); - - return AsyncOutputStreamBase::abort(); -} - -hailo_status ScheduledOutputStream::clear_abort() -{ - auto core_ops_scheduler = m_core_ops_scheduler.lock(); - CHECK(core_ops_scheduler, HAILO_INTERNAL_FAILURE); - - core_ops_scheduler->enable_stream(m_core_op_handle, name()); - - return AsyncOutputStreamBase::clear_abort(); -} - } /* namespace hailort */ diff --git a/hailort/libhailort/src/vdevice/scheduler/scheduled_stream.hpp b/hailort/libhailort/src/vdevice/scheduler/scheduled_stream.hpp index ce994d6..e96ddf0 100644 --- a/hailort/libhailort/src/vdevice/scheduler/scheduled_stream.hpp +++ b/hailort/libhailort/src/vdevice/scheduler/scheduled_stream.hpp @@ -18,7 +18,7 @@ #include "stream_common/async_stream_base.hpp" #include "vdevice/vdevice_internal.hpp" #include "vdevice/callback_reorder_queue.hpp" -#include "vdevice/scheduler/scheduler.hpp" +#include "vdevice/scheduler/infer_request_accumulator.hpp" #include "stream_common/stream_buffer_pool.hpp" #include "stream_common/async_stream_base.hpp" #include "vdma/vdma_device.hpp" @@ -34,24 +34,21 @@ 
public: std::map> &&streams, const LayerInfo &layer_info, const scheduler_core_op_handle_t &core_op_handle, - CoreOpsSchedulerWeakPtr core_ops_scheduler, - EventPtr core_op_activated_event); + EventPtr core_op_activated_event, + std::shared_ptr infer_requests_accumulator); ScheduledInputStream( std::map> &&streams, const scheduler_core_op_handle_t &core_op_handle, EventPtr &&core_op_activated_event, const LayerInfo &layer_info, - CoreOpsSchedulerWeakPtr core_ops_scheduler, - size_t max_queue_size, + std::shared_ptr &&infer_requests_accumulator, hailo_status &status) : - AsyncInputStreamBase(layer_info, streams.begin()->second.get().get_interface(), - std::move(core_op_activated_event), status), + AsyncInputStreamBase(layer_info, std::move(core_op_activated_event), status), m_streams(std::move(streams)), - m_core_ops_scheduler(core_ops_scheduler), m_core_op_handle(core_op_handle), - m_transfer_requests(max_queue_size), - m_callback_reorder_queue(max_queue_size) // TODO HRT-1058 - use reorder queue only when needed + m_infer_requests_accumulator(infer_requests_accumulator), + m_callback_reorder_queue(infer_requests_accumulator->queue_size()) // TODO HRT-1058 - use reorder queue only when needed {} virtual hailo_stream_interface_t get_interface() const override; @@ -60,27 +57,13 @@ public: virtual size_t get_max_ongoing_transfers() const override; virtual hailo_status write_async_impl(TransferRequest &&transfer_request) override; - virtual hailo_status launch_transfer(const device_id_t &device_id) override; - virtual hailo_status abort() override; - virtual hailo_status clear_abort() override; virtual bool is_scheduled() override final { return true; }; - // Returns the amount of frames buffered on a single device. 
- virtual Expected get_buffer_frames_size() const override - { - return m_streams.begin()->second.get().get_buffer_frames_size(); - } - private: std::map> m_streams; - CoreOpsSchedulerWeakPtr m_core_ops_scheduler; scheduler_core_op_handle_t m_core_op_handle; - - // All buffers written by the user using write_async are first stored in this queue. - // When the scheduler decides to activate the network on a specific device, send_pending_buffer is called, and - // the buffers are sent to the underlying stream. - SafeQueue m_transfer_requests; + std::shared_ptr m_infer_requests_accumulator; CallbackReorderQueue m_callback_reorder_queue; }; @@ -92,62 +75,55 @@ public: const scheduler_core_op_handle_t &core_op_handle, const LayerInfo &layer_info, EventPtr core_op_activated_event, - CoreOpsSchedulerWeakPtr core_ops_scheduler); + std::shared_ptr infer_requests_accumulator); ScheduledOutputStream( std::map> &&streams, const scheduler_core_op_handle_t &core_op_handle, const LayerInfo &layer_info, EventPtr &&core_op_activated_event, - CoreOpsSchedulerWeakPtr core_ops_scheduler, - size_t max_queue_size, + std::shared_ptr &&infer_requests_accumulator, hailo_status &status) : - AsyncOutputStreamBase(layer_info, streams.begin()->second.get().get_interface(), - std::move(core_op_activated_event), status), + AsyncOutputStreamBase(layer_info, std::move(core_op_activated_event), status), m_streams(std::move(streams)), - m_core_ops_scheduler(core_ops_scheduler), m_core_op_handle(core_op_handle), - m_transfer_requests(max_queue_size), - m_callback_reorder_queue(max_queue_size) // TODO HRT-1058 - use reorder queue only when needed + m_infer_requests_accumulator(infer_requests_accumulator), + m_callback_reorder_queue(infer_requests_accumulator->queue_size()) // TODO HRT-1058 - use reorder queue only when needed {} - virtual hailo_status launch_transfer(const device_id_t &device_id) override; - - virtual hailo_status abort() override; - virtual hailo_status clear_abort() override; - 
virtual hailo_stream_interface_t get_interface() const override; virtual Expected> allocate_buffer_pool() override; virtual size_t get_max_ongoing_transfers() const override; + virtual hailo_status read_async(TransferRequest &&transfer_request) override + { + transfer_request.callback = [original_callback=transfer_request.callback, this](hailo_status status) { + original_callback(status); + if ((HAILO_SUCCESS == status) && (INVALID_CORE_OP_HANDLE != m_core_op_handle)) { + TRACE(FrameDequeueD2HTrace, m_core_op_handle, name()); + } + }; + return AsyncOutputStreamBase::read_async(std::move(transfer_request)); + } + virtual hailo_status read_async_impl(TransferRequest &&transfer_request) override; virtual bool is_scheduled() override final { return true; }; - // Returns the amount of frames buffered on a single device. - virtual Expected get_buffer_frames_size() const override - { - return m_streams.begin()->second.get().get_buffer_frames_size(); - } - virtual hailo_status read_impl(MemoryView user_buffer) override { auto status = AsyncOutputStreamBase::read_impl(user_buffer); - if (HAILO_SUCCESS == status) { - TRACE(ReadFrameTrace, m_core_op_handle, name()); + if ((HAILO_SUCCESS == status) && (INVALID_CORE_OP_HANDLE != m_core_op_handle)) { + TRACE(FrameDequeueD2HTrace, m_core_op_handle, name()); } return status; } + private: std::map> m_streams; - CoreOpsSchedulerWeakPtr m_core_ops_scheduler; scheduler_core_op_handle_t m_core_op_handle; - - // All buffers written by the user using write_async are first stored in this queue. - // When the scheduler decides to activate the network on a specific device, send_pending_buffer is called, and - // the buffers are sent to the underlying stream. 
- SafeQueue m_transfer_requests; + std::shared_ptr m_infer_requests_accumulator; CallbackReorderQueue m_callback_reorder_queue; }; diff --git a/hailort/libhailort/src/vdevice/scheduler/scheduler.cpp b/hailort/libhailort/src/vdevice/scheduler/scheduler.cpp index 1656d1e..a8163bd 100644 --- a/hailort/libhailort/src/vdevice/scheduler/scheduler.cpp +++ b/hailort/libhailort/src/vdevice/scheduler/scheduler.cpp @@ -13,7 +13,7 @@ #include "vdevice/scheduler/scheduler.hpp" #include "vdevice/vdevice_core_op.hpp" #include "vdevice/scheduler/scheduler_oracle.hpp" -#include "vdevice/vdevice_stream_multiplexer_wrapper.hpp" +#include "vdma/vdma_config_manager.hpp" #include "hef/hef_internal.hpp" #include @@ -44,34 +44,40 @@ Expected CoreOpsScheduler::create_round_robin(std::vector added_cng) + std::shared_ptr added_cng) { std::unique_lock lock(m_scheduler_mutex); - auto stream_infos = added_cng->get_all_stream_infos(); - CHECK_EXPECTED_AS_STATUS(stream_infos); + auto scheduled_core_op_it = m_scheduled_core_ops.find(core_op_handle); + if (scheduled_core_op_it != m_scheduled_core_ops.end()) { + scheduled_core_op_it->second->add_instance(); + } else { + auto stream_infos = added_cng->get_all_stream_infos(); + CHECK_EXPECTED_AS_STATUS(stream_infos); - auto scheduled_core_op = ScheduledCoreOp::create(added_cng, stream_infos.value()); - CHECK_EXPECTED_AS_STATUS(scheduled_core_op); + auto scheduled_core_op = ScheduledCoreOp::create(added_cng, stream_infos.value()); + CHECK_EXPECTED_AS_STATUS(scheduled_core_op); - m_scheduled_core_ops.emplace(core_op_handle, scheduled_core_op.release()); + m_scheduled_core_ops.emplace(core_op_handle, scheduled_core_op.release()); - for (const auto &pair : m_devices) { - auto &device_info = pair.second; - for (const auto &stream_info : stream_infos.value()) { - device_info->ongoing_frames[core_op_handle].insert(stream_info.name); - } - } + // To allow multiple instances of the same phyiscal core op, we don't limit the queue here. 
Each core-op and + // scheduled should limit themself. Since the ctor accept no argument, we init it using operator[]. + // TODO HRT-12136: limit the queue size (based on instances count) + m_infer_requests[core_op_handle]; - const core_op_priority_t normal_priority = HAILO_SCHEDULER_PRIORITY_NORMAL; - m_core_op_priority[normal_priority].emplace_back(core_op_handle); - if (!contains(m_next_core_op, normal_priority)) { - m_next_core_op[normal_priority] = 0; + const core_op_priority_t normal_priority = HAILO_SCHEDULER_PRIORITY_NORMAL; + m_core_op_priority[normal_priority].add(core_op_handle); } return HAILO_SUCCESS; } +void CoreOpsScheduler::remove_core_op(scheduler_core_op_handle_t core_op_handle) +{ + std::unique_lock lock(m_scheduler_mutex); + m_scheduled_core_ops.at(core_op_handle)->remove_instance(); +} + void CoreOpsScheduler::shutdown() { // Locking shared_lock since we don't touch the internal scheduler structures. @@ -80,20 +86,9 @@ void CoreOpsScheduler::shutdown() // After the scheduler thread have stopped, we can safely deactivate all core ops for (const auto &pair : m_devices) { - auto &device_info = pair.second; - if (INVALID_CORE_OP_HANDLE != device_info->current_core_op_handle) { - auto current_core_op = m_scheduled_core_ops.at(device_info->current_core_op_handle)->get_core_op(); - auto current_core_op_bundle = std::dynamic_pointer_cast(current_core_op); - assert(nullptr != current_core_op_bundle); - auto vdma_core_op = current_core_op_bundle->get_core_op_by_device_id(device_info->device_id); - if (!vdma_core_op) { - LOGGER__ERROR("Error retrieving core-op in scheduler destructor"); - } else { - if (HAILO_SUCCESS != VdmaConfigManager::deactivate_core_op(vdma_core_op.value())) { - LOGGER__ERROR("Error deactivating core-op when destroying scheduler"); - } - device_info->current_core_op_handle = INVALID_CORE_OP_HANDLE; - } + auto status = deactivate_core_op(pair.first); + if (HAILO_SUCCESS != status) { + LOGGER__ERROR("Error deactivating core-op when 
destroying scheduler {}", status); } } } @@ -102,8 +97,8 @@ hailo_status CoreOpsScheduler::switch_core_op(const scheduler_core_op_handle_t & { auto scheduled_core_op = m_scheduled_core_ops.at(core_op_handle); assert(contains(m_devices, device_id)); - assert(is_device_idle(device_id)); auto curr_device_info = m_devices[device_id]; + assert(curr_device_info->is_idle()); curr_device_info->is_switching_core_op = false; const auto burst_size = scheduled_core_op->get_burst_size(); @@ -122,25 +117,15 @@ hailo_status CoreOpsScheduler::switch_core_op(const scheduler_core_op_handle_t & curr_device_info->current_batch_size = hw_batch_size; if ((core_op_handle != curr_device_info->current_core_op_handle) || (!has_same_hw_batch_size_as_previous)) { - auto next_active_cng = scheduled_core_op->get_core_op(); - auto next_active_cng_wrapper = std::dynamic_pointer_cast(next_active_cng); - assert(nullptr != next_active_cng_wrapper); - auto next_active_cng_expected = next_active_cng_wrapper->get_core_op_by_device_id(curr_device_info->device_id); - CHECK_EXPECTED_AS_STATUS(next_active_cng_expected); + auto next_core_op = get_vdma_core_op(core_op_handle, device_id); - std::shared_ptr current_active_vdma_cng = nullptr; + std::shared_ptr current_core_op = nullptr; if (curr_device_info->current_core_op_handle != INVALID_CORE_OP_HANDLE) { - auto current_active_cng = m_scheduled_core_ops.at(curr_device_info->current_core_op_handle)->get_core_op(); - auto current_active_cng_bundle = std::dynamic_pointer_cast(current_active_cng); - assert(nullptr != current_active_cng_bundle); - auto current_active_cng_expected = current_active_cng_bundle->get_core_op_by_device_id(curr_device_info->device_id); - CHECK_EXPECTED_AS_STATUS(current_active_cng_expected); - current_active_vdma_cng = current_active_cng_expected.release(); + current_core_op = get_vdma_core_op(curr_device_info->current_core_op_handle, device_id); } const bool is_batch_switch = (core_op_handle == 
curr_device_info->current_core_op_handle); - auto status = VdmaConfigManager::switch_core_op(current_active_vdma_cng, next_active_cng_expected.value(), hw_batch_size, - is_batch_switch); + auto status = VdmaConfigManager::switch_core_op(current_core_op, next_core_op, hw_batch_size, is_batch_switch); CHECK_SUCCESS(status, "Failed switching core-op"); } @@ -148,15 +133,26 @@ hailo_status CoreOpsScheduler::switch_core_op(const scheduler_core_op_handle_t & curr_device_info->current_core_op_handle = core_op_handle; auto status = send_all_pending_buffers(core_op_handle, device_id, frames_count); - if (HAILO_STREAM_ABORTED_BY_USER == status) { - LOGGER__INFO("send_all_pending_buffers has failed with status=HAILO_STREAM_ABORTED_BY_USER"); - return status; - } CHECK_SUCCESS(status); return HAILO_SUCCESS; } +hailo_status CoreOpsScheduler::deactivate_core_op(const device_id_t &device_id) +{ + const auto core_op_handle = m_devices[device_id]->current_core_op_handle; + if (INVALID_CORE_OP_HANDLE == core_op_handle) { + return HAILO_SUCCESS; + } + + auto vdma_core_op = get_vdma_core_op(core_op_handle, device_id); + auto status = VdmaConfigManager::deactivate_core_op(vdma_core_op); + CHECK_SUCCESS(status, "Scheduler failed deactivate core op on {}", device_id); + + m_devices[device_id]->current_core_op_handle = INVALID_CORE_OP_HANDLE; + return HAILO_SUCCESS; +} + hailo_status CoreOpsScheduler::send_all_pending_buffers(const scheduler_core_op_handle_t &core_op_handle, const device_id_t &device_id, uint32_t burst_size) { auto current_device_info = m_devices[device_id]; @@ -171,39 +167,38 @@ hailo_status CoreOpsScheduler::send_all_pending_buffers(const scheduler_core_op_ current_device_info->frames_left_before_stop_streaming--; } - for (auto &input_stream : scheduled_core_op->get_core_op()->get_input_streams()) { - const auto &stream_name = input_stream.get().name(); - scheduled_core_op->pending_frames().decrease(stream_name); - 
current_device_info->ongoing_frames[core_op_handle].increase(stream_name); - - // After launching the transfer, signal_frame_transferred may be called (and ongoing frames will be - // decreased). - auto &input_stream_base = static_cast(input_stream.get()); - auto status = input_stream_base.launch_transfer(device_id); - if (HAILO_STREAM_ABORTED_BY_USER == status) { - LOGGER__INFO("launch_transfer has failed with status=HAILO_STREAM_ABORTED_BY_USER"); - return status; - } - CHECK_SUCCESS(status); - } + auto status = infer_async(core_op_handle, device_id); + CHECK_SUCCESS(status); + } - for (auto &output_stream : scheduled_core_op->get_core_op()->get_output_streams()) { - const auto &stream_name = output_stream.get().name(); - scheduled_core_op->pending_frames().decrease(stream_name); - current_device_info->ongoing_frames[core_op_handle].increase(stream_name); - - // After launching the transfer, signal_frame_transferred may be called (and ongoing frames will be - // decreased). - auto &output_stream_base = static_cast(output_stream.get()); - auto status = output_stream_base.launch_transfer(device_id); - if (HAILO_STREAM_ABORTED_BY_USER == status) { - LOGGER__INFO("launch_transfer has failed with status=HAILO_STREAM_ABORTED_BY_USER"); - return status; - } - CHECK_SUCCESS(status); - } + scheduled_core_op->set_last_device(device_id); + return HAILO_SUCCESS; +} - scheduled_core_op->set_last_device(device_id); +hailo_status CoreOpsScheduler::infer_async(const scheduler_core_op_handle_t &core_op_handle, + const device_id_t &device_id) +{ + auto current_device_info = m_devices[device_id]; + assert(core_op_handle == current_device_info->current_core_op_handle); + auto scheduled_core_op = m_scheduled_core_ops.at(core_op_handle); + auto vdma_core_op = get_vdma_core_op(core_op_handle, device_id); + + auto infer_request = dequeue_infer_request(core_op_handle); + CHECK_EXPECTED_AS_STATUS(infer_request); + + current_device_info->ongoing_infer_requests.fetch_add(1); + + auto 
original_callback = infer_request->callback; + infer_request->callback = [current_device_info, this, original_callback](hailo_status status) { + current_device_info->ongoing_infer_requests.fetch_sub(1); + m_scheduler_thread.signal(); + original_callback(status); + }; + auto status = vdma_core_op->infer_async(infer_request.release()); + if (HAILO_SUCCESS != status) { + current_device_info->ongoing_infer_requests.fetch_sub(1); + original_callback(status); + CHECK_SUCCESS(status); } return HAILO_SUCCESS; @@ -215,118 +210,36 @@ CoreOpsScheduler::ReadyInfo CoreOpsScheduler::is_core_op_ready(const scheduler_c ReadyInfo result; result.is_ready = false; - if (should_core_op_stop(core_op_handle)) { - // Do not switch to an aborted core-op - return result; - } - auto scheduled_core_op = m_scheduled_core_ops.at(core_op_handle); - std::vector over_threshold; - over_threshold.reserve(scheduled_core_op->get_inputs_names().size()); - std::vector over_timeout; - over_timeout.reserve(scheduled_core_op->get_inputs_names().size()); + result.is_ready = (get_frames_ready_to_transfer(core_op_handle, device_id) > 0); if (check_threshold) { - for (const auto &name : scheduled_core_op->get_inputs_names()) { - auto threshold_exp = scheduled_core_op->get_threshold(name); - if (!threshold_exp) { - LOGGER__ERROR("Failed to get threshold for stream {}", name); - return result; - } - auto threshold = (DEFAULT_SCHEDULER_MIN_THRESHOLD == threshold_exp.value()) ? 
1 : threshold_exp.value(); - auto timeout_exp = scheduled_core_op->get_timeout(); - if (!timeout_exp) { - LOGGER__ERROR("Failed to get timeout for stream {}", name); - return result; - } - auto timeout = timeout_exp.release(); - - // Check if there arent enough write requests to reach threshold and timeout didnt passed - const auto write_requests = scheduled_core_op->pending_frames()[name]; - auto stream_over_threshold = write_requests >= threshold; - auto stream_over_timeout = timeout <= (std::chrono::steady_clock::now() - scheduled_core_op->get_last_run_timestamp()); - over_threshold.push_back(stream_over_threshold); - over_timeout.push_back(stream_over_timeout); - if (stream_over_threshold || stream_over_timeout) { - continue; - } else { - result.is_ready = false; - return result; - } + result.over_threshold = scheduled_core_op->is_over_threshold(); + result.over_timeout = scheduled_core_op->is_over_timeout(); + + if (!result.over_threshold && !result.over_timeout){ + result.is_ready = false; } - result.over_threshold = std::all_of(over_threshold.begin(), over_threshold.end(), [](auto over) { return over; }); - result.over_timeout = std::all_of(over_timeout.begin(), over_timeout.end(), [](auto over) { return over; }); } - result.is_ready = (get_frames_ready_to_transfer(core_op_handle, device_id) > 0); - return result; } -hailo_status CoreOpsScheduler::signal_frame_pending(const scheduler_core_op_handle_t &core_op_handle, - const std::string &stream_name, hailo_stream_direction_t direction) +hailo_status CoreOpsScheduler::enqueue_infer_request(const scheduler_core_op_handle_t &core_op_handle, + InferRequest &&infer_request) { std::shared_lock lock(m_scheduler_mutex); - auto scheduled_core_op = m_scheduled_core_ops.at(core_op_handle); - - if (should_core_op_stop(core_op_handle)) { - return HAILO_STREAM_ABORTED_BY_USER; - } - - if (HAILO_H2D_STREAM == direction) { - TRACE(WriteFrameTrace, core_op_handle, stream_name); - scheduled_core_op->mark_frame_sent(); - } - - 
scheduled_core_op->pending_frames().increase(stream_name); - m_scheduler_thread.signal(); - - return HAILO_SUCCESS; -} - -void CoreOpsScheduler::signal_frame_transferred(const scheduler_core_op_handle_t &core_op_handle, - const std::string &stream_name, const device_id_t &device_id, hailo_stream_direction_t stream_direction) -{ - std::shared_lock lock(m_scheduler_mutex); - - auto scheduled_core_op = m_scheduled_core_ops.at(core_op_handle); - - m_devices[device_id]->ongoing_frames[core_op_handle].decrease(stream_name); - if (HAILO_D2H_STREAM == stream_direction) { - TRACE(OutputVdmaEnqueueTrace, device_id, core_op_handle, stream_name); - } - m_scheduler_thread.signal(); -} - -bool CoreOpsScheduler::is_device_idle(const device_id_t &device_id) -{ - const auto &device_info = m_devices[device_id]; - auto core_op_handle = device_info->current_core_op_handle; - if (INVALID_CORE_OP_HANDLE == core_op_handle) { - // If no core-op is running, consider it as drained - return true; - } + CHECK(m_scheduled_core_ops.at(core_op_handle)->instances_count() > 0, HAILO_INTERNAL_FAILURE, + "Trying to enqueue infer request on a core-op with instances_count==0"); - if (m_scheduled_core_ops.at(core_op_handle)->all_stream_disabled()) { - // We treat core-op as drained only if all streams are aborted - to make sure there aren't any ongoing transfers - return true; + auto status = m_infer_requests.at(core_op_handle).enqueue(std::move(infer_request)); + if (HAILO_SUCCESS == status) { + m_scheduled_core_ops.at(core_op_handle)->requested_infer_requests().fetch_add(1); + m_scheduler_thread.signal(); } - - return m_devices[device_id]->is_idle(); -} - -void CoreOpsScheduler::enable_stream(const scheduler_core_op_handle_t &core_op_handle, const std::string &stream_name) -{ - std::shared_lock lock(m_scheduler_mutex); - m_scheduled_core_ops.at(core_op_handle)->enable_stream(stream_name); -} - -void CoreOpsScheduler::disable_stream(const scheduler_core_op_handle_t &core_op_handle, const std::string 
&stream_name) -{ - std::shared_lock lock(m_scheduler_mutex); - m_scheduled_core_ops.at(core_op_handle)->disable_stream(stream_name); + return status; } hailo_status CoreOpsScheduler::set_timeout(const scheduler_core_op_handle_t &core_op_handle, const std::chrono::milliseconds &timeout, const std::string &/*network_name*/) @@ -354,31 +267,24 @@ hailo_status CoreOpsScheduler::set_priority(const scheduler_core_op_handle_t &co { CHECK(priority <= HAILO_SCHEDULER_PRIORITY_MAX, HAILO_INVALID_ARGUMENT); - // Remove core of from previous priority map std::unique_lock lock(m_scheduler_mutex); - auto old_priority = m_scheduled_core_ops.at(core_op_handle)->get_priority(); - auto &priority_vector = m_core_op_priority[old_priority]; - auto it = std::find(priority_vector.begin(), priority_vector.end(), core_op_handle); - CHECK(it != priority_vector.end(), HAILO_INTERNAL_FAILURE); - priority_vector.erase(it); - m_next_core_op[old_priority] = 0; // Avoiding overflow by reseting next core op. + + auto scheduled_core_op = m_scheduled_core_ops.at(core_op_handle); + + // Remove core op from previous priority map + auto &priority_group = m_core_op_priority[scheduled_core_op->get_priority()]; + assert(priority_group.contains(core_op_handle)); + priority_group.erase(core_op_handle); // Add it to the new priority map. 
m_scheduled_core_ops.at(core_op_handle)->set_priority(priority); - m_core_op_priority[priority].push_back(core_op_handle); - if (!contains(m_next_core_op, priority)) { - m_next_core_op[priority] = 0; - } + m_core_op_priority[priority].add(core_op_handle); + TRACE(SetCoreOpPriorityTrace, core_op_handle, priority); return HAILO_SUCCESS; } -bool CoreOpsScheduler::should_core_op_stop(const scheduler_core_op_handle_t &core_op_handle) -{ - return m_scheduled_core_ops.at(core_op_handle)->any_stream_disabled(); -} - hailo_status CoreOpsScheduler::optimize_streaming_if_enabled(const scheduler_core_op_handle_t &core_op_handle) { auto scheduled_core_op = m_scheduled_core_ops.at(core_op_handle); @@ -392,56 +298,104 @@ hailo_status CoreOpsScheduler::optimize_streaming_if_enabled(const scheduler_cor !CoreOpsSchedulerOracle::should_stop_streaming(*this, scheduled_core_op->get_priority(), device_info->device_id) && (get_frames_ready_to_transfer(core_op_handle, device_info->device_id) >= DEFAULT_BURST_SIZE)) { auto status = send_all_pending_buffers(core_op_handle, device_info->device_id, DEFAULT_BURST_SIZE); - if (HAILO_STREAM_ABORTED_BY_USER == status) { - LOGGER__INFO("send_all_pending_buffers has failed with status=HAILO_STREAM_ABORTED_BY_USER"); - return status; - } CHECK_SUCCESS(status); } } return HAILO_SUCCESS; } +Expected CoreOpsScheduler::dequeue_infer_request(scheduler_core_op_handle_t core_op_handle) +{ + auto infer_request = m_infer_requests.at(core_op_handle).dequeue(); + CHECK_EXPECTED(infer_request); + + m_scheduled_core_ops.at(core_op_handle)->requested_infer_requests().fetch_sub(1); + return infer_request.release(); +} + uint16_t CoreOpsScheduler::get_frames_ready_to_transfer(scheduler_core_op_handle_t core_op_handle, const device_id_t &device_id) const { auto scheduled_core_op = m_scheduled_core_ops.at(core_op_handle); auto device_info = m_devices.at(device_id); + if (scheduled_core_op->instances_count() == 0) { + // We don't want to schedule/execute core ops with 
instances_count() == 0. There may still be + // requested_infer_requests until shutdown_core_op is called. + // TODO: HRT-12218 after dequeue all infer requests for the instance in remove_core_op, this flow can be + // removed any simplified (since on this case requested_infer_requests == 0). + return 0; + } + const auto max_ongoing_frames = scheduled_core_op->get_max_ongoing_frames_per_device(); - const auto ongoing_frames = device_info->ongoing_frames[core_op_handle].get_max_value(); + const uint32_t ongoing_frames = (device_info->current_core_op_handle == core_op_handle) ? + device_info->ongoing_infer_requests.load() : 0; assert(ongoing_frames <= max_ongoing_frames); - const auto pending_frames = scheduled_core_op->pending_frames().get_min_value(); + const uint32_t requested_frames = scheduled_core_op->requested_infer_requests(); + + return static_cast(std::min(requested_frames, max_ongoing_frames - ongoing_frames)); +} + +std::shared_ptr CoreOpsScheduler::get_vdma_core_op(scheduler_core_op_handle_t core_op_handle, + const device_id_t &device_id) +{ + return m_scheduled_core_ops.at(core_op_handle)->get_vdma_core_op(device_id); +} + +void CoreOpsScheduler::shutdown_core_op(scheduler_core_op_handle_t core_op_handle) +{ + // Deactivate core op from all devices + for (const auto &device_state : m_devices) { + if (device_state.second->current_core_op_handle == core_op_handle) { + auto status = deactivate_core_op(device_state.first); + if (HAILO_SUCCESS != status) { + LOGGER__ERROR("Scheduler failed deactivate core op on {}", device_state.first); + // continue + } + } + } - return static_cast(std::min(pending_frames, max_ongoing_frames - ongoing_frames)); + // Cancel all requests on the queue + auto core_op = m_scheduled_core_ops.at(core_op_handle); + while (core_op->requested_infer_requests() > 0) { + auto request = dequeue_infer_request(core_op_handle); + assert(request); + for (auto &transfer : request->transfers) { + 
transfer.second.callback(HAILO_STREAM_ABORTED_BY_USER); + } + request->callback(HAILO_STREAM_ABORTED_BY_USER); + } } void CoreOpsScheduler::schedule() { std::shared_lock lock(m_scheduler_mutex); - m_scheduled_core_ops.for_each([this](const std::pair &core_op_pair) { + // First, we are using streaming optimization (where switch is not needed) + for (auto &core_op_pair : m_scheduled_core_ops) { auto status = optimize_streaming_if_enabled(core_op_pair.first); if ((HAILO_SUCCESS != status) && (HAILO_STREAM_ABORTED_BY_USER != status)) { LOGGER__ERROR("optimize_streaming_if_enabled thread failed with status={}", status); } + }; - }); - + // Now, get decisions which requires core op switch auto oracle_decisions = CoreOpsSchedulerOracle::get_oracle_decisions(*this); - for (const auto &run_params : oracle_decisions) { auto status = switch_core_op(run_params.core_op_handle, run_params.device_id); - if (HAILO_STREAM_ABORTED_BY_USER == status) { - continue; - } - if (HAILO_SUCCESS != status) { LOGGER__ERROR("Scheduler thread failed with status={}", status); break; } } + + // Finally, we want to deactivate all core ops with instances_count() == 0 + for (auto &core_op_pair : m_scheduled_core_ops) { + if (core_op_pair.second->instances_count() == 0) { + shutdown_core_op(core_op_pair.first); + } + } } CoreOpsScheduler::SchedulerThread::SchedulerThread(CoreOpsScheduler &scheduler) : diff --git a/hailort/libhailort/src/vdevice/scheduler/scheduler.hpp b/hailort/libhailort/src/vdevice/scheduler/scheduler.hpp index 94159f6..ebcdf09 100644 --- a/hailort/libhailort/src/vdevice/scheduler/scheduler.hpp +++ b/hailort/libhailort/src/vdevice/scheduler/scheduler.hpp @@ -36,6 +36,8 @@ using CoreOpsSchedulerWeakPtr = std::weak_ptr; using stream_name_t = std::string; +class VDeviceCoreOp; + class CoreOpsScheduler : public SchedulerBase { public: @@ -50,20 +52,14 @@ public: CoreOpsScheduler &operator=(CoreOpsScheduler &&other) = delete; CoreOpsScheduler(CoreOpsScheduler &&other) noexcept = delete; 
- hailo_status add_core_op(scheduler_core_op_handle_t core_op_handle, std::shared_ptr added_core_op); + hailo_status add_core_op(scheduler_core_op_handle_t core_op_handle, std::shared_ptr added_core_op); + void remove_core_op(scheduler_core_op_handle_t core_op_handle); // Shutdown the scheduler, stops interrupt thread and deactivate all core ops from all devices. This operation // is not recoverable. void shutdown(); - hailo_status signal_frame_pending(const scheduler_core_op_handle_t &core_op_handle, const std::string &stream_name, - hailo_stream_direction_t direction); - - void signal_frame_transferred(const scheduler_core_op_handle_t &core_op_handle, - const std::string &stream_name, const device_id_t &device_id, hailo_stream_direction_t direction); - - void enable_stream(const scheduler_core_op_handle_t &core_op_handle, const std::string &stream_name); - void disable_stream(const scheduler_core_op_handle_t &core_op_handle, const std::string &stream_name); + hailo_status enqueue_infer_request(const scheduler_core_op_handle_t &core_op_handle, InferRequest &&infer_request); hailo_status set_timeout(const scheduler_core_op_handle_t &core_op_handle, const std::chrono::milliseconds &timeout, const std::string &network_name); hailo_status set_threshold(const scheduler_core_op_handle_t &core_op_handle, uint32_t threshold, const std::string &network_name); @@ -71,19 +67,23 @@ public: virtual ReadyInfo is_core_op_ready(const scheduler_core_op_handle_t &core_op_handle, bool check_threshold, const device_id_t &device_id) override; - virtual bool is_device_idle(const device_id_t &device_id) override; private: hailo_status switch_core_op(const scheduler_core_op_handle_t &core_op_handle, const device_id_t &device_id); + hailo_status deactivate_core_op(const device_id_t &device_id); hailo_status send_all_pending_buffers(const scheduler_core_op_handle_t &core_op_handle, const device_id_t &device_id, uint32_t burst_size); - - bool should_core_op_stop(const 
scheduler_core_op_handle_t &core_op_handle); + hailo_status infer_async(const scheduler_core_op_handle_t &core_op_handle, const device_id_t &device_id); hailo_status optimize_streaming_if_enabled(const scheduler_core_op_handle_t &core_op_handle); + Expected dequeue_infer_request(scheduler_core_op_handle_t core_op_handle); uint16_t get_frames_ready_to_transfer(scheduler_core_op_handle_t core_op_handle, const device_id_t &device_id) const; + std::shared_ptr get_vdma_core_op(scheduler_core_op_handle_t core_op_handle, + const device_id_t &device_id); + + void shutdown_core_op(scheduler_core_op_handle_t core_op_handle); void schedule(); class SchedulerThread final { @@ -109,12 +109,15 @@ private: std::thread m_thread; }; - ThreadSafeMap m_scheduled_core_ops; + std::unordered_map m_scheduled_core_ops; + + using InferRequestQueue = SafeQueue; + std::unordered_map m_infer_requests; // This shared mutex guards accessing the scheduler data structures including: // - m_scheduled_core_ops + // - m_infer_requests // - m_core_op_priority - // - m_next_core_op // Any function that is modifing these structures (for example by adding/removing items) must lock this mutex using // unique_lock. Any function accessing these structures (for example access to // m_scheduled_core_ops.at(core_op_handle) can use shared_lock. 
diff --git a/hailort/libhailort/src/vdevice/scheduler/scheduler_base.hpp b/hailort/libhailort/src/vdevice/scheduler/scheduler_base.hpp index 3c77316..e07d62d 100644 --- a/hailort/libhailort/src/vdevice/scheduler/scheduler_base.hpp +++ b/hailort/libhailort/src/vdevice/scheduler/scheduler_base.hpp @@ -17,7 +17,6 @@ #include "common/filesystem.hpp" #include "stream_common/stream_internal.hpp" -#include "vdevice/scheduler/scheduler_counter.hpp" #include @@ -39,22 +38,14 @@ struct ActiveDeviceInfo { current_core_op_handle(INVALID_CORE_OP_HANDLE), next_core_op_handle(INVALID_CORE_OP_HANDLE), is_switching_core_op(false), current_batch_size(0), frames_left_before_stop_streaming(0), - device_id(device_id), device_arch(device_arch) + ongoing_infer_requests(0), + device_id(device_id), + device_arch(device_arch) {} - uint32_t get_ongoing_frames() const - { - if (current_core_op_handle == INVALID_CORE_OP_HANDLE) { - // No ongoing frames - return 0; - } - - return ongoing_frames.at(current_core_op_handle).get_max_value(); - } - bool is_idle() const { - return 0 == get_ongoing_frames(); + return 0 == ongoing_infer_requests; } scheduler_core_op_handle_t current_core_op_handle; @@ -66,14 +57,59 @@ struct ActiveDeviceInfo { // (even if there is another core op ready). size_t frames_left_before_stop_streaming; - // For each stream (both input and output) we store a counter for all ongoing frames. We increase the counter when - // launching transfer and decrease it when we get the transfer callback called. - std::unordered_map ongoing_frames; + std::atomic_uint32_t ongoing_infer_requests; device_id_t device_id; std::string device_arch; }; +// Group of core ops with the same priority. 
+class PriorityGroup { +public: + PriorityGroup() = default; + + void add(scheduler_core_op_handle_t core_op_handle) + { + m_core_ops.emplace_back(core_op_handle); + } + + void erase(scheduler_core_op_handle_t core_op_handle) + { + auto it = std::find(m_core_ops.begin(), m_core_ops.end(), core_op_handle); + assert(it != m_core_ops.end()); + m_core_ops.erase(it); + m_next_core_op_index = 0; // Avoiding overflow by reseting next core op. + } + + bool contains(scheduler_core_op_handle_t core_op_handle) const + { + return ::hailort::contains(m_core_ops, core_op_handle); + } + + // Returns a core op at m_next_core_op_index + relative_index + scheduler_core_op_handle_t get(size_t relative_index) const + { + assert(relative_index < m_core_ops.size()); + const auto abs_index = (m_next_core_op_index + relative_index) % m_core_ops.size(); + return m_core_ops[abs_index]; + } + + void set_next(size_t relative_index) + { + assert(relative_index <= m_core_ops.size()); // allowing wrap around + m_next_core_op_index = (m_next_core_op_index + relative_index) % m_core_ops.size(); + } + + size_t size() const { return m_core_ops.size(); } + +private: + std::vector m_core_ops; + + // index inside core_ops vector, next core to be executed from this priority. Used to implement round robin on the + // group. 
+ size_t m_next_core_op_index = 0; +}; + class SchedulerBase { @@ -91,7 +127,6 @@ public: virtual ReadyInfo is_core_op_ready(const scheduler_core_op_handle_t &core_op_handle, bool check_threshold, const device_id_t &device_id) = 0; - virtual bool is_device_idle(const device_id_t &device_id) = 0; virtual uint32_t get_device_count() const { @@ -108,21 +143,11 @@ public: return m_devices; } - virtual std::map> get_core_op_priority_map() + virtual std::map &get_core_op_priority_map() { return m_core_op_priority; } - virtual scheduler_core_op_handle_t get_next_core_op(core_op_priority_t priority) const - { - return m_next_core_op.at(priority); - } - - virtual void set_next_core_op(const core_op_priority_t priority, const scheduler_core_op_handle_t &core_op_handle) - { - m_next_core_op.at(priority) = core_op_handle; - } - protected: SchedulerBase(hailo_scheduling_algorithm_t algorithm, std::vector &devices_ids, std::vector &devices_arch) : m_algorithm(algorithm) @@ -140,10 +165,9 @@ protected: std::map> m_devices; - std::map> m_core_op_priority; + std::map m_core_op_priority; hailo_scheduling_algorithm_t m_algorithm; - std::unordered_map m_next_core_op; }; } /* namespace hailort */ diff --git a/hailort/libhailort/src/vdevice/scheduler/scheduler_counter.hpp b/hailort/libhailort/src/vdevice/scheduler/scheduler_counter.hpp deleted file mode 100644 index cf40f3d..0000000 --- a/hailort/libhailort/src/vdevice/scheduler/scheduler_counter.hpp +++ /dev/null @@ -1,99 +0,0 @@ -/** - * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. - * Distributed under the MIT license (https://opensource.org/licenses/MIT) -**/ -/** - * @file scheduler_counter.hpp - * @brief Counter object that wraps a single counter per stream. 
- **/ - -#ifndef _HAILO_SCHEDULER_COUNTER_HPP_ -#define _HAILO_SCHEDULER_COUNTER_HPP_ - -#include "common/utils.hpp" - -#include -#include -#include - -namespace hailort -{ - -using stream_name_t = std::string; - -class SchedulerCounter -{ -public: - SchedulerCounter() : m_map() - {} - - void insert(const stream_name_t &name) - { - assert(!contains(m_map, name)); - m_map[name] = 0; - } - - uint32_t operator[](const stream_name_t &name) const - { - assert(contains(m_map, name)); - return m_map.at(name); - } - - void increase(const stream_name_t &name) - { - assert(contains(m_map, name)); - m_map[name]++; - } - - void decrease(const stream_name_t &name) - { - assert(contains(m_map, name)); - assert(m_map[name] > 0); - m_map[name]--; - } - - uint32_t get_min_value() const - { - return get_min_value_of_unordered_map(m_map); - } - - uint32_t get_max_value() const - { - return get_max_value_of_unordered_map(m_map); - } - - bool all_values_bigger_or_equal(uint32_t value) const - { - for (const auto &pair : m_map) { - if (value > pair.second) { - return false; - } - } - return true; - } - - bool empty() const - { - for (const auto &pair : m_map) { - if (0 != pair.second) { - return false; - } - } - return true; - } - - void reset() - { - for (auto &pair : m_map) { - pair.second = 0; - } - } - -private: - std::unordered_map m_map; -}; - - -} /* namespace hailort */ - -#endif /* _HAILO_SCHEDULER_COUNTER_HPP_ */ diff --git a/hailort/libhailort/src/vdevice/scheduler/scheduler_oracle.cpp b/hailort/libhailort/src/vdevice/scheduler/scheduler_oracle.cpp index b6106e1..26bc448 100644 --- a/hailort/libhailort/src/vdevice/scheduler/scheduler_oracle.cpp +++ b/hailort/libhailort/src/vdevice/scheduler/scheduler_oracle.cpp @@ -17,14 +17,13 @@ namespace hailort scheduler_core_op_handle_t CoreOpsSchedulerOracle::choose_next_model(SchedulerBase &scheduler, const device_id_t &device_id, bool check_threshold) { auto device_info = scheduler.get_device_info(device_id); - auto priority_map = 
scheduler.get_core_op_priority_map(); + auto &priority_map = scheduler.get_core_op_priority_map(); for (auto iter = priority_map.rbegin(); iter != priority_map.rend(); ++iter) { - auto priority_group_size = iter->second.size(); + auto &priority_group = iter->second; - for (uint32_t i = 0; i < priority_group_size; i++) { - uint32_t index = scheduler.get_next_core_op(iter->first) + i; - index %= static_cast(priority_group_size); - auto core_op_handle = iter->second[index]; + // Iterate all core ops inside the priority group starting from priority_group next core op + for (uint32_t i = 0; i < priority_group.size(); i++) { + auto core_op_handle = priority_group.get(i); auto ready_info = scheduler.is_core_op_ready(core_op_handle, check_threshold, device_id); if (ready_info.is_ready) { // In cases device is idle the check_threshold is not needed, therefore is false. @@ -32,9 +31,8 @@ scheduler_core_op_handle_t CoreOpsSchedulerOracle::choose_next_model(SchedulerBa TRACE(OracleDecisionTrace, switch_because_idle, device_id, core_op_handle, ready_info.over_threshold, ready_info.over_timeout); device_info->is_switching_core_op = true; device_info->next_core_op_handle = core_op_handle; - // Set next to run as next in round-robin - index = ((index + 1) % static_cast(priority_group_size)); - scheduler.set_next_core_op(iter->first, index); + // Set next to run as next in round-robin + priority_group.set_next(i + 1); return core_op_handle; } } @@ -52,14 +50,13 @@ bool CoreOpsSchedulerOracle::should_stop_streaming(SchedulerBase &scheduler, cor } // Now check if there is another qualified core op. 
- auto priority_map = scheduler.get_core_op_priority_map(); + const auto &priority_map = scheduler.get_core_op_priority_map(); for (auto iter = priority_map.rbegin(); (iter != priority_map.rend()) && (iter->first >= core_op_priority); ++iter) { - auto priority_group_size = iter->second.size(); + auto &priority_group = iter->second; - for (uint32_t i = 0; i < priority_group_size; i++) { - uint32_t index = scheduler.get_next_core_op(iter->first) + i; - index %= static_cast(priority_group_size); - auto core_op_handle = iter->second[index]; + // Iterate all core ops inside the priority group starting from next_core_op_index + for (uint32_t i = 0; i < priority_group.size(); i++) { + auto core_op_handle = priority_group.get(i); if (!is_core_op_active(scheduler, core_op_handle) && scheduler.is_core_op_ready(core_op_handle, true, device_id).is_ready) { return true; } @@ -96,7 +93,7 @@ std::vector CoreOpsSchedulerOracle::get_oracle_decisions(SchedulerBas } // Check if device is idle - if (!active_device_info->is_switching_core_op && scheduler.is_device_idle(active_device_info->device_id)) { + if (!active_device_info->is_switching_core_op && active_device_info->is_idle()) { const bool CHECK_THRESHOLD = true; auto core_op_handle = choose_next_model(scheduler, active_device_info->device_id, CHECK_THRESHOLD); if (core_op_handle == INVALID_CORE_OP_HANDLE) { diff --git a/hailort/libhailort/src/vdevice/vdevice.cpp b/hailort/libhailort/src/vdevice/vdevice.cpp index 8b5cd8b..1b54131 100644 --- a/hailort/libhailort/src/vdevice/vdevice.cpp +++ b/hailort/libhailort/src/vdevice/vdevice.cpp @@ -12,6 +12,7 @@ #include "hailo/hailort.h" #include "hailo/vdevice.hpp" #include "hailo/hailort_defaults.hpp" +#include "hailo/infer_model.hpp" #include "utils/profiler/tracer_macros.hpp" #include "vdevice/vdevice_internal.hpp" @@ -100,6 +101,21 @@ Expected VDevice::create_configure_params(Hef &hef, cons return hef.create_configure_params(stream_interface.release(), network_group_name); } 
+hailo_status VDevice::dma_map(void *address, size_t size, hailo_stream_direction_t direction) +{ + (void) address; + (void) size; + (void) direction; + return HAILO_NOT_IMPLEMENTED; +} + +hailo_status VDevice::dma_unmap(void *address, hailo_stream_direction_t direction) +{ + (void) address; + (void) direction; + return HAILO_NOT_IMPLEMENTED; +} + hailo_status VDevice::before_fork() { return HAILO_SUCCESS; @@ -182,7 +198,7 @@ Expected VDeviceHandle::get_default_streams_interface( return vdevice.value()->get_default_streams_interface(); } -Expected VDeviceHandle::create_infer_model(const std::string &hef_path) +Expected> VDeviceHandle::create_infer_model(const std::string &hef_path) { auto &manager = SharedResourceManager::get_instance(); auto vdevice = manager.resource_lookup(m_handle); @@ -206,17 +222,27 @@ VDeviceClient::VDeviceClient(std::unique_ptr client, VDeviceId std::vector> &&devices) : m_client(std::move(client)), m_identifier(std::move(identifier)), - m_devices(std::move(devices)) + m_devices(std::move(devices)), + m_is_listener_thread_running(false) {} VDeviceClient::~VDeviceClient() { + auto status = finish_listener_thread(); + if (status != HAILO_SUCCESS) { + LOGGER__CRITICAL("Failed to finish_listener_thread in VDevice"); + } + // Note: We clear m_network_groups to prevent double destruction on ConfiguredNetworkGroupBase. // Explanation: When the VDeviceClient is destructed, it's members are destructed last. // That would cause the m_network_groups (vector of ConfiguredNetworkGroupClient) to be destructed after the vdevice in the service. // The vdevice in the service will destruct the ConfiguredNetworkGroupBase, // and then the ConfiguredNetworkGroupClient destructor will be called - causing double destruction on ConfiguredNetworkGroupBase. 
- m_network_groups.clear(); + { + std::unique_lock lock(m_mutex); + m_network_groups.clear(); + } + auto pid = OsUtils::get_curr_pid(); auto reply = m_client->VDevice_release(m_identifier, pid); if (reply != HAILO_SUCCESS) { @@ -226,8 +252,12 @@ VDeviceClient::~VDeviceClient() hailo_status VDeviceClient::before_fork() { + m_is_listener_thread_running = false; + HailoRtRpcClientUtils::get_instance().before_fork(); m_client.reset(); + m_cb_listener_thread.reset(); + return HAILO_SUCCESS; } @@ -246,12 +276,21 @@ hailo_status VDeviceClient::create_client() hailo_status VDeviceClient::after_fork_in_parent() { HailoRtRpcClientUtils::get_instance().after_fork_in_parent(); - return create_client(); + auto status = create_client(); + CHECK_SUCCESS(status); + + auto listener_status = start_listener_thread(m_identifier); + CHECK_SUCCESS(listener_status); + + return HAILO_SUCCESS; } hailo_status VDeviceClient::after_fork_in_child() { HailoRtRpcClientUtils::get_instance().after_fork_in_child(); + auto listener_status = start_listener_thread(m_identifier); + CHECK_SUCCESS(listener_status); + return HAILO_SUCCESS; } @@ -299,11 +338,76 @@ Expected VDeviceClient::configure(Hef &hef, CHECK_NOT_NULL_AS_EXPECTED(network_group, HAILO_OUT_OF_HOST_MEMORY); networks.emplace_back(network_group); - m_network_groups.push_back(network_group); + { + std::unique_lock lock(m_mutex); + m_network_groups.emplace(ng_handle, network_group); + } } + + // Init listener thread only in case configure happens with async api + if ((configure_params.size() > 0) && + configure_params.begin()->second.stream_params_by_name.begin()->second.flags == HAILO_STREAM_FLAGS_ASYNC) { + auto init_status = start_listener_thread(m_identifier); + CHECK_SUCCESS_AS_EXPECTED(init_status); + } + return networks; } +hailo_status VDeviceClient::start_listener_thread(VDeviceIdentifier identifier) +{ + if (m_is_listener_thread_running) { + return HAILO_SUCCESS; + } + + m_cb_listener_thread = make_unique_nothrow>("SVC_LISTENER", 
[this, identifier] () { + return this->listener_run_in_thread(identifier); + }); + CHECK_NOT_NULL(m_cb_listener_thread, HAILO_OUT_OF_HOST_MEMORY); + m_is_listener_thread_running = true; + + return HAILO_SUCCESS; +} + +hailo_status VDeviceClient::listener_run_in_thread(VDeviceIdentifier identifier) +{ + grpc::ChannelArguments ch_args; + ch_args.SetMaxReceiveMessageSize(-1); + auto channel = grpc::CreateCustomChannel(hailort::HAILORT_SERVICE_ADDRESS, grpc::InsecureChannelCredentials(), ch_args); + auto client = make_unique_nothrow(channel); + CHECK_NOT_NULL(client, HAILO_OUT_OF_HOST_MEMORY); + + while (m_is_listener_thread_running) { + auto callback_id = client->VDevice_get_callback_id(identifier); + if (callback_id.status() == HAILO_SHUTDOWN_EVENT_SIGNALED) { + LOGGER__INFO("Shutdown event was signaled in listener_run_in_thread"); + break; + } + CHECK_EXPECTED_AS_STATUS(callback_id); + + std::shared_ptr ng_ptr; + { + std::unique_lock lock(m_mutex); + assert(contains(m_network_groups, callback_id->network_group_handle())); + ng_ptr = m_network_groups.at(callback_id->network_group_handle()); + } + auto status = ng_ptr->execute_callback(callback_id.value()); + CHECK_SUCCESS(status); + } + + return HAILO_SUCCESS; +} + +hailo_status VDeviceClient::finish_listener_thread() +{ + m_is_listener_thread_running = false; + auto status = m_client->VDevice_finish_callback_listener(m_identifier); + CHECK_SUCCESS(status); + + m_cb_listener_thread.reset(); + return HAILO_SUCCESS; +} + Expected>> VDeviceClient::get_physical_devices() const { std::vector> devices_refs; @@ -326,12 +430,6 @@ Expected VDeviceClient::get_default_streams_interface( return m_client->VDevice_get_default_streams_interface(m_identifier); } -Expected VDeviceClient::create_infer_model(const std::string &hef_path) -{ - (void)hef_path; - return make_unexpected(HAILO_NOT_IMPLEMENTED); -} - #endif // HAILO_SUPPORT_MULTI_PROCESS @@ -403,6 +501,7 @@ hailo_status VDeviceBase::validate_params(const 
hailo_vdevice_params_t ¶ms) Expected> VDeviceBase::create(const hailo_vdevice_params_t ¶ms) { TRACE(InitProfilerProtoTrace); + TRACE(MonitorStartTrace); auto devices_expected = create_devices(params); CHECK_EXPECTED(devices_expected); @@ -427,8 +526,6 @@ Expected> VDeviceBase::create(const hailo_vdevice_p } LOGGER__INFO("{}", vdevice_ids); - TRACE(MonitorStartTrace, uint32_t(device_ids.size())); - CoreOpsSchedulerPtr scheduler_ptr; if (HAILO_SCHEDULING_ALGORITHM_NONE != params.scheduling_algorithm) { if (HAILO_SCHEDULING_ALGORITHM_ROUND_ROBIN == params.scheduling_algorithm) { @@ -458,7 +555,7 @@ VDeviceBase::~VDeviceBase() // all interrupt dispatcher threads are idle. m_core_ops_scheduler->shutdown(); } - TRACE(DumpProfilerState); + TRACE(DumpProfilerStateTrace); } Expected VDeviceBase::configure(Hef &hef, @@ -475,31 +572,47 @@ Expected VDeviceBase::configure(Hef &hef, for (const auto &network_params_pair : local_config_params.value()) { std::vector> core_ops; - const bool use_multiplexer = should_use_multiplexer(network_params_pair.second); + const bool use_multiplexer = should_use_multiplexer(); std::shared_ptr identical_core_op = nullptr; if (use_multiplexer) { for (auto &network_group : m_vdevice_core_ops) { - if (network_group->multiplexer_supported() && network_group->equals(hef, network_params_pair)) { + if (network_group->equals(hef, network_params_pair)) { identical_core_op = network_group; break; } } } - std::shared_ptr vdevice_network_group = nullptr; + std::shared_ptr vdevice_core_op = nullptr; if (identical_core_op) { - auto vdevice_network_group_exp = VDeviceCoreOp::duplicate(identical_core_op, network_params_pair.second); - CHECK_EXPECTED(vdevice_network_group_exp); - vdevice_network_group = vdevice_network_group_exp.release(); + auto vdevice_core_op_exp = VDeviceCoreOp::duplicate(identical_core_op, network_params_pair.second); + CHECK_EXPECTED(vdevice_core_op_exp); + vdevice_core_op = vdevice_core_op_exp.release(); } else { - auto 
vdevice_network_group_expected = create_vdevice_network_group(hef, network_params_pair, use_multiplexer); - CHECK_EXPECTED(vdevice_network_group_expected); - vdevice_network_group = vdevice_network_group_expected.release(); - m_vdevice_core_ops.push_back(vdevice_network_group); + auto vdevice_core_op_exp = create_vdevice_core_op(hef, network_params_pair); + CHECK_EXPECTED(vdevice_core_op_exp); + vdevice_core_op = vdevice_core_op_exp.release(); + m_vdevice_core_ops.emplace_back(vdevice_core_op); + } + + if (m_core_ops_scheduler) { + auto status = m_core_ops_scheduler->add_core_op(vdevice_core_op->core_op_handle(), vdevice_core_op); + CHECK_SUCCESS_AS_EXPECTED(status); + + // On scheduler, the streams are always activated + for (auto &input : vdevice_core_op->get_input_streams()) { + status = dynamic_cast(input.get()).activate_stream(); + CHECK_SUCCESS_AS_EXPECTED(status); + } + + for (auto &output : vdevice_core_op->get_output_streams()) { + status = dynamic_cast(output.get()).activate_stream(); + CHECK_SUCCESS_AS_EXPECTED(status); + } } - core_ops.push_back(vdevice_network_group); - auto metadata = hef.pimpl->network_group_metadata(vdevice_network_group->name()); + core_ops.push_back(vdevice_core_op); + auto metadata = hef.pimpl->network_group_metadata(vdevice_core_op->name()); auto net_group_expected = ConfiguredNetworkGroupBase::create(network_params_pair.second, std::move(core_ops), std::move(metadata)); CHECK_EXPECTED(net_group_expected); auto network_group_ptr = net_group_expected.release(); @@ -514,7 +627,7 @@ Expected VDeviceBase::configure(Hef &hef, return added_network_groups; } -Expected VDeviceBase::create_infer_model(const std::string &hef_path) +Expected> VDevice::create_infer_model(const std::string &hef_path) { auto hef_expected = Hef::create(hef_path); CHECK_EXPECTED(hef_expected); @@ -545,7 +658,10 @@ Expected VDeviceBase::create_infer_model(const std::string &hef_path outputs.emplace(vstream_info.name, std::move(stream)); } - return 
InferModel(*this, std::move(hef), std::move(inputs), std::move(outputs)); + auto res = make_shared_nothrow(InferModel(*this, std::move(hef), std::move(inputs), std::move(outputs))); + CHECK_NOT_NULL_AS_EXPECTED(res, HAILO_OUT_OF_HOST_MEMORY); + + return res; } Expected VDeviceBase::get_default_streams_interface() const @@ -584,6 +700,8 @@ Expected>> VDeviceBase::create_dev CHECK_EXPECTED(device_arch); CHECK_AS_EXPECTED(HAILO_ARCH_HAILO8L != device_arch.value(), HAILO_INVALID_OPERATION, "VDevice with multiple devices is not supported on HAILO_ARCH_HAILO8L. device {} is HAILO_ARCH_HAILO8L", device_id); + CHECK_AS_EXPECTED(HAILO_ARCH_HAILO15M != device_arch.value(), HAILO_INVALID_OPERATION, + "VDevice with multiple devices is not supported on HAILO_ARCH_HAILO15M. device {} is HAILO_ARCH_HAILO15M", device_id); } auto dev_type = Device::get_device_type(device_id); @@ -663,75 +781,54 @@ Expected VDeviceBase::create_local_config_params(Hef &he return local_config_params; } -Expected> VDeviceBase::create_vdevice_network_group(Hef &hef, - const std::pair ¶ms, bool use_multiplexer) +Expected> VDeviceBase::create_physical_core_op(Device &device, Hef &hef, + const std::string &core_op_name, const ConfigureNetworkParams ¶ms) { - std::map> core_ops; - - // configure all the devices to this ng and then push the core ops to bundle vector - for (const auto &pair : m_devices) { - auto &device = pair.second; - - ConfigureNetworkParams low_level_params = params.second; - if (m_core_ops_scheduler) { - // When the scheduler is enabled, all low level streams must be async (even if the user uses sync API). 
- for (auto &stream_params : low_level_params.stream_params_by_name) { - stream_params.second.flags |= HAILO_STREAM_FLAGS_ASYNC; - } - } - - auto ng_vector = device->configure(hef, { std::make_pair(params.first, low_level_params) }); - CHECK_EXPECTED(ng_vector); - - assert(1 == ng_vector->size()); - auto network_group_base = std::dynamic_pointer_cast(ng_vector.value()[0]); - - auto networks_info = network_group_base->get_network_infos(); - CHECK_EXPECTED(networks_info); - if (m_core_ops_scheduler && 1 < networks_info->size()) { - LOGGER__WARNING("Configuring '{}' which is a multi-networks model with scheduler enabled." - " The model will be scheduled only when all inputs and outputs of the network group will be ready", - network_group_base->name()); + ConfigureNetworkParams params_copy = params; + if (m_core_ops_scheduler) { + // When the scheduler is enabled, all low level streams must be async (even if the user uses sync API). + for (auto &stream_params : params_copy.stream_params_by_name) { + stream_params.second.flags |= HAILO_STREAM_FLAGS_ASYNC; } + } - auto ng_core_ops = network_group_base->get_core_ops(); - // To support several ng_core_ops, one should return vector of VDeviceCoreOp. 
- CHECK_AS_EXPECTED(ng_core_ops.size() == 1, HAILO_NOT_IMPLEMENTED, - "Only one core op for network group is supported"); + auto ng_vector = device.configure(hef, { std::make_pair(core_op_name, params_copy) }); + CHECK_EXPECTED(ng_vector); - core_ops.emplace(device->get_dev_id(), ng_core_ops[0]); - } + assert(1 == ng_vector->size()); + auto &network_group_base = dynamic_cast(*ng_vector.value()[0]); - std::shared_ptr multiplexer = nullptr; - if (use_multiplexer) { - multiplexer = make_shared_nothrow(); - CHECK_NOT_NULL_AS_EXPECTED(multiplexer, HAILO_OUT_OF_HOST_MEMORY); + auto networks_info = network_group_base.get_network_infos(); + CHECK_EXPECTED(networks_info); + if (m_core_ops_scheduler && (networks_info->size() > 1)) { + LOGGER__WARNING("Configuring '{}' which is a multi-networks model with scheduler enabled." + " The model will be scheduled only when all inputs and outputs of the network group will be ready", + core_op_name); } - auto core_op_handle = allocate_core_op_handle(); - - auto vdevice_network_group_exp = VDeviceCoreOp::create(m_active_core_op_holder, params.second, core_ops, - m_core_ops_scheduler, core_op_handle, multiplexer, hef.hash()); - CHECK_EXPECTED(vdevice_network_group_exp); - auto vdevice_network_group = vdevice_network_group_exp.release(); + auto ng_core_ops = network_group_base.get_core_ops(); + CHECK_AS_EXPECTED(ng_core_ops.size() == 1, HAILO_NOT_IMPLEMENTED, + "Only one core op for network group is supported"); - if (m_core_ops_scheduler) { - auto status = m_core_ops_scheduler->add_core_op(core_op_handle, vdevice_network_group); - CHECK_SUCCESS_AS_EXPECTED(status); + auto core_op = ng_core_ops[0]; + return core_op; +} - // On scheduler, the streams are always activated - for (auto &input : vdevice_network_group->get_input_streams()) { - status = dynamic_cast(input.get()).activate_stream(); - CHECK_SUCCESS_AS_EXPECTED(status); - } +Expected> VDeviceBase::create_vdevice_core_op(Hef &hef, + const std::pair ¶ms) +{ + std::map> 
physical_core_ops; - for (auto &output : vdevice_network_group->get_output_streams()) { - status = dynamic_cast(output.get()).activate_stream(); - CHECK_SUCCESS_AS_EXPECTED(status); - } + for (const auto &device : m_devices) { + auto physical_core_op = create_physical_core_op(*device.second, hef, params.first, params.second); + CHECK_EXPECTED(physical_core_op); + physical_core_ops.emplace(device.first, physical_core_op.release()); } - return vdevice_network_group; + auto core_op_handle = allocate_core_op_handle(); + + return VDeviceCoreOp::create(m_active_core_op_holder, params.second, physical_core_ops, + m_core_ops_scheduler, core_op_handle, hef.hash()); } vdevice_core_op_handle_t VDeviceBase::allocate_core_op_handle() @@ -739,24 +836,16 @@ vdevice_core_op_handle_t VDeviceBase::allocate_core_op_handle() return m_next_core_op_handle++; } -bool VDeviceBase::should_use_multiplexer(const ConfigureNetworkParams &network_params) +bool VDeviceBase::should_use_multiplexer() { - const auto &stream_params_by_name = network_params.stream_params_by_name; - const auto input_counts = std::count_if(stream_params_by_name.begin(), stream_params_by_name.end(), - [](const std::pair &stream_params) { - return HAILO_H2D_STREAM == stream_params.second.direction; - }); - - const bool has_async_stream = std::any_of(stream_params_by_name.begin(), stream_params_by_name.end(), - [](const std::pair &stream_params) { - return 0 != (stream_params.second.flags & HAILO_STREAM_FLAGS_ASYNC); - }); - - return - PipelineMultiplexer::is_multiplexer_supported() && - m_core_ops_scheduler && - input_counts == 1 && // TODO (HRT-8634): Support multi-inputs NGs (multi networks) - !has_async_stream; // TODO (HRT-10557): Support async multiplexer + auto disable_multiplexer_env = std::getenv(DISABLE_MULTIPLEXER_ENV_VAR); + bool disabled_by_flag = (nullptr != disable_multiplexer_env) && + (strnlen(disable_multiplexer_env, 2) == 1) && + (strncmp(disable_multiplexer_env, "1", 1) == 0); + if (disabled_by_flag) { 
+ LOGGER__WARNING("Usage of '{}' env variable is deprecated.", DISABLE_MULTIPLEXER_ENV_VAR); + } + return (!disabled_by_flag && m_core_ops_scheduler); } } /* namespace hailort */ diff --git a/hailort/libhailort/src/vdevice/vdevice_core_op.cpp b/hailort/libhailort/src/vdevice/vdevice_core_op.cpp index 100aee4..153a885 100644 --- a/hailort/libhailort/src/vdevice/vdevice_core_op.cpp +++ b/hailort/libhailort/src/vdevice/vdevice_core_op.cpp @@ -10,7 +10,6 @@ #include "vdevice/vdevice_core_op.hpp" #include "vdevice/scheduler/scheduled_stream.hpp" #include "vdevice/vdevice_native_stream.hpp" -#include "vdevice/vdevice_stream_multiplexer_wrapper.hpp" #include "net_flow/pipeline/vstream_internal.hpp" #define INVALID_BATCH_SIZE (-1) @@ -23,79 +22,105 @@ Expected> VDeviceCoreOp::create(ActiveCoreOpHolde const ConfigureNetworkParams &configure_params, const std::map> &core_ops, CoreOpsSchedulerWeakPtr core_ops_scheduler, vdevice_core_op_handle_t core_op_handle, - std::shared_ptr multiplexer, const std::string &hef_hash) { - auto status = HAILO_UNINITIALIZED; - for (auto &core_op : core_ops) - { + for (auto &core_op : core_ops) { core_op.second->set_vdevice_core_op_handle(core_op_handle); - for (auto &stream : core_op.second->get_input_streams()) - { - auto &native_stream = static_cast(stream.get()); - native_stream.set_vdevice_core_op_handle(core_op_handle); + for (auto &stream : core_op.second->get_input_streams()) { + auto &stream_base = dynamic_cast(stream.get()); + stream_base.set_vdevice_core_op_handle(core_op_handle); + } + for (auto &stream : core_op.second->get_output_streams()) { + auto &stream_base = dynamic_cast(stream.get()); + stream_base.set_vdevice_core_op_handle(core_op_handle); } } - VDeviceCoreOp object(active_core_op_holder, configure_params, std::move(core_ops), core_ops_scheduler, - core_op_handle, multiplexer, hef_hash, status); - CHECK_SUCCESS_AS_EXPECTED(status); + // On HcpConfigCoreOp, we don't support get_async_max_queue_size (and the core op doesn't 
use the queue). + auto per_device_queue_size = core_ops.begin()->second->get_async_max_queue_size(); + const auto queue_size = per_device_queue_size ? (*per_device_queue_size * core_ops.size()) : 0; - int batch_size = INVALID_BATCH_SIZE; - bool batch_size_equals = std::all_of(configure_params.network_params_by_name.begin(), - configure_params.network_params_by_name.end(), [&](std::pair n_param_map) { - return n_param_map.second.batch_size == configure_params.network_params_by_name.begin()->second.batch_size; - }); - if (batch_size_equals) { - batch_size = configure_params.network_params_by_name.begin()->second.batch_size; - } + auto status = HAILO_UNINITIALIZED; + auto vdevice_core_op = make_shared_nothrow(active_core_op_holder, configure_params, + std::move(core_ops), core_ops_scheduler, core_op_handle, hef_hash, queue_size, status); + CHECK_NOT_NULL_AS_EXPECTED(vdevice_core_op, HAILO_OUT_OF_HOST_MEMORY); + CHECK_SUCCESS_AS_EXPECTED(status); - // TODO HRT-11373: remove is_nms from monitor - TRACE(AddCoreOpTrace, "", object.name(), DEFAULT_SCHEDULER_TIMEOUT.count(), DEFAULT_SCHEDULER_MIN_THRESHOLD, - core_op_handle, object.is_nms(), batch_size); - status = object.create_vdevice_streams_from_config_params(); + status = vdevice_core_op->create_vdevice_streams_from_config_params(); CHECK_SUCCESS_AS_EXPECTED(status); - auto obj_ptr = make_shared_nothrow(std::move(object)); - CHECK_NOT_NULL_AS_EXPECTED(obj_ptr, HAILO_OUT_OF_HOST_MEMORY); + status = vdevice_core_op->add_to_trace(); + CHECK_SUCCESS_AS_EXPECTED(status); - return obj_ptr; + return vdevice_core_op; } Expected> VDeviceCoreOp::duplicate(std::shared_ptr other, const ConfigureNetworkParams &configure_params) { - auto status = HAILO_UNINITIALIZED; auto copy = other->m_core_ops; - VDeviceCoreOp object(other->m_active_core_op_holder, configure_params, std::move(copy), other->m_core_ops_scheduler, - other->m_core_op_handle, other->m_multiplexer, other->m_hef_hash, status); - CHECK_SUCCESS_AS_EXPECTED(status); + // 
If m_infer_requests_accumulator does not exists (if the scheduler is not in use), we don't need queue size, so + // we pass 0. + const auto queue_size = other->m_infer_requests_accumulator ? + other->m_infer_requests_accumulator->queue_size() : 0; - status = object.create_vdevice_streams_from_duplicate(other); + auto status = HAILO_UNINITIALIZED; + auto vdevice_core_op = make_shared_nothrow(other->m_active_core_op_holder, configure_params, + std::move(copy), other->m_core_ops_scheduler, other->m_core_op_handle, + other->m_hef_hash, queue_size, status); + CHECK_NOT_NULL_AS_EXPECTED(vdevice_core_op, HAILO_OUT_OF_HOST_MEMORY); CHECK_SUCCESS_AS_EXPECTED(status); - auto obj_ptr = make_shared_nothrow(std::move(object)); - CHECK_NOT_NULL_AS_EXPECTED(obj_ptr, HAILO_OUT_OF_HOST_MEMORY); + status = vdevice_core_op->create_vdevice_streams_from_config_params(); + CHECK_SUCCESS_AS_EXPECTED(status); - return obj_ptr; + return vdevice_core_op; } - VDeviceCoreOp::VDeviceCoreOp(ActiveCoreOpHolder &active_core_op_holder, const ConfigureNetworkParams &configure_params, const std::map> &core_ops, CoreOpsSchedulerWeakPtr core_ops_scheduler, vdevice_core_op_handle_t core_op_handle, - std::shared_ptr multiplexer, const std::string &hef_hash, hailo_status &status) : + const std::string &hef_hash, size_t max_queue_size, + hailo_status &status) : CoreOp(configure_params, core_ops.begin()->second->m_metadata, active_core_op_holder, status), m_core_ops(std::move(core_ops)), m_core_ops_scheduler(core_ops_scheduler), m_core_op_handle(core_op_handle), - m_multiplexer(multiplexer), - m_multiplexer_handle(0), - m_hef_hash(hef_hash) -{} + m_hef_hash(hef_hash), + m_infer_requests_accumulator(nullptr) +{ + if (HAILO_SUCCESS != status) { + // Failure from base class + return; + } + + if (m_core_ops_scheduler.lock() && (max_queue_size > 0)) { + const auto streams_count = m_config_params.stream_params_by_name.size(); + auto infer_request_accumulator = + make_shared_nothrow(streams_count, max_queue_size, 
+ [this](InferRequest &&infer_request) { + auto scheduler = m_core_ops_scheduler.lock(); + if (!scheduler) { + LOGGER__ERROR("Frame accumulator is supported only when scheduler is enabled"); + return; + } + auto status = scheduler->enqueue_infer_request(m_core_op_handle, std::move(infer_request)); + if (HAILO_SUCCESS != status) { + LOGGER__ERROR("Failed to enqueue infer request with status={}", status); + } + }); + if (!infer_request_accumulator) { + LOGGER__ERROR("Failed to allocated infer request accumulator"); + status = HAILO_OUT_OF_HOST_MEMORY; + return; + } + + m_infer_requests_accumulator = infer_request_accumulator; + } +} Expected VDeviceCoreOp::get_default_streams_interface() { @@ -143,26 +168,6 @@ hailo_status VDeviceCoreOp::create_vdevice_streams_from_config_params() } } - for (const auto &input_stream : m_input_streams) { - if (HAILO_STREAM_INTERFACE_ETH == input_stream.second->get_interface()) { - continue; - } - auto expected_queue_size = input_stream.second->get_buffer_frames_size(); - CHECK_EXPECTED_AS_STATUS(expected_queue_size); - } - for (const auto &output_stream : m_output_streams) { - if (HAILO_STREAM_INTERFACE_ETH == output_stream.second->get_interface()) { - continue; - } - auto expected_queue_size = output_stream.second->get_buffer_frames_size(); - CHECK_EXPECTED_AS_STATUS(expected_queue_size); - } - - if (m_multiplexer) { - auto status = m_multiplexer->add_core_op_instance(m_multiplexer_handle, *this); - CHECK_SUCCESS(status); - } - return HAILO_SUCCESS; } @@ -178,28 +183,19 @@ hailo_status VDeviceCoreOp::create_input_vdevice_stream_from_config_params(const auto &core_op = pair.second; auto stream = core_op->get_input_stream_by_name(stream_name); CHECK(stream, HAILO_INTERNAL_FAILURE); - TRACE(CreateCoreOpInputStreamsTrace, device_id, name(), stream_name, (uint32_t)stream->get().get_buffer_frames_size().value(), - core_op->vdevice_core_op_handle()); + low_level_streams.emplace(device_id, stream.release()); } std::shared_ptr input_stream = 
nullptr; if (m_core_ops_scheduler.lock()) { + assert(m_infer_requests_accumulator); auto scheduled_stream = ScheduledInputStream::create(std::move(low_level_streams), - edge_layer.value(), m_core_op_handle, m_core_ops_scheduler, m_core_op_activated_event); + edge_layer.value(), m_core_op_handle, m_core_op_activated_event, m_infer_requests_accumulator); CHECK_EXPECTED_AS_STATUS(scheduled_stream); - if (m_multiplexer) { - auto multiplexer_stream = VDeviceInputStreamMultiplexerWrapper::create(scheduled_stream.release(), - edge_layer->network_name, m_multiplexer); - CHECK_EXPECTED_AS_STATUS(multiplexer_stream); - - input_stream = multiplexer_stream.release(); - } else { - input_stream = scheduled_stream.release(); - } - + input_stream = scheduled_stream.release(); } else { auto max_batch_size = get_stream_batch_size(stream_name); CHECK_EXPECTED_AS_STATUS(max_batch_size); @@ -229,28 +225,18 @@ hailo_status VDeviceCoreOp::create_output_vdevice_stream_from_config_params(cons auto &core_op = pair.second; auto stream = core_op->get_output_stream_by_name(stream_name); CHECK(stream, HAILO_INTERNAL_FAILURE); - TRACE(CreateCoreOpOutputStreamsTrace, device_id, name(), stream_name, (uint32_t)stream->get().get_buffer_frames_size().value(), - core_op->vdevice_core_op_handle()); low_level_streams.emplace(device_id, stream.release()); } std::shared_ptr output_stream = nullptr; if (m_core_ops_scheduler.lock()) { - auto scheduled_stream = ScheduledOutputStream::create(std::move(low_level_streams), m_core_op_handle, - edge_layer.value(), m_core_op_activated_event, m_core_ops_scheduler); + assert(m_infer_requests_accumulator); + auto scheduled_stream = ScheduledOutputStream::create(std::move(low_level_streams), + m_core_op_handle, edge_layer.value(), m_core_op_activated_event, m_infer_requests_accumulator); CHECK_EXPECTED_AS_STATUS(scheduled_stream); - if (m_multiplexer) { - auto multiplexer_stream = VDeviceOutputStreamMultiplexerWrapper::create(scheduled_stream.release(), - 
edge_layer->network_name, m_multiplexer); - CHECK_EXPECTED_AS_STATUS(multiplexer_stream); - - output_stream = multiplexer_stream.release(); - } else { - output_stream = scheduled_stream.release(); - } - + output_stream = scheduled_stream.release(); } else { auto max_batch_size = get_stream_batch_size(stream_name); CHECK_EXPECTED_AS_STATUS(max_batch_size); @@ -268,56 +254,6 @@ hailo_status VDeviceCoreOp::create_output_vdevice_stream_from_config_params(cons return HAILO_SUCCESS; } -hailo_status VDeviceCoreOp::create_vdevice_streams_from_duplicate(std::shared_ptr other) -{ - // TODO - HRT-6931 - raise error on this case - if (((m_config_params.latency & HAILO_LATENCY_MEASURE) == HAILO_LATENCY_MEASURE) && (1 < m_core_ops.size())) { - LOGGER__WARNING("Latency measurement is not supported on more than 1 physical device."); - } - - assert(other->m_multiplexer != nullptr); - m_multiplexer_handle = other->multiplexer_duplicates_count() + 1; - - for (const auto &stream_parameters_pair : m_config_params.stream_params_by_name) { - switch (stream_parameters_pair.second.direction) { - case HAILO_H2D_STREAM: - { - auto other_stream = other->get_input_stream_by_name(stream_parameters_pair.first); - CHECK_EXPECTED_AS_STATUS(other_stream); - auto &other_stream_wrapper = dynamic_cast(other_stream->get()); - - auto copy = other_stream_wrapper.clone(m_multiplexer_handle); - CHECK_EXPECTED_AS_STATUS(copy); - - auto status = add_input_stream(copy.release(), stream_parameters_pair.second); - CHECK_SUCCESS(status); - break; - } - case HAILO_D2H_STREAM: - { - auto other_stream = other->get_output_stream_by_name(stream_parameters_pair.first); - CHECK_EXPECTED_AS_STATUS(other_stream); - auto &other_stream_wrapper = dynamic_cast(other_stream->get()); - - auto copy = other_stream_wrapper.clone(m_multiplexer_handle); - CHECK_EXPECTED_AS_STATUS(copy); - - auto status = add_output_stream(copy.release(), stream_parameters_pair.second); - CHECK_SUCCESS(status); - break; - } - default: - 
LOGGER__ERROR("stream name {} direction is invalid.", stream_parameters_pair.first); - return HAILO_INVALID_ARGUMENT; - } - } - - auto status = other->m_multiplexer->add_core_op_instance(m_multiplexer_handle, *this); - CHECK_SUCCESS(status); - - return HAILO_SUCCESS; -} - vdevice_core_op_handle_t VDeviceCoreOp::core_op_handle() const { return m_core_op_handle; @@ -380,24 +316,6 @@ Expected VDeviceCoreOp::get_boundary_vdma_channel_by_s return m_core_ops.begin()->second->get_boundary_vdma_channel_by_stream_name(stream_name); } -void VDeviceCoreOp::set_vstreams_multiplexer_callbacks(std::vector &output_vstreams) -{ - if (nullptr == m_multiplexer) { - return; - } - - m_multiplexer->set_output_vstreams_names(m_multiplexer_handle, output_vstreams); - - for (auto &vstream : output_vstreams) { - static_cast(*vstream.m_vstream).set_on_vstream_cant_read_callback([this, name = vstream.name()] () { - m_multiplexer->set_can_output_vstream_read(m_multiplexer_handle, name, false); - }); - static_cast(*vstream.m_vstream).set_on_vstream_can_read_callback([this, name = vstream.name()] () { - m_multiplexer->set_can_output_vstream_read(m_multiplexer_handle, name, true); - }); - } -} - hailo_status VDeviceCoreOp::activate_impl(uint16_t dynamic_batch_size) { assert(!m_core_ops_scheduler.lock()); @@ -436,6 +354,54 @@ hailo_status VDeviceCoreOp::deactivate_impl() return status; } +hailo_status VDeviceCoreOp::shutdown() +{ + hailo_status status = HAILO_SUCCESS; // Success oriented + + auto abort_status = abort_low_level_streams(); + if (HAILO_SUCCESS != abort_status) { + LOGGER__ERROR("Failed abort low level streams {}", abort_status); + status = abort_status; + } + + if (m_core_ops_scheduler.lock()) { + + auto deactivate_streams_status = deactivate_low_level_streams(); + if (HAILO_SUCCESS != deactivate_streams_status) { + status = deactivate_streams_status; + // continue + } + + m_core_ops_scheduler.lock()->remove_core_op(m_core_op_handle); + + assert(m_infer_requests_accumulator); + auto 
queue_size = get_async_max_queue_size(); + assert(queue_size); + + const auto timeout = DEFAULT_TRANSFER_TIMEOUT * (*queue_size); + auto accumulator_shutdown_status = m_infer_requests_accumulator->shutdown(timeout); + if (HAILO_SUCCESS != accumulator_shutdown_status) { + status = accumulator_shutdown_status; + // continue + } + + // Here, we can shutdown the VdmaConfigCoreOps if shutdown was called on last instance. We don't do it for now, + // since after shutdown the user can create another instance of this core op (so we need the resources + // available). + + } else { + for (auto &core_op : m_core_ops) { + auto shutdown_status = core_op.second->shutdown(); + if (HAILO_SUCCESS != status) { + LOGGER__ERROR("Failed shutdown core op for device {}", core_op.first); + status = shutdown_status; // continue on failure + } + } + } + + return status; +} + Expected> VDeviceCoreOp::get_core_op_by_device_id(const device_id_t &device_id) { CHECK_AS_EXPECTED(m_core_ops.count(device_id), HAILO_INVALID_ARGUMENT); @@ -444,6 +410,11 @@ Expected> VDeviceCoreOp::get_core_op_by_device return core_op; } +Expected VDeviceCoreOp::get_async_max_queue_size_per_device() const +{ + return m_core_ops.begin()->second->get_async_max_queue_size(); +} + Expected VDeviceCoreOp::run_hw_infer_estimator() { CHECK_AS_EXPECTED(1 == m_core_ops.size(), HAILO_INVALID_OPERATION, @@ -458,4 +429,32 @@ Expected VDeviceCoreOp::get_intermediate_buffer(const IntermediateBuffer return m_core_ops.begin()->second->get_intermediate_buffer(key); } +hailo_status VDeviceCoreOp::add_to_trace() +{ + const auto batch_size = get_stream_batch_size(m_config_params.stream_params_by_name.begin()->first); + CHECK_EXPECTED_AS_STATUS(batch_size); + + TRACE(AddCoreOpTrace, name(), DEFAULT_SCHEDULER_TIMEOUT.count(), DEFAULT_SCHEDULER_MIN_THRESHOLD, + m_core_op_handle, *batch_size); + + const auto stream_interface = get_default_streams_interface(); + CHECK_EXPECTED_AS_STATUS(stream_interface); + + if (*stream_interface != 
HAILO_STREAM_INTERFACE_ETH) { + for (const auto &core_op : m_core_ops) { + auto queue_size_exp = core_op.second->get_async_max_queue_size(); + CHECK_EXPECTED_AS_STATUS(queue_size_exp); + const uint32_t queue_size = static_cast(*queue_size_exp); + + for (const auto &stream_params : m_config_params.stream_params_by_name) { + (stream_params.second.direction == HAILO_H2D_STREAM) ? + TRACE(AddStreamH2DTrace, core_op.first, name(), stream_params.first, queue_size, m_core_op_handle) : + TRACE(AddStreamD2HTrace, core_op.first, name(), stream_params.first, queue_size, m_core_op_handle); + } + } + } + + return HAILO_SUCCESS; +} + } /* namespace hailort */ diff --git a/hailort/libhailort/src/vdevice/vdevice_core_op.hpp b/hailort/libhailort/src/vdevice/vdevice_core_op.hpp index f0b1dd6..b9c3af6 100644 --- a/hailort/libhailort/src/vdevice/vdevice_core_op.hpp +++ b/hailort/libhailort/src/vdevice/vdevice_core_op.hpp @@ -17,7 +17,7 @@ #include "hailo/vstream.hpp" #include "vdevice/scheduler/scheduler.hpp" -#include "vdevice/pipeline_multiplexer.hpp" +#include "vdevice/scheduler/infer_request_accumulator.hpp" #include "utils/profiler/tracer_macros.hpp" #include @@ -35,7 +35,7 @@ public: const ConfigureNetworkParams &configure_params, const std::map> &core_ops, CoreOpsSchedulerWeakPtr core_ops_scheduler, vdevice_core_op_handle_t core_op_handle, - std::shared_ptr multiplexer, const std::string &hef_hash); + const std::string &hef_hash); static Expected> duplicate(std::shared_ptr other, const ConfigureNetworkParams &configure_params); @@ -60,21 +60,6 @@ public: return false; } - uint32_t multiplexer_duplicates_count() const - { - if (m_multiplexer) { - assert(m_multiplexer->instances_count() > 0); - return static_cast(m_multiplexer->instances_count() - 1); - } else { - return 0; - } - } - - bool multiplexer_supported() const - { - return nullptr != m_multiplexer; - } - virtual Expected get_default_streams_interface() override; virtual Expected> get_latency_meters() override; @@ -87,8 
+72,6 @@ public: virtual hailo_status set_scheduler_threshold(uint32_t threshold, const std::string &network_name) override; virtual hailo_status set_scheduler_priority(uint8_t priority, const std::string &network_name) override; - void set_vstreams_multiplexer_callbacks(std::vector &output_vstreams) override; - virtual hailo_status wait_for_activation(const std::chrono::milliseconds &timeout) override { CHECK(!m_core_ops_scheduler.lock(), HAILO_INVALID_OPERATION, @@ -99,20 +82,25 @@ public: virtual hailo_status activate_impl(uint16_t dynamic_batch_size) override; virtual hailo_status deactivate_impl() override; + virtual hailo_status shutdown() override; + size_t devices_count() const { return m_core_ops.size(); } Expected> get_core_op_by_device_id(const device_id_t &device_bdf_id); + Expected get_async_max_queue_size_per_device() const; + virtual Expected run_hw_infer_estimator() override; virtual Expected get_intermediate_buffer(const IntermediateBufferKey &) override; -private: VDeviceCoreOp(ActiveCoreOpHolder &active_core_op_holder, const ConfigureNetworkParams &configure_params, const std::map> &core_ops, CoreOpsSchedulerWeakPtr core_ops_scheduler, scheduler_core_op_handle_t core_op_handle, - std::shared_ptr multiplexer, // TODO: multiplexer handle - const std::string &hef_hash, hailo_status &status); + const std::string &hef_hash, + size_t max_queue_size, + hailo_status &status); +private: hailo_status create_vdevice_streams_from_config_params(); hailo_status create_input_vdevice_stream_from_config_params( const hailo_stream_parameters_t &stream_params, const std::string &stream_name); @@ -121,12 +109,14 @@ private: hailo_status create_vdevice_streams_from_duplicate(std::shared_ptr other); + hailo_status add_to_trace(); + std::map> m_core_ops; CoreOpsSchedulerWeakPtr m_core_ops_scheduler; const vdevice_core_op_handle_t m_core_op_handle; - std::shared_ptr m_multiplexer; - multiplexer_core_op_handle_t m_multiplexer_handle; std::string m_hef_hash; + + 
std::shared_ptr m_infer_requests_accumulator; }; } diff --git a/hailort/libhailort/src/vdevice/vdevice_internal.hpp b/hailort/libhailort/src/vdevice/vdevice_internal.hpp index 6a059dd..4e5ed9e 100644 --- a/hailort/libhailort/src/vdevice/vdevice_internal.hpp +++ b/hailort/libhailort/src/vdevice/vdevice_internal.hpp @@ -24,6 +24,7 @@ #include "hailo/hailort.h" #include "hailo/vdevice.hpp" +#include "common/async_thread.hpp" #include "vdma/vdma_device.hpp" #include "vdma/vdma_config_manager.hpp" #include "vdevice/vdevice_core_op.hpp" @@ -37,7 +38,7 @@ namespace hailort { - +#define DISABLE_MULTIPLEXER_ENV_VAR "HAILO_DISABLE_MULTIPLEXER_INTERNAL" class VDeviceBase : public VDevice { public: @@ -78,11 +79,37 @@ public: return m_core_ops_scheduler; } - virtual Expected create_infer_model(const std::string &hef_path) override; - // Currently only homogeneous vDevice is allow (= all devices are from the same type) virtual Expected get_default_streams_interface() const override; + virtual hailo_status dma_map(void *address, size_t size, hailo_stream_direction_t direction) override + { + for (const auto &pair : m_devices) { + auto &device = pair.second; + const auto status = device->dma_map(address, size, direction); + CHECK_SUCCESS(status); + } + return HAILO_SUCCESS; + } + + virtual hailo_status dma_unmap(void *address, hailo_stream_direction_t direction) override + { + hailo_status status = HAILO_SUCCESS; + for (const auto &pair : m_devices) { + auto &device = pair.second; + // Best effort, propagate first error + const auto unmap_status = device->dma_unmap(address, direction); + if (HAILO_SUCCESS != unmap_status) { + LOGGER__ERROR("Failed unmapping user buffer {} with status {}", address, unmap_status); + if (HAILO_SUCCESS == status) { + status = unmap_status; + } + } + } + + return status; + } + static hailo_status validate_params(const hailo_vdevice_params_t ¶ms); private: @@ -93,9 +120,11 @@ private: static Expected>> create_devices(const hailo_vdevice_params_t ¶ms); 
static Expected> get_device_ids(const hailo_vdevice_params_t ¶ms); Expected create_local_config_params(Hef &hef, const NetworkGroupsParamsMap &configure_params); - Expected> create_vdevice_network_group(Hef &hef, - const std::pair ¶ms, bool use_multiplexer); - bool should_use_multiplexer(const ConfigureNetworkParams ¶ms); + Expected> create_vdevice_core_op(Hef &hef, + const std::pair ¶ms); + Expected> create_physical_core_op(Device &device, Hef &hef, const std::string &core_op_name, + const ConfigureNetworkParams ¶ms); + bool should_use_multiplexer(); vdevice_core_op_handle_t allocate_core_op_handle(); std::map> m_devices; @@ -108,6 +137,8 @@ private: }; #ifdef HAILO_SUPPORT_MULTI_PROCESS +using network_group_handle_t = uint32_t; + class VDeviceClient : public VDevice { public: @@ -126,7 +157,6 @@ public: Expected> get_physical_devices_ids() const override; Expected get_default_streams_interface() const override; - virtual Expected create_infer_model(const std::string &hef_path) override; virtual hailo_status before_fork() override; virtual hailo_status after_fork_in_parent() override; @@ -136,11 +166,19 @@ private: VDeviceClient(std::unique_ptr client, VDeviceIdentifier &&identifier, std::vector> &&devices); hailo_status create_client(); + hailo_status start_listener_thread(VDeviceIdentifier identifier); + hailo_status listener_run_in_thread(VDeviceIdentifier identifier); + hailo_status finish_listener_thread(); std::unique_ptr m_client; VDeviceIdentifier m_identifier; std::vector> m_devices; - std::vector> m_network_groups; + + std::mutex m_mutex; + std::unordered_map> m_network_groups; + + AsyncThreadPtr m_cb_listener_thread; + std::atomic_bool m_is_listener_thread_running; }; #endif // HAILO_SUPPORT_MULTI_PROCESS @@ -162,7 +200,7 @@ public: Expected>> get_physical_devices() const override; Expected> get_physical_devices_ids() const override; Expected get_default_streams_interface() const override; - Expected create_infer_model(const std::string &hef_path) 
override; + Expected> create_infer_model(const std::string &hef_path) override; private: VDeviceHandle(uint32_t handle); diff --git a/hailort/libhailort/src/vdevice/vdevice_native_stream.cpp b/hailort/libhailort/src/vdevice/vdevice_native_stream.cpp index 05b00b9..39ab8a3 100644 --- a/hailort/libhailort/src/vdevice/vdevice_native_stream.cpp +++ b/hailort/libhailort/src/vdevice/vdevice_native_stream.cpp @@ -22,7 +22,7 @@ Expected> VDeviceNativeInputStream::cr vdevice_core_op_handle_t core_op_handle) { std::unique_ptr reorder_queue = nullptr; - if (auto max_queue_size_per_stream = streams.begin()->second.get().get_buffer_frames_size()) { + if (auto max_queue_size_per_stream = streams.begin()->second.get().get_async_max_queue_size()) { const auto max_queue_size = max_queue_size_per_stream.value() * streams.size(); reorder_queue = make_unique_nothrow(max_queue_size); CHECK_NOT_NULL_AS_EXPECTED(reorder_queue, HAILO_OUT_OF_HOST_MEMORY); @@ -60,13 +60,13 @@ hailo_status VDeviceNativeInputStream::deactivate_stream() return HAILO_SUCCESS; } -hailo_status VDeviceNativeInputStream::abort() +hailo_status VDeviceNativeInputStream::abort_impl() { auto status = HAILO_SUCCESS; // Best effort for (auto &pair: m_streams){ const auto &device_id = pair.first; auto &stream = pair.second; - auto abort_status = stream.get().abort(); + auto abort_status = stream.get().abort_impl(); if (HAILO_SUCCESS != status) { LOGGER__ERROR("Failed to abort input stream. 
(status: {} device: {})", status, device_id); status = abort_status; @@ -75,13 +75,13 @@ hailo_status VDeviceNativeInputStream::abort() return status; } -hailo_status VDeviceNativeInputStream::clear_abort() +hailo_status VDeviceNativeInputStream::clear_abort_impl() { auto status = HAILO_SUCCESS; // Best effort for (auto &pair: m_streams){ const auto &device_id = pair.first; auto &stream = pair.second; - auto clear_abort_status = stream.get().clear_abort(); + auto clear_abort_status = stream.get().clear_abort_impl(); if ((HAILO_SUCCESS != clear_abort_status) && (HAILO_STREAM_NOT_ACTIVATED != clear_abort_status)) { LOGGER__ERROR("Failed to clear abort input stream. (status: {} device: {})", clear_abort_status, device_id); status = clear_abort_status; @@ -114,12 +114,6 @@ hailo_stream_interface_t VDeviceNativeInputStream::get_interface() const return m_streams.begin()->second.get().get_interface(); } -Expected VDeviceNativeInputStream::get_buffer_frames_size() const -{ - // All get_buffer_frames_size values of m_streams should be the same - return m_streams.begin()->second.get().get_buffer_frames_size(); -} - hailo_status VDeviceNativeInputStream::flush() { auto status = HAILO_SUCCESS; // Best effort @@ -137,7 +131,7 @@ hailo_status VDeviceNativeInputStream::flush() hailo_status VDeviceNativeInputStream::write_impl(const MemoryView &buffer) { - TRACE(WriteFrameTrace, m_core_op_handle, name()); + TRACE(FrameEnqueueH2DTrace, m_core_op_handle, name()); auto status = next_stream().write_impl(buffer); if ((HAILO_STREAM_ABORTED_BY_USER == status) || (HAILO_STREAM_NOT_ACTIVATED == status)){ @@ -161,19 +155,26 @@ Expected VDeviceNativeInputStream::get_async_max_queue_size() const // since we transfer an entire batch for each device at a time (so even if we have place // to transfer in other streams, we first finishes the batch). 
// To overcome this problem, we check how many "batches" we can transfer at a time (batch_count_queued) - // and make sure the queue for each stream contains a specific batch. We can potentaily transfer - // the resuide of the batch from last device, but then we will have problems with non-batch aligned + // and make sure the queue for each stream contains a specific batch. We can potentially transfer + // the residue of the batch from last device, but then we will have problems with non-batch aligned // transfers. auto &first_stream = m_streams.begin()->second.get(); const auto max_queue_per_stream = first_stream.get_async_max_queue_size(); CHECK_EXPECTED(max_queue_per_stream); - assert(*max_queue_per_stream >= m_batch_size); - - const auto batch_count_queued = *max_queue_per_stream / m_batch_size; - const auto actual_queue_per_stream = m_batch_size * batch_count_queued; - - return actual_queue_per_stream * m_streams.size(); + if (*max_queue_per_stream >= m_batch_size) { + const auto batch_count_queued = *max_queue_per_stream / m_batch_size; + const auto actual_queue_per_stream = m_batch_size * batch_count_queued; + return actual_queue_per_stream * m_streams.size(); + } else { + size_t max_queue_size = 0; + for (size_t i = 1; i <= *max_queue_per_stream; i++) { + if ((m_batch_size % i) == 0) { + max_queue_size = i; + } + } + return max_queue_size * m_streams.size(); + } } hailo_status VDeviceNativeInputStream::write_async(TransferRequest &&transfer_request) @@ -182,7 +183,7 @@ hailo_status VDeviceNativeInputStream::write_async(TransferRequest &&transfer_re CHECK(m_callback_reorder_queue, HAILO_INVALID_OPERATION, "Stream does not support async api"); transfer_request.callback = m_callback_reorder_queue->wrap_callback(transfer_request.callback); - TRACE(WriteFrameTrace, m_core_op_handle, name()); + TRACE(FrameEnqueueH2DTrace, m_core_op_handle, name()); auto status = next_stream().write_async(std::move(transfer_request)); if (HAILO_SUCCESS != status) { @@ -220,7 
+221,7 @@ Expected> VDeviceNativeOutputStream:: vdevice_core_op_handle_t core_op_handle) { std::unique_ptr reorder_queue = nullptr; - if (auto max_queue_size_per_stream = streams.begin()->second.get().get_buffer_frames_size()) { + if (auto max_queue_size_per_stream = streams.begin()->second.get().get_async_max_queue_size()) { const auto max_queue_size = max_queue_size_per_stream.value() * streams.size(); reorder_queue = make_unique_nothrow(max_queue_size); CHECK_NOT_NULL_AS_EXPECTED(reorder_queue, HAILO_OUT_OF_HOST_MEMORY); @@ -258,13 +259,13 @@ hailo_status VDeviceNativeOutputStream::deactivate_stream() return HAILO_SUCCESS; } -hailo_status VDeviceNativeOutputStream::abort() +hailo_status VDeviceNativeOutputStream::abort_impl() { auto status = HAILO_SUCCESS; // Best effort for (const auto &pair : m_streams) { const auto &device_id = pair.first; auto &stream = pair.second; - auto abort_status = stream.get().abort(); + auto abort_status = stream.get().abort_impl(); if (HAILO_SUCCESS != status) { LOGGER__ERROR("Failed to abort output stream. (status: {} device: {})", status, device_id); status = abort_status; @@ -274,13 +275,13 @@ hailo_status VDeviceNativeOutputStream::abort() return status; } -hailo_status VDeviceNativeOutputStream::clear_abort() +hailo_status VDeviceNativeOutputStream::clear_abort_impl() { auto status = HAILO_SUCCESS; // Best effort for (const auto &pair : m_streams) { const auto &device_id = pair.first; auto &stream = pair.second; - auto clear_abort_status = stream.get().clear_abort(); + auto clear_abort_status = stream.get().clear_abort_impl(); if ((HAILO_SUCCESS != clear_abort_status) && (HAILO_STREAM_NOT_ACTIVATED != clear_abort_status)) { LOGGER__ERROR("Failed to clear abort output stream. 
(status: {} device: {})", clear_abort_status, device_id); status = clear_abort_status; @@ -313,12 +314,6 @@ hailo_stream_interface_t VDeviceNativeOutputStream::get_interface() const return m_streams.begin()->second.get().get_interface(); } -Expected VDeviceNativeOutputStream::get_buffer_frames_size() const -{ - // All get_buffer_frames_size values of m_streams should be the same - return m_streams.begin()->second.get().get_buffer_frames_size(); -} - hailo_status VDeviceNativeOutputStream::read_impl(MemoryView buffer) { auto status = next_stream().read_impl(buffer); @@ -328,7 +323,9 @@ hailo_status VDeviceNativeOutputStream::read_impl(MemoryView buffer) } CHECK_SUCCESS(status, "Failed read from stream (device: {})", m_next_transfer_stream); - TRACE(ReadFrameTrace, m_core_op_handle, name()); + if (INVALID_CORE_OP_HANDLE != m_core_op_handle) { + TRACE(FrameDequeueD2HTrace, m_core_op_handle, name()); + } advance_stream(); return HAILO_SUCCESS; @@ -352,12 +349,19 @@ Expected VDeviceNativeOutputStream::get_async_max_queue_size() const const auto max_queue_per_stream = first_stream.get_async_max_queue_size(); CHECK_EXPECTED(max_queue_per_stream); - assert(*max_queue_per_stream >= m_batch_size); - - const auto batch_count_queued = *max_queue_per_stream / m_batch_size; - const auto actual_queue_per_stream = m_batch_size * batch_count_queued; - - return actual_queue_per_stream * m_streams.size(); + if (*max_queue_per_stream >= m_batch_size) { + const auto batch_count_queued = *max_queue_per_stream / m_batch_size; + const auto actual_queue_per_stream = m_batch_size * batch_count_queued; + return actual_queue_per_stream * m_streams.size(); + } else { + size_t max_queue_size = 0; + for (size_t i = 1; i <= *max_queue_per_stream; i++) { + if ((m_batch_size % i) == 0) { + max_queue_size = i; + } + } + return max_queue_size * m_streams.size(); + } } hailo_status VDeviceNativeOutputStream::read_async(TransferRequest &&transfer_request) @@ -369,11 +373,10 @@ hailo_status 
VDeviceNativeOutputStream::read_async(TransferRequest &&transfer_re auto reorder_queue_callback = m_callback_reorder_queue->wrap_callback(transfer_request.callback); transfer_request.callback = [this, callback=reorder_queue_callback](hailo_status status) { - if (HAILO_SUCCESS == status) { - TRACE(ReadFrameTrace, m_core_op_handle, name()); - } - callback(status); + if ((HAILO_SUCCESS == status) && (INVALID_CORE_OP_HANDLE != m_core_op_handle)) { + TRACE(FrameDequeueD2HTrace, m_core_op_handle, name()); + } }; auto status = next_stream().read_async(std::move(transfer_request)); @@ -387,6 +390,15 @@ hailo_status VDeviceNativeOutputStream::read_async(TransferRequest &&transfer_re return HAILO_SUCCESS; } +hailo_status VDeviceNativeOutputStream::read_unaligned_address_async(const MemoryView &buffer, + const TransferDoneCallback &user_callback) +{ + auto status = next_stream().read_unaligned_address_async(buffer, user_callback); + CHECK_SUCCESS(status); + advance_stream(); + return HAILO_SUCCESS; +} + OutputStreamBase &VDeviceNativeOutputStream::next_stream() { return m_streams.at(m_next_transfer_stream).get(); diff --git a/hailort/libhailort/src/vdevice/vdevice_native_stream.hpp b/hailort/libhailort/src/vdevice/vdevice_native_stream.hpp index 452095c..14cc53d 100644 --- a/hailort/libhailort/src/vdevice/vdevice_native_stream.hpp +++ b/hailort/libhailort/src/vdevice/vdevice_native_stream.hpp @@ -40,8 +40,7 @@ public: vdevice_core_op_handle_t core_op_handle, std::unique_ptr &&callback_reorder_queue, hailo_status &status) : - InputStreamBase(layer_info, streams.begin()->second.get().get_interface(), - std::move(core_op_activated_event), status), + InputStreamBase(layer_info, std::move(core_op_activated_event), status), m_streams(std::move(streams)), m_next_transfer_stream(m_streams.begin()->first), m_acc_frames(0), @@ -53,15 +52,14 @@ public: virtual hailo_status set_buffer_mode(StreamBufferMode buffer_mode) override; virtual hailo_status activate_stream() override; virtual 
hailo_status deactivate_stream() override; - virtual hailo_status abort() override; - virtual hailo_status clear_abort() override; + virtual hailo_status abort_impl() override; + virtual hailo_status clear_abort_impl() override; virtual bool is_scheduled() override { return false; }; virtual hailo_stream_interface_t get_interface() const override; virtual std::chrono::milliseconds get_timeout() const override; virtual hailo_status set_timeout(std::chrono::milliseconds timeout) override; - virtual Expected get_buffer_frames_size() const override; virtual hailo_status flush() override; virtual hailo_status write_impl(const MemoryView &buffer) override; @@ -98,44 +96,31 @@ public: vdevice_core_op_handle_t core_op_handle, std::unique_ptr &&callback_reorder_queue, hailo_status &status) : - OutputStreamBase(layer_info, streams.begin()->second.get().get_interface(), - std::move(core_op_activated_event), status), + OutputStreamBase(layer_info, std::move(core_op_activated_event), status), m_streams(std::move(streams)), m_next_transfer_stream(m_streams.begin()->first), m_acc_frames(0), m_batch_size(batch_size), m_core_op_handle(core_op_handle), m_callback_reorder_queue(std::move(callback_reorder_queue)) - { - for (auto &output_stream : m_streams) { - if (HAILO_STREAM_INTERFACE_ETH != output_stream.second.get().get_interface()) { - auto register_status = output_stream.second.get().register_interrupt_callback( - [core_op_handle=m_core_op_handle, name=name(), device_id=output_stream.first]() { - TRACE(OutputVdmaEnqueueTrace, device_id, core_op_handle, name); - } - ); - if (HAILO_SUCCESS != register_status) { - LOGGER__ERROR("Failing register interrupt callback {}", register_status); - } - } - } - } + {} virtual hailo_status set_buffer_mode(StreamBufferMode buffer_mode) override; virtual hailo_status activate_stream() override; virtual hailo_status deactivate_stream() override; - virtual hailo_status abort() override; - virtual hailo_status clear_abort() override; + virtual 
hailo_status abort_impl() override; + virtual hailo_status clear_abort_impl() override; virtual bool is_scheduled() override { return false; }; virtual hailo_stream_interface_t get_interface() const override; virtual std::chrono::milliseconds get_timeout() const override; virtual hailo_status set_timeout(std::chrono::milliseconds timeout) override; - virtual Expected get_buffer_frames_size() const override; virtual hailo_status read_impl(MemoryView buffer) override; virtual hailo_status wait_for_async_ready(size_t transfer_size, std::chrono::milliseconds timeout) override; virtual hailo_status read_async(TransferRequest &&transfer_request) override; + virtual hailo_status read_unaligned_address_async(const MemoryView &buffer, + const TransferDoneCallback &user_callback) override; virtual Expected get_async_max_queue_size() const override; private: diff --git a/hailort/libhailort/src/vdevice/vdevice_stream_multiplexer_wrapper.cpp b/hailort/libhailort/src/vdevice/vdevice_stream_multiplexer_wrapper.cpp deleted file mode 100644 index 7aeec0d..0000000 --- a/hailort/libhailort/src/vdevice/vdevice_stream_multiplexer_wrapper.cpp +++ /dev/null @@ -1,384 +0,0 @@ -#include "vdevice/vdevice_stream_multiplexer_wrapper.hpp" - -namespace hailort -{ - -hailo_status VDeviceInputStreamMultiplexerWrapper::set_buffer_mode(StreamBufferMode buffer_mode) -{ - // Buffer is not owned by this class, so we just forward the request to base stream. 
- return m_base_stream->set_buffer_mode(buffer_mode); -} - -const hailo_stream_info_t &VDeviceInputStreamMultiplexerWrapper::get_info() const -{ - return m_base_stream->get_info(); -} - -const CONTROL_PROTOCOL__nn_stream_config_t &VDeviceInputStreamMultiplexerWrapper::get_nn_stream_config() -{ - return m_base_stream->get_nn_stream_config(); -} - -hailo_status VDeviceInputStreamMultiplexerWrapper::activate_stream() -{ - return m_base_stream->activate_stream(); -} - -hailo_status VDeviceInputStreamMultiplexerWrapper::deactivate_stream() -{ - return m_base_stream->deactivate_stream(); -} - -hailo_stream_interface_t VDeviceInputStreamMultiplexerWrapper::get_interface() const -{ - return m_base_stream->get_interface(); -} - -std::chrono::milliseconds VDeviceInputStreamMultiplexerWrapper::get_timeout() const -{ - return m_base_stream->get_timeout(); -} - -hailo_status VDeviceInputStreamMultiplexerWrapper::abort() -{ - if (*m_is_aborted) { - return HAILO_SUCCESS; - } - *m_is_aborted = true; - - auto status = m_multiplexer->disable_stream(m_core_op_multiplexer_handle, name()); - CHECK_SUCCESS(status); - - m_base_stream->notify_all(); - - status = m_multiplexer->run_once_for_stream(name(), INPUT_RUN_ONCE_HANDLE__ABORT, m_core_op_multiplexer_handle); - CHECK_SUCCESS(status); - - return HAILO_SUCCESS; -} - -hailo_status VDeviceInputStreamMultiplexerWrapper::clear_abort() -{ - if (!(*m_is_aborted)) { - return HAILO_SUCCESS; - } - *m_is_aborted = false; - - auto status = m_multiplexer->enable_stream(m_core_op_multiplexer_handle, name()); - CHECK_SUCCESS(status); - - status = m_multiplexer->run_once_for_stream(name(), INPUT_RUN_ONCE_HANDLE__CLEAR_ABORT, m_core_op_multiplexer_handle); - CHECK_SUCCESS(status); - - m_base_stream->notify_all(); - - return HAILO_SUCCESS; -} - -bool VDeviceInputStreamMultiplexerWrapper::is_scheduled() -{ - // Multiplexer can only work with scheduler - assert(m_base_stream->is_scheduled()); - return true; -} - -hailo_status 
VDeviceInputStreamMultiplexerWrapper::launch_transfer(const device_id_t &device_id) -{ - return m_base_stream->launch_transfer(device_id); -} - -Expected VDeviceInputStreamMultiplexerWrapper::get_buffer_frames_size() const -{ - return m_base_stream->get_buffer_frames_size(); -} - -hailo_status VDeviceInputStreamMultiplexerWrapper::write_impl(const MemoryView &buffer) -{ - auto status = m_multiplexer->wait_for_write(m_core_op_multiplexer_handle); - if (HAILO_STREAM_ABORTED_BY_USER == status) { - return status; - } - CHECK_SUCCESS(status); - - auto write_status = m_base_stream->write_impl(buffer, [this]() { return m_is_aborted->load(); }); - status = m_multiplexer->signal_write_finish(m_core_op_multiplexer_handle, write_status != HAILO_SUCCESS); - CHECK_SUCCESS(status); - if (HAILO_STREAM_ABORTED_BY_USER == write_status) { - return write_status; - } - CHECK_SUCCESS(write_status); - - return HAILO_SUCCESS; -} - -hailo_status VDeviceInputStreamMultiplexerWrapper::set_timeout(std::chrono::milliseconds timeout) -{ - return m_base_stream->set_timeout(timeout); -} - -hailo_status VDeviceInputStreamMultiplexerWrapper::flush() -{ - return m_multiplexer->run_once_for_stream(name(), INPUT_RUN_ONCE_HANDLE__FLUSH, m_core_op_multiplexer_handle); -} - -Expected> VDeviceInputStreamMultiplexerWrapper::create( - std::shared_ptr base_stream, - std::string network_name, std::shared_ptr multiplexer, - multiplexer_core_op_handle_t core_op_multiplexer_handle) -{ - assert(base_stream->is_scheduled()); - hailo_status status = HAILO_UNINITIALIZED; - std::unique_ptr wrapper( - new (std::nothrow) VDeviceInputStreamMultiplexerWrapper(base_stream, network_name, multiplexer, - core_op_multiplexer_handle, status)); - CHECK_NOT_NULL_AS_EXPECTED(wrapper, HAILO_OUT_OF_HOST_MEMORY); - CHECK_SUCCESS_AS_EXPECTED(status); - - return wrapper; -} - -Expected> VDeviceInputStreamMultiplexerWrapper::clone( - multiplexer_core_op_handle_t core_op_multiplexer_handle) -{ - auto wrapper = create(m_base_stream, 
m_network_name, m_multiplexer, core_op_multiplexer_handle); - CHECK_EXPECTED(wrapper); - - return wrapper; -} - -VDeviceInputStreamMultiplexerWrapper::VDeviceInputStreamMultiplexerWrapper( - std::shared_ptr base_stream, - std::string network_name, std::shared_ptr multiplexer, - multiplexer_core_op_handle_t core_op_multiplexer_handle, hailo_status &status) : - InputStreamBase(base_stream->get_layer_info(), base_stream->get_interface(), - base_stream->get_core_op_activated_event(), status), - m_base_stream(base_stream), - m_multiplexer(multiplexer), - m_core_op_multiplexer_handle(core_op_multiplexer_handle), - m_network_name(network_name), - m_is_aborted() -{ - if (HAILO_SUCCESS != status) { - // Parent returned error - return; - } - - m_is_aborted = make_unique_nothrow(false); - if (nullptr == m_is_aborted) { - status = HAILO_OUT_OF_HOST_MEMORY; - LOGGER__ERROR("Failed to allocate memory! status = {}", status); - return; - } - status = multiplexer->register_run_once_for_stream(base_stream->name(), INPUT_RUN_ONCE_HANDLE__FLUSH, [this] - { - return m_base_stream->flush(); - }); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("register_run_once_for_stream failed! status = {}", status); - return; - } - - status = multiplexer->register_run_once_for_stream(base_stream->name(), INPUT_RUN_ONCE_HANDLE__ABORT, [this] - { - return m_base_stream->abort(); - }); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("register_run_once_for_stream failed! status = {}", status); - return; - } - - status = multiplexer->register_run_once_for_stream(base_stream->name(), INPUT_RUN_ONCE_HANDLE__CLEAR_ABORT, [this] - { - return m_base_stream->clear_abort(); - }); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("register_run_once_for_stream failed! status = {}", status); - return; - } -} - -hailo_status VDeviceOutputStreamMultiplexerWrapper::set_buffer_mode(StreamBufferMode buffer_mode) -{ - // Buffer is not owned by this class, so we just forward the request to base stream. 
- return m_base_stream->set_buffer_mode(buffer_mode); -} - -const hailo_stream_info_t &VDeviceOutputStreamMultiplexerWrapper::get_info() const -{ - return m_base_stream->get_info(); -} - -const CONTROL_PROTOCOL__nn_stream_config_t &VDeviceOutputStreamMultiplexerWrapper::get_nn_stream_config() -{ - return m_base_stream->get_nn_stream_config(); -} - -hailo_status VDeviceOutputStreamMultiplexerWrapper::activate_stream() -{ - return m_base_stream->activate_stream(); -} - -hailo_status VDeviceOutputStreamMultiplexerWrapper::deactivate_stream() -{ - return m_base_stream->deactivate_stream(); -} - -hailo_stream_interface_t VDeviceOutputStreamMultiplexerWrapper::get_interface() const -{ - return m_base_stream->get_interface(); -} - -std::chrono::milliseconds VDeviceOutputStreamMultiplexerWrapper::get_timeout() const -{ - return m_base_stream->get_timeout(); -} - -hailo_status VDeviceOutputStreamMultiplexerWrapper::launch_transfer(const device_id_t &device_id) -{ - return m_base_stream->launch_transfer(device_id); -} - -hailo_status VDeviceOutputStreamMultiplexerWrapper::abort() -{ - if (*m_is_aborted) { - return HAILO_SUCCESS; - } - *m_is_aborted = true; - - auto status = m_multiplexer->disable_stream(m_core_op_multiplexer_handle, name()); - CHECK_SUCCESS(status); - - status = m_multiplexer->run_once_for_stream(name(), OUTPUT_RUN_ONCE_HANDLE__ABORT, m_core_op_multiplexer_handle); - CHECK_SUCCESS(status); - - return HAILO_SUCCESS; -} - -hailo_status VDeviceOutputStreamMultiplexerWrapper::clear_abort() -{ - if (!(*m_is_aborted)) { - return HAILO_SUCCESS; - } - *m_is_aborted = false; - - auto status = m_multiplexer->enable_stream(m_core_op_multiplexer_handle, name()); - CHECK_SUCCESS(status); - - status = m_multiplexer->run_once_for_stream(name(), OUTPUT_RUN_ONCE_HANDLE__CLEAR_ABORT, m_core_op_multiplexer_handle); - CHECK_SUCCESS(status); - - return HAILO_SUCCESS; -} - -bool VDeviceOutputStreamMultiplexerWrapper::is_scheduled() -{ - // Multiplexer can only work with scheduler 
- assert(m_base_stream->is_scheduled()); - return true; -} - -Expected VDeviceOutputStreamMultiplexerWrapper::get_buffer_frames_size() const -{ - return m_base_stream->get_buffer_frames_size(); -} - -hailo_status VDeviceOutputStreamMultiplexerWrapper::read_impl(MemoryView buffer) -{ - uint32_t frames_to_drain_count = 0; - auto expected_drain_count = m_multiplexer->wait_for_read(m_core_op_multiplexer_handle, name(), - m_base_stream->get_timeout()); - if (HAILO_STREAM_ABORTED_BY_USER == expected_drain_count.status()) { - return expected_drain_count.status(); - } - CHECK_EXPECTED_AS_STATUS(expected_drain_count); - - frames_to_drain_count = expected_drain_count.release(); - - for (uint32_t i = 0; i < frames_to_drain_count; i++) { - auto status = m_base_stream->read(buffer); - if ((HAILO_STREAM_ABORTED_BY_USER == status) || (HAILO_STREAM_NOT_ACTIVATED == status)) { - return status; - } - CHECK_SUCCESS(status); - } - - auto status = m_base_stream->read(buffer); - if ((HAILO_STREAM_ABORTED_BY_USER == status) || (HAILO_STREAM_NOT_ACTIVATED == status)) { - return status; - } - CHECK_SUCCESS(status); - - status = m_multiplexer->signal_read_finish(); - CHECK_SUCCESS(status); - - return HAILO_SUCCESS; -} - -hailo_status VDeviceOutputStreamMultiplexerWrapper::set_timeout(std::chrono::milliseconds timeout) -{ - return m_base_stream->set_timeout(timeout); -} - -Expected> VDeviceOutputStreamMultiplexerWrapper::create( - std::shared_ptr base_stream, - std::string network_name, std::shared_ptr multiplexer, - multiplexer_core_op_handle_t core_op_multiplexer_handle) -{ - assert(base_stream->is_scheduled()); - hailo_status status = HAILO_UNINITIALIZED; - std::unique_ptr wrapper( - new (std::nothrow) VDeviceOutputStreamMultiplexerWrapper(base_stream, network_name, multiplexer, - core_op_multiplexer_handle, status)); - CHECK_NOT_NULL_AS_EXPECTED(wrapper, HAILO_OUT_OF_HOST_MEMORY); - - return wrapper; -} - -Expected> VDeviceOutputStreamMultiplexerWrapper::clone( - 
multiplexer_core_op_handle_t core_op_multiplexer_handle) -{ - auto wrapper = create(m_base_stream, m_network_name, m_multiplexer, core_op_multiplexer_handle); - CHECK_EXPECTED(wrapper); - - return wrapper; -} - -VDeviceOutputStreamMultiplexerWrapper::VDeviceOutputStreamMultiplexerWrapper( - std::shared_ptr base_stream, - std::string network_name, std::shared_ptr multiplexer, - multiplexer_core_op_handle_t core_op_multiplexer_handle, hailo_status &status) : - OutputStreamBase(base_stream->get_layer_info(), base_stream->get_info(), - base_stream->m_nn_stream_config, base_stream->get_core_op_activated_event()), - m_base_stream(base_stream), - m_multiplexer(multiplexer), - m_core_op_multiplexer_handle(core_op_multiplexer_handle), - m_network_name(network_name), - m_is_aborted() -{ - m_is_aborted = make_unique_nothrow(false); - if (nullptr == m_is_aborted) { - status = HAILO_OUT_OF_HOST_MEMORY; - LOGGER__ERROR("Failed to allocate memory! status = {}", status); - return; - } - - status = multiplexer->register_run_once_for_stream(m_base_stream->name(), OUTPUT_RUN_ONCE_HANDLE__ABORT, [this] - { - return m_base_stream->abort(); - }); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("register_run_once_for_stream failed! status = {}", status); - return; - } - - status = multiplexer->register_run_once_for_stream(m_base_stream->name(), OUTPUT_RUN_ONCE_HANDLE__CLEAR_ABORT, [this] - { - return m_base_stream->clear_abort(); - }); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("register_run_once_for_stream failed! status = {}", status); - return; - } -} - -} /* namespace hailort */ diff --git a/hailort/libhailort/src/vdevice/vdevice_stream_multiplexer_wrapper.hpp b/hailort/libhailort/src/vdevice/vdevice_stream_multiplexer_wrapper.hpp deleted file mode 100644 index b2c55a6..0000000 --- a/hailort/libhailort/src/vdevice/vdevice_stream_multiplexer_wrapper.hpp +++ /dev/null @@ -1,118 +0,0 @@ -/** - * Copyright (c) 2022 Hailo Technologies Ltd. All rights reserved. 
- * Distributed under the MIT license (https://opensource.org/licenses/MIT) - **/ -/** - * @file vdevice_stream_multiplexer_wrapper.hpp - * @brief Wrapper classes for VDeviceInputStream and VDeviceOutputStream - **/ - -#ifndef HAILO_VDEVICE_STREAM_MULTIPLEXER_WRAPPER_HPP_ -#define HAILO_VDEVICE_STREAM_MULTIPLEXER_WRAPPER_HPP_ - -#include "hailo/expected.hpp" - -#include "stream_common/stream_internal.hpp" -#include "vdevice/scheduler/scheduled_stream.hpp" -#include "vdevice/pipeline_multiplexer.hpp" - - -namespace hailort -{ - -enum input_run_once_handle_t { - INPUT_RUN_ONCE_HANDLE__FLUSH, - INPUT_RUN_ONCE_HANDLE__ABORT, - INPUT_RUN_ONCE_HANDLE__CLEAR_ABORT -}; - -enum output_run_once_handle_t { - OUTPUT_RUN_ONCE_HANDLE__ABORT, - OUTPUT_RUN_ONCE_HANDLE__CLEAR_ABORT -}; - -class VDeviceInputStreamMultiplexerWrapper : public InputStreamBase { -public: - virtual ~VDeviceInputStreamMultiplexerWrapper() = default; - static Expected> create( - std::shared_ptr base_stream, - std::string network_name, std::shared_ptr multiplexer, - multiplexer_core_op_handle_t core_op_multiplexer_handle = 0); - Expected> clone(multiplexer_core_op_handle_t core_op_multiplexer_handle); - - virtual hailo_status set_buffer_mode(StreamBufferMode buffer_mode) override; - virtual const hailo_stream_info_t &get_info() const override; - virtual const CONTROL_PROTOCOL__nn_stream_config_t &get_nn_stream_config() override; - virtual hailo_status activate_stream() override; - virtual hailo_status deactivate_stream() override; - virtual hailo_stream_interface_t get_interface() const override; - virtual std::chrono::milliseconds get_timeout() const override; - virtual hailo_status abort() override; - virtual hailo_status clear_abort() override; - virtual bool is_scheduled() override; - - virtual hailo_status launch_transfer(const device_id_t &device_id) override; - virtual Expected get_buffer_frames_size() const override; - -protected: - virtual hailo_status write_impl(const MemoryView &buffer) override; 
- -private: - VDeviceInputStreamMultiplexerWrapper(std::shared_ptr base_stream, - std::string network_name, std::shared_ptr multiplexer, - multiplexer_core_op_handle_t core_op_multiplexer_handle, hailo_status &status); - - virtual hailo_status set_timeout(std::chrono::milliseconds timeout) override; - virtual hailo_status flush() override; - - std::shared_ptr m_base_stream; - std::shared_ptr m_multiplexer; - multiplexer_core_op_handle_t m_core_op_multiplexer_handle; - std::string m_network_name; - - std::unique_ptr m_is_aborted; -}; - -class VDeviceOutputStreamMultiplexerWrapper : public OutputStreamBase { -public: - virtual ~VDeviceOutputStreamMultiplexerWrapper() noexcept = default; - - static Expected> create( - std::shared_ptr base_stream, - std::string network_name, std::shared_ptr multiplexer, - multiplexer_core_op_handle_t core_op_multiplexer_handle = 0); - Expected> clone(multiplexer_core_op_handle_t core_op_multiplexer_handle); - - virtual hailo_status set_buffer_mode(StreamBufferMode buffer_mode) override; - virtual const hailo_stream_info_t &get_info() const override; - virtual const CONTROL_PROTOCOL__nn_stream_config_t &get_nn_stream_config() override; - virtual hailo_status activate_stream() override; - virtual hailo_status deactivate_stream() override; - virtual hailo_stream_interface_t get_interface() const override; - virtual std::chrono::milliseconds get_timeout() const override; - virtual hailo_status launch_transfer(const device_id_t &device_id) override; - virtual hailo_status abort() override; - virtual hailo_status clear_abort() override; - virtual bool is_scheduled() override; - virtual Expected get_buffer_frames_size() const override; - -private: - VDeviceOutputStreamMultiplexerWrapper(std::shared_ptr base_stream, - std::string network_name, std::shared_ptr multiplexer, - multiplexer_core_op_handle_t core_op_multiplexer_handle, hailo_status &status); - - virtual hailo_status set_timeout(std::chrono::milliseconds timeout) override; - virtual 
hailo_status read_impl(MemoryView buffer) override; - - std::shared_ptr m_base_stream; - std::shared_ptr m_multiplexer; - multiplexer_core_op_handle_t m_core_op_multiplexer_handle; - std::string m_network_name; - EventPtr m_read_event; - - std::unique_ptr m_is_aborted; -}; - -} /* namespace hailort */ - -#endif /* HAILO_VDEVICE_STREAM_MULTIPLEXER_WRAPPER_HPP_ */ diff --git a/hailort/libhailort/src/vdma/CMakeLists.txt b/hailort/libhailort/src/vdma/CMakeLists.txt index 1e72b24..aed185a 100644 --- a/hailort/libhailort/src/vdma/CMakeLists.txt +++ b/hailort/libhailort/src/vdma/CMakeLists.txt @@ -19,6 +19,7 @@ set(SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/memory/dma_able_buffer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/memory/sg_buffer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/memory/continuous_buffer.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/memory/mapping_manager.cpp ${CMAKE_CURRENT_SOURCE_DIR}/memory/buffer_requirements.cpp ) diff --git a/hailort/libhailort/src/vdma/channel/boundary_channel.cpp b/hailort/libhailort/src/vdma/channel/boundary_channel.cpp index 3feb827..3287366 100644 --- a/hailort/libhailort/src/vdma/channel/boundary_channel.cpp +++ b/hailort/libhailort/src/vdma/channel/boundary_channel.cpp @@ -25,24 +25,25 @@ namespace vdma { Expected BoundaryChannel::create(vdma::ChannelId channel_id, Direction direction, - HailoRTDriver &driver, uint32_t descs_count, uint16_t desc_page_size, const std::string &stream_name, + VdmaDevice &vdma_device, uint32_t descs_count, uint16_t desc_page_size, const std::string &stream_name, LatencyMeterPtr latency_meter) { hailo_status status = HAILO_UNINITIALIZED; - auto channel_ptr = make_shared_nothrow(channel_id, direction, driver, descs_count, + auto channel_ptr = make_shared_nothrow(channel_id, direction, vdma_device, descs_count, desc_page_size, stream_name, latency_meter, status); CHECK_NOT_NULL_AS_EXPECTED(channel_ptr, HAILO_OUT_OF_HOST_MEMORY); CHECK_SUCCESS_AS_EXPECTED(status, "Failed creating BoundaryChannel"); return channel_ptr; } 
-BoundaryChannel::BoundaryChannel(vdma::ChannelId channel_id, Direction direction, HailoRTDriver &driver, +BoundaryChannel::BoundaryChannel(vdma::ChannelId channel_id, Direction direction, VdmaDevice &vdma_device, uint32_t descs_count, uint16_t desc_page_size, const std::string &stream_name, LatencyMeterPtr latency_meter, hailo_status &status) : m_channel_id(channel_id), m_direction(direction), - m_driver(driver), - m_host_registers(driver, channel_id, direction), + m_vdma_device(vdma_device), + m_driver(vdma_device.get_driver()), + m_host_registers(vdma_device.get_driver(), channel_id, direction), m_desc_list(nullptr), m_stream_name(stream_name), m_latency_meter(latency_meter), @@ -62,8 +63,8 @@ BoundaryChannel::BoundaryChannel(vdma::ChannelId channel_id, Direction direction return; } - if (channel_id.engine_index >= driver.dma_engines_count()) { - LOGGER__ERROR("Invalid DMA engine index {}, max {}", channel_id.engine_index, driver.dma_engines_count()); + if (channel_id.engine_index >= m_driver.dma_engines_count()) { + LOGGER__ERROR("Invalid DMA engine index {}, max {}", channel_id.engine_index, m_driver.dma_engines_count()); status = HAILO_INVALID_ARGUMENT; return; } @@ -122,10 +123,11 @@ hailo_status BoundaryChannel::trigger_channel_completion(uint16_t hw_num_process hailo_status complete_status = HAILO_SUCCESS; #ifndef NDEBUG - auto &last_desc = (*m_desc_list)[transfer.last_desc]; + assert(!transfer.last_descs.empty()); + auto &last_desc = (*m_desc_list)[transfer.last_descs.back()]; if (!last_desc.is_done() || last_desc.is_error()) { LOGGER__ERROR("Error while processing descriptor {} of DMA {} on device {} DESC_STATUS=0x{:x}.", - transfer.last_desc, m_channel_id, m_driver.device_id(), last_desc.status()); + transfer.last_descs.back(), m_channel_id, m_driver.device_id(), last_desc.status()); complete_status = HAILO_INTERNAL_FAILURE; } #endif @@ -176,37 +178,57 @@ hailo_status BoundaryChannel::launch_transfer(TransferRequest &&transfer_request return 
HAILO_STREAM_NOT_ACTIVATED; } - if (m_ongoing_transfers.size() >= get_max_ongoing_transfers(transfer_request.buffer.size())) { + if (m_ongoing_transfers.size() >= get_max_ongoing_transfers(transfer_request.get_total_transfer_size())) { return HAILO_QUEUE_IS_FULL; } - auto mapped_buffer_exp = transfer_request.buffer.map_buffer(m_driver, m_direction); - CHECK_EXPECTED_AS_STATUS(mapped_buffer_exp); - auto mapped_buffer = mapped_buffer_exp.release(); + auto num_available = get_num_available(); + const uint16_t first_desc = num_available; + std::vector transfer_last_descs; + uint16_t total_descs_count = 0; + + for (size_t i = 0; i < transfer_request.transfer_buffers.size(); i++) { + auto mapped_buffer_exp = transfer_request.transfer_buffers[i].map_buffer(m_vdma_device, m_direction); + CHECK_EXPECTED_AS_STATUS(mapped_buffer_exp); + auto mapped_buffer = mapped_buffer_exp.release(); + + // Syncing the buffer to device change its ownership from host to the device. + // We sync on D2H as well if the user owns the buffer since the buffer might have been changed by + // the host between the time it was mapped and the current async transfer. If the buffer is not owned by the user, + // it won't be accessed for write. + if ((Direction::H2D == m_direction) || user_owns_buffer) { + auto status = transfer_request.transfer_buffers[i].synchronize(m_vdma_device, HailoRTDriver::DmaSyncDirection::TO_DEVICE); + CHECK_SUCCESS(status); + } - // Syncing the buffer to device change its ownership from host to the device. - // We sync on D2H as well if the user owns the buffer since the buffer might have been changed by - // the host between the time it was mapped and the current async transfer. If the buffer is not owned by the user, - // it won't be accessed for write. 
- if ((Direction::H2D == m_direction) || user_owns_buffer) { - auto status = transfer_request.buffer.synchronize(m_driver, HailoRTDriver::DmaSyncDirection::TO_DEVICE); - CHECK_SUCCESS(status); - } + const auto desired_desc_num = m_desc_list->descriptors_in_buffer(transfer_request.transfer_buffers[i].size()); + CHECK(desired_desc_num <= MAX_DESCS_COUNT, HAILO_INTERNAL_FAILURE); + const uint16_t desc_num = static_cast(desired_desc_num); + assert(total_descs_count + desc_num < MAX_DESCS_COUNT); + total_descs_count = static_cast(total_descs_count + desc_num); - const auto desired_desc_num = m_desc_list->descriptors_in_buffer(transfer_request.buffer.size()); - CHECK(desired_desc_num <= MAX_DESCS_COUNT, HAILO_INTERNAL_FAILURE); - const uint16_t desc_num = static_cast(desired_desc_num); + const auto last_desc_avail = static_cast((num_available + desc_num - 1) & m_descs.size_mask); - const auto num_available = get_num_available(); - const auto last_desc_avail = static_cast((num_available + desc_num - 1) & m_descs.size_mask); + transfer_last_descs.emplace_back(last_desc_avail); - auto status = prepare_descriptors(transfer_request.buffer.size(), num_available, mapped_buffer, - transfer_request.buffer.offset()); - CHECK_SUCCESS(status); + // Raise interrupt on last buffer + const auto should_buffer_raise_int = (i == (transfer_request.transfer_buffers.size() - 1)); + auto status = prepare_descriptors(transfer_request.transfer_buffers[i].size(), num_available, mapped_buffer, + transfer_request.transfer_buffers[i].offset(), should_buffer_raise_int); + CHECK_SUCCESS(status); - add_ongoing_transfer(std::move(transfer_request), num_available, last_desc_avail); + num_available = static_cast((last_desc_avail + 1) & m_descs.size_mask); + } - status = inc_num_available(desc_num); + if ((nullptr != m_latency_meter) && (m_direction == Direction::H2D)) { + // If we measure latency, we need an interrupt on the first descriptor for each H2D channel. 
+ m_desc_list->program_single_descriptor((*m_desc_list)[first_desc], m_desc_list->desc_page_size(), + InterruptsDomain::HOST); + } + + add_ongoing_transfer(std::move(transfer_request), first_desc, std::move(transfer_last_descs)); + + auto status = inc_num_available(total_descs_count); CHECK_SUCCESS(status); return HAILO_SUCCESS; @@ -225,7 +247,8 @@ void BoundaryChannel::cancel_pending_transfers() size_t BoundaryChannel::get_max_ongoing_transfers(size_t transfer_size) const { - const auto descs_in_transfer = m_desc_list->descriptors_in_buffer(transfer_size); + // Add desc for boundary channel because might need extra for non aligned async API + const auto descs_in_transfer = m_desc_list->descriptors_in_buffer(transfer_size) + 1; const auto descs_count = CB_SIZE(m_descs); size_t max_transfers_in_buffer = (descs_count - 1) / descs_in_transfer; @@ -276,7 +299,8 @@ bool BoundaryChannel::is_transfer_complete(const OngoingTransfer &transfer, uint { // Transfer is complete if its last descriptor is in [previous_num_processed, current_num_processed) or // the the buffer is empty (previous_num_processed == get_num_available()) - return is_desc_between(previous_num_processed, current_num_processed, transfer.last_desc) || + assert(!transfer.last_descs.empty()); + return is_desc_between(previous_num_processed, current_num_processed, transfer.last_descs.back()) || (current_num_processed == get_num_available()); } @@ -287,12 +311,16 @@ void BoundaryChannel::on_transfer_complete(std::unique_lock &lock, if (nullptr != m_latency_meter) { m_desc_list->clear_descriptor(transfer.latency_measure_desc); } - m_desc_list->clear_descriptor(transfer.last_desc); + + assert(!transfer.last_descs.empty()); + for (const auto& last_desc : transfer.last_descs) { + m_desc_list->clear_descriptor(last_desc); + } // We increase desc num_proc (can happen only in this flow). After it is increased - // 1. On D2H channels - the output can be read by the user. // 2. 
On H2D channels - new input can be written to the buffer. - _CB_SET(m_descs.tail, (transfer.last_desc + 1) & m_descs.size_mask); + _CB_SET(m_descs.tail, (transfer.last_descs.back() + 1) & m_descs.size_mask); // Finally, we notify user callbacks registered with the transfer. // We want to make sure that the callbacks are called after the descriptors can be reused (So the user will @@ -300,14 +328,15 @@ void BoundaryChannel::on_transfer_complete(std::unique_lock &lock, lock.unlock(); if (Direction::D2H == m_direction) { - auto sync_status = transfer.request.buffer.synchronize(m_driver, HailoRTDriver::DmaSyncDirection::TO_HOST); - if (HAILO_SUCCESS != sync_status) { - LOGGER__ERROR("Failed to sync buffer for output channel {} device {}", m_channel_id, m_driver.device_id()); - if (HAILO_SUCCESS != complete_status) { - complete_status = sync_status; + for (auto& transfer_buffer : transfer.request.transfer_buffers) { + auto sync_status = transfer_buffer.synchronize(m_vdma_device, HailoRTDriver::DmaSyncDirection::TO_HOST); + if (HAILO_SUCCESS != sync_status) { + LOGGER__ERROR("Failed to sync buffer for output channel {} device {}", m_channel_id, m_driver.device_id()); + if (HAILO_SUCCESS != complete_status) { + complete_status = sync_status; + } } } - } transfer.request.callback(complete_status); @@ -315,7 +344,7 @@ void BoundaryChannel::on_transfer_complete(std::unique_lock &lock, } hailo_status BoundaryChannel::prepare_descriptors(size_t transfer_size, uint16_t starting_desc, - MappedBufferPtr mapped_buffer, size_t buffer_offset) + MappedBufferPtr mapped_buffer, size_t buffer_offset, bool raise_interrupt) { if (mapped_buffer != nullptr) { CHECK((buffer_offset % m_desc_list->desc_page_size()) == 0, HAILO_INTERNAL_FAILURE, @@ -342,12 +371,7 @@ hailo_status BoundaryChannel::prepare_descriptors(size_t transfer_size, uint16_t } } - if ((nullptr != m_latency_meter) && (m_direction == Direction::H2D)) { - // If we measure latency, we need an interrupt on the first descriptor 
for each H2D channel. - m_desc_list->program_single_descriptor((*m_desc_list)[starting_desc], m_desc_list->desc_page_size(), - InterruptsDomain::HOST); - } - auto last_desc_interrupts_domain = InterruptsDomain::HOST; + auto last_desc_interrupts_domain = raise_interrupt ? InterruptsDomain::HOST : InterruptsDomain::NONE; // TODO: HRT-11188 - fix starting_desc parameter auto actual_desc_count = m_desc_list->program_last_descriptor(transfer_size, last_desc_interrupts_domain, starting_desc); @@ -373,12 +397,14 @@ bool BoundaryChannel::is_buffer_already_configured(MappedBufferPtr buffer, size_ return starting_desc_diff == buffer_offset_diff_in_descs; } -void BoundaryChannel::add_ongoing_transfer(TransferRequest &&transfer_request, uint16_t first_desc, uint16_t last_desc) +void BoundaryChannel::add_ongoing_transfer(TransferRequest &&transfer_request, uint16_t first_desc, + std::vector &&last_descs) { OngoingTransfer transfer{}; transfer.request = std::move(transfer_request); - transfer.last_desc = last_desc; - transfer.latency_measure_desc = (m_direction == HailoRTDriver::DmaDirection::H2D) ? first_desc : last_desc; + transfer.last_descs = std::move(last_descs); + transfer.latency_measure_desc = (m_direction == HailoRTDriver::DmaDirection::H2D) ? 
first_desc : + transfer.last_descs.back(); m_ongoing_transfers.push_back(std::move(transfer)); } diff --git a/hailort/libhailort/src/vdma/channel/boundary_channel.hpp b/hailort/libhailort/src/vdma/channel/boundary_channel.hpp index e2533d5..38b7d02 100644 --- a/hailort/libhailort/src/vdma/channel/boundary_channel.hpp +++ b/hailort/libhailort/src/vdma/channel/boundary_channel.hpp @@ -10,6 +10,7 @@ #ifndef _HAILO_VDMA_BOUNDARY_CHANNEL_HPP_ #define _HAILO_VDMA_BOUNDARY_CHANNEL_HPP_ +#include "vdma/vdma_device.hpp" #include "vdma/channel/vdma_channel_regs.hpp" #include "vdma/channel/channel_id.hpp" #include "vdma/memory/descriptor_list.hpp" @@ -27,7 +28,7 @@ namespace vdma { struct OngoingTransfer { TransferRequest request; - uint16_t last_desc; + std::vector last_descs; uint16_t latency_measure_desc; }; @@ -38,10 +39,10 @@ class BoundaryChannel final public: using Direction = HailoRTDriver::DmaDirection; - static Expected create(vdma::ChannelId channel_id, Direction direction, HailoRTDriver &driver, + static Expected create(vdma::ChannelId channel_id, Direction direction, VdmaDevice &vdma_device, uint32_t descs_count, uint16_t desc_page_size, const std::string &stream_name = "", LatencyMeterPtr latency_meter = nullptr); - BoundaryChannel(vdma::ChannelId channel_id, Direction direction, HailoRTDriver &driver, uint32_t descs_count, + BoundaryChannel(vdma::ChannelId channel_id, Direction direction, VdmaDevice &vdma_device, uint32_t descs_count, uint16_t desc_page_size, const std::string &stream_name, LatencyMeterPtr latency_meter, hailo_status &status); BoundaryChannel(const BoundaryChannel &other) = delete; @@ -97,10 +98,11 @@ private: void on_transfer_complete(std::unique_lock &lock, OngoingTransfer &transfer, hailo_status complete_status); hailo_status prepare_descriptors(size_t transfer_size, uint16_t starting_desc, - MappedBufferPtr mapped_buffer, size_t buffer_offset); + MappedBufferPtr mapped_buffer, size_t buffer_offset, bool raise_interrupt = true); bool 
is_buffer_already_configured(MappedBufferPtr buffer, size_t buffer_offset_in_descs, size_t starting_desc) const; - void add_ongoing_transfer(TransferRequest &&transfer_request, uint16_t first_desc, uint16_t last_desc); + void add_ongoing_transfer(TransferRequest &&transfer_request, uint16_t first_desc, + std::vector &&last_descs); static bool is_desc_between(uint16_t begin, uint16_t end, uint16_t desc); uint16_t get_num_available() const; @@ -109,6 +111,7 @@ private: const vdma::ChannelId m_channel_id; const Direction m_direction; + VdmaDevice &m_vdma_device; HailoRTDriver &m_driver; VdmaChannelRegs m_host_registers; std::shared_ptr m_desc_list; // Host side descriptor list diff --git a/hailort/libhailort/src/vdma/channel/interrupts_dispatcher.cpp b/hailort/libhailort/src/vdma/channel/interrupts_dispatcher.cpp index a59699f..0f71c2c 100644 --- a/hailort/libhailort/src/vdma/channel/interrupts_dispatcher.cpp +++ b/hailort/libhailort/src/vdma/channel/interrupts_dispatcher.cpp @@ -63,7 +63,11 @@ hailo_status InterruptsDispatcher::start(const ChannelsBitmap &channels_bitmap, hailo_status InterruptsDispatcher::stop() { std::unique_lock lock(m_mutex); - CHECK(m_wait_context != nullptr, HAILO_INVALID_OPERATION, "Interrupt thread not running"); + + if (!m_wait_context) { + // Already stopped + return HAILO_SUCCESS; + } // Nullify wait context so the thread will pause const auto bitmap = m_wait_context->bitmap; diff --git a/hailort/libhailort/src/vdma/circular_stream_buffer_pool.cpp b/hailort/libhailort/src/vdma/circular_stream_buffer_pool.cpp index 18234e5..f6d8e4b 100644 --- a/hailort/libhailort/src/vdma/circular_stream_buffer_pool.cpp +++ b/hailort/libhailort/src/vdma/circular_stream_buffer_pool.cpp @@ -14,7 +14,7 @@ namespace hailort { -Expected> CircularStreamBufferPool::create(HailoRTDriver &driver, +Expected> CircularStreamBufferPool::create(VdmaDevice &device, HailoRTDriver::DmaDirection direction, size_t desc_page_size, size_t descs_count, size_t transfer_size) { // 
TODO: HRT-11220 calculate desc_count/desc_page_size base on transfer_size and queue_size @@ -26,7 +26,7 @@ Expected> CircularStreamBufferPool::cr CHECK_AS_EXPECTED(transfer_size < buffer_size, HAILO_INTERNAL_FAILURE, "Transfer size {} must be smaller than buffer size {}", transfer_size, buffer_size); - auto mapped_buffer = allocate_buffer(driver, direction, buffer_size); + auto mapped_buffer = allocate_buffer(device, direction, buffer_size); CHECK_EXPECTED(mapped_buffer); auto circular_buffer_pool = make_unique_nothrow(desc_page_size, descs_count, @@ -99,10 +99,10 @@ void CircularStreamBufferPool::reset_pointers() m_next_enqueue_desc_offset = 0; } -Expected CircularStreamBufferPool::allocate_buffer(HailoRTDriver &driver, +Expected CircularStreamBufferPool::allocate_buffer(VdmaDevice &device, HailoRTDriver::DmaDirection direction, size_t size) { - auto dma_able_buffer = vdma::DmaAbleBuffer::create(driver, size); + auto dma_able_buffer = vdma::DmaAbleBuffer::create_by_allocation(size, device.get_driver()); CHECK_EXPECTED(dma_able_buffer); auto dma_storage = make_shared_nothrow(dma_able_buffer.release()); @@ -110,7 +110,7 @@ Expected CircularStreamBufferPool::allocate_buffer(HailoRTDriver &dri // TODO HRT-11595: We map the buffer here to avoid mapping buffer during descriptors list creation (it cause // deadlock on the linux driver). After HRT-11595, we won't need to call dma_map. 
- auto map_result = dma_storage->dma_map(driver, to_hailo_dma_direction(direction)); + auto map_result = dma_storage->dma_map(device, to_hailo_dma_direction(direction)); CHECK_EXPECTED(map_result); auto mapped_buffer = make_shared_nothrow(std::move(dma_storage)); diff --git a/hailort/libhailort/src/vdma/circular_stream_buffer_pool.hpp b/hailort/libhailort/src/vdma/circular_stream_buffer_pool.hpp index edb0caa..04fd902 100644 --- a/hailort/libhailort/src/vdma/circular_stream_buffer_pool.hpp +++ b/hailort/libhailort/src/vdma/circular_stream_buffer_pool.hpp @@ -13,6 +13,7 @@ #include "vdma/memory/mapped_buffer.hpp" #include "common/circular_buffer.hpp" #include "stream_common/stream_buffer_pool.hpp" +#include "vdma/vdma_device.hpp" #include @@ -27,7 +28,7 @@ namespace hailort // and one producer (calls enqueue). class CircularStreamBufferPool final : public StreamBufferPool { public: - static Expected> create(HailoRTDriver &driver, + static Expected> create(VdmaDevice &device, HailoRTDriver::DmaDirection direction, size_t desc_page_size, size_t descs_count, size_t transfer_size); CircularStreamBufferPool(size_t desc_page_size, size_t descs_count, size_t transfer_size, @@ -45,7 +46,7 @@ public: virtual void reset_pointers() override; private: - static Expected allocate_buffer(HailoRTDriver &driver, + static Expected allocate_buffer(VdmaDevice &device, HailoRTDriver::DmaDirection direction, size_t size); size_t descs_in_transfer() const; diff --git a/hailort/libhailort/src/vdma/memory/buffer_requirements.cpp b/hailort/libhailort/src/vdma/memory/buffer_requirements.cpp index 9f36b99..24c0c72 100644 --- a/hailort/libhailort/src/vdma/memory/buffer_requirements.cpp +++ b/hailort/libhailort/src/vdma/memory/buffer_requirements.cpp @@ -20,7 +20,7 @@ static constexpr uint32_t MIN_CCB_DESCRIPTORS_COUNT = 16; Expected BufferSizesRequirements::get_sg_buffer_requirements_single_transfer( uint16_t max_desc_page_size, uint16_t min_batch_size, uint16_t max_batch_size, uint32_t 
transfer_size, - bool is_circular, const bool force_default_page_size, const bool force_batch_size) + bool is_circular, const bool force_default_page_size, const bool force_batch_size, const bool is_vdma_aligned_buffer) { // First, get the result for the min size auto results = get_sg_buffer_requirements_multiple_transfers(max_desc_page_size, min_batch_size, @@ -30,7 +30,11 @@ Expected BufferSizesRequirements::get_sg_buffer_require // In order to fetch all descriptors, the amount of active descs is lower by one that the amount // of descs given (Otherwise we won't be able to determine if the buffer is empty or full). // Therefore we add 1 in order to compensate. - const uint32_t descs_per_transfer = DIV_ROUND_UP(transfer_size, results->desc_page_size()); + uint32_t descs_per_transfer = DIV_ROUND_UP(transfer_size, results->desc_page_size()); + if (!is_vdma_aligned_buffer) { + // Add desc for boundary channel because might need extra descriptor for user non aligned buffer async API + descs_per_transfer++; + } uint32_t descs_count = std::min((descs_per_transfer * max_batch_size) + 1, MAX_DESCS_COUNT); if (is_circular) { descs_count = get_nearest_powerof_2(descs_count, MIN_DESCS_COUNT); @@ -43,8 +47,7 @@ Expected BufferSizesRequirements::get_sg_buffer_require uint16_t max_desc_page_size, uint16_t batch_size, const std::vector &transfer_sizes, bool is_circular, const bool force_default_page_size, const bool force_batch_size) { - const uint16_t initial_desc_page_size = force_default_page_size ? 
- DEFAULT_DESC_PAGE_SIZE : find_initial_desc_page_size(transfer_sizes); + const uint16_t initial_desc_page_size = find_initial_desc_page_size(transfer_sizes, max_desc_page_size, force_default_page_size); CHECK_AS_EXPECTED(max_desc_page_size <= MAX_DESC_PAGE_SIZE, HAILO_INTERNAL_FAILURE, "max_desc_page_size given {} is bigger than hw max desc page size {}", @@ -133,17 +136,20 @@ Expected BufferSizesRequirements::get_ccb_buffer_requir } -uint16_t BufferSizesRequirements::find_initial_desc_page_size(const std::vector &transfer_sizes) +uint16_t BufferSizesRequirements::find_initial_desc_page_size(const std::vector &transfer_sizes, + const uint16_t max_desc_page_size, const bool force_default_page_size) { + const uint16_t channel_max_page_size = std::min(DEFAULT_DESC_PAGE_SIZE, max_desc_page_size); const auto max_transfer_size = *std::max_element(transfer_sizes.begin(), transfer_sizes.end()); // Note: If the pages pointed to by the descriptors are copied in their entirety, then DEFAULT_DESC_PAGE_SIZE // is the optimal value. For transfer_sizes smaller than DEFAULT_DESC_PAGE_SIZE using smaller descriptor page // sizes will save memory consuption without harming performance. In the case of nms for example, only one bbox // is copied from each page. Hence, we'll use MIN_DESC_PAGE_SIZE for nms. - const uint16_t initial_desc_page_size = (DEFAULT_DESC_PAGE_SIZE > max_transfer_size) ? - static_cast(get_nearest_powerof_2(max_transfer_size, MIN_DESC_PAGE_SIZE)) : - DEFAULT_DESC_PAGE_SIZE; - if (DEFAULT_DESC_PAGE_SIZE != initial_desc_page_size) { + const auto optimize_low_page_size = ((channel_max_page_size > max_transfer_size) && !force_default_page_size); + const uint16_t initial_desc_page_size = optimize_low_page_size ? 
+ static_cast(get_nearest_powerof_2(max_transfer_size, MIN_DESC_PAGE_SIZE)) : + channel_max_page_size; + if (channel_max_page_size != initial_desc_page_size) { LOGGER__INFO("Using non-default initial_desc_page_size of {}, due to a small transfer size ({})", initial_desc_page_size, max_transfer_size); } diff --git a/hailort/libhailort/src/vdma/memory/buffer_requirements.hpp b/hailort/libhailort/src/vdma/memory/buffer_requirements.hpp index 2e713f2..c709887 100644 --- a/hailort/libhailort/src/vdma/memory/buffer_requirements.hpp +++ b/hailort/libhailort/src/vdma/memory/buffer_requirements.hpp @@ -37,7 +37,7 @@ public: static Expected get_sg_buffer_requirements_single_transfer(uint16_t max_desc_page_size, uint16_t min_batch_size, uint16_t max_batch_size, uint32_t transfer_size, bool is_circular, - const bool force_default_page_size, const bool force_batch_size); + const bool force_default_page_size, const bool force_batch_size, const bool is_vdma_aligned_buffer); static Expected get_sg_buffer_requirements_multiple_transfers(uint16_t max_desc_page_size, uint16_t batch_size, const std::vector &transfer_sizes, bool is_circular, const bool force_default_page_size, const bool force_batch_size); @@ -46,7 +46,8 @@ public: uint32_t transfer_size, bool is_circular); private: - static uint16_t find_initial_desc_page_size(const std::vector &transfer_sizes); + static uint16_t find_initial_desc_page_size(const std::vector &transfer_sizes, const uint16_t max_desc_page_size, + const bool force_default_page_size); static uint32_t get_required_descriptor_count(const std::vector &transfer_sizes, uint16_t desc_page_size); const uint32_t m_descs_count; diff --git a/hailort/libhailort/src/vdma/memory/continuous_buffer.cpp b/hailort/libhailort/src/vdma/memory/continuous_buffer.cpp index 1f0b63a..f975fe1 100644 --- a/hailort/libhailort/src/vdma/memory/continuous_buffer.cpp +++ b/hailort/libhailort/src/vdma/memory/continuous_buffer.cpp @@ -33,45 +33,27 @@ Expected 
ContinuousBuffer::create(size_t size, HailoRTDriver & CHECK_EXPECTED(result); } - uintptr_t handle = 0; - uint64_t dma_address = 0; - std::tie(handle, dma_address) = result.release(); - - auto mmap = MmapBuffer::create_file_map(size, driver.fd(), handle); - if (!mmap) { - LOGGER__ERROR("Failed mmap continuous buffer"); - driver.vdma_continuous_buffer_free(handle); - return make_unexpected(mmap.status()); - } - - return ContinuousBuffer(size, driver, handle, dma_address, mmap.release()); + return ContinuousBuffer(driver, result.release()); } ContinuousBuffer::~ContinuousBuffer() { - if (0 != m_handle) { - auto status = m_mmap.unmap(); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("Failed unmap mmap buffer {}", status); - } - - status = m_driver.vdma_continuous_buffer_free(m_handle); + if (HailoRTDriver::INVALID_DRIVER_BUFFER_HANDLE_VALUE != m_buffer_info.handle) { + auto status = m_driver.vdma_continuous_buffer_free(m_buffer_info); if (HAILO_SUCCESS != status) { LOGGER__ERROR("Failed free continuous buffer, {}", status); } - - m_handle = 0; } } size_t ContinuousBuffer::size() const { - return m_size; + return m_buffer_info.size; } uint64_t ContinuousBuffer::dma_address() const { - return m_dma_address; + return m_buffer_info.dma_address; } uint16_t ContinuousBuffer::desc_page_size() const @@ -82,25 +64,25 @@ uint16_t ContinuousBuffer::desc_page_size() const uint32_t ContinuousBuffer::descs_count() const { - return descriptors_in_buffer(m_size); + return descriptors_in_buffer(m_buffer_info.size); } hailo_status ContinuousBuffer::read(void *buf_dst, size_t count, size_t offset) { - CHECK((count + offset) <= m_size, HAILO_INSUFFICIENT_BUFFER, - "Requested size {} from offset {} is more than the buffer size {}", count, offset, m_size); + CHECK((count + offset) <= m_buffer_info.size, HAILO_INSUFFICIENT_BUFFER, + "Requested size {} from offset {} is more than the buffer size {}", count, offset, m_buffer_info.size); // We use dma coherent mmap, so no need to sync the 
buffer after the memcpy. - const auto src_address = reinterpret_cast(m_mmap.address()) + offset; + const auto src_address = reinterpret_cast(m_buffer_info.user_address) + offset; memcpy(buf_dst, src_address, count); return HAILO_SUCCESS; } hailo_status ContinuousBuffer::write(const void *buf_src, size_t count, size_t offset) { - CHECK((count + offset) <= m_size, HAILO_INSUFFICIENT_BUFFER, - "Requested size {} from offset {} is more than the buffer size {}", count, offset, m_size); + CHECK((count + offset) <= m_buffer_info.size, HAILO_INSUFFICIENT_BUFFER, + "Requested size {} from offset {} is more than the buffer size {}", count, offset, m_buffer_info.size); // We use dma coherent mmap, so no need to sync the buffer after the memcpy. - const auto dst_address = reinterpret_cast(m_mmap.address()) + offset; + const auto dst_address = reinterpret_cast(m_buffer_info.user_address) + offset; memcpy(dst_address, buf_src, count); return HAILO_SUCCESS; } @@ -115,13 +97,9 @@ Expected ContinuousBuffer::program_descriptors(size_t transfer_size, I return descriptors_in_buffer(transfer_size); } -ContinuousBuffer::ContinuousBuffer(size_t size, HailoRTDriver &driver, uintptr_t handle, uint64_t dma_address, - MmapBuffer &&mmap) : - m_size(size), +ContinuousBuffer::ContinuousBuffer(HailoRTDriver &driver, const ContinousBufferInfo &buffer_info) : m_driver(driver), - m_handle(handle), - m_dma_address(dma_address), - m_mmap(std::move(mmap)) + m_buffer_info(buffer_info) {} }; /* namespace vdma */ diff --git a/hailort/libhailort/src/vdma/memory/continuous_buffer.hpp b/hailort/libhailort/src/vdma/memory/continuous_buffer.hpp index 98e9303..a4c109b 100644 --- a/hailort/libhailort/src/vdma/memory/continuous_buffer.hpp +++ b/hailort/libhailort/src/vdma/memory/continuous_buffer.hpp @@ -30,11 +30,9 @@ public: ContinuousBuffer(ContinuousBuffer &&other) noexcept : VdmaBuffer(std::move(other)), - m_size(other.m_size), m_driver(other.m_driver), - m_handle(std::exchange(other.m_handle, 0)), - 
m_dma_address(std::exchange(other.m_dma_address, 0)), - m_mmap(std::move(other.m_mmap)) + m_buffer_info(std::exchange(other.m_buffer_info, + ContinousBufferInfo{HailoRTDriver::INVALID_DRIVER_BUFFER_HANDLE_VALUE, 0, 0, nullptr})) {} virtual Type type() const override @@ -54,14 +52,11 @@ public: size_t desc_offset) override; private: - ContinuousBuffer(size_t size, HailoRTDriver &driver, uintptr_t handle, uint64_t dma_address, - MmapBuffer &&mmap); + ContinuousBuffer(HailoRTDriver &driver, const ContinousBufferInfo &buffer_info); - const size_t m_size; HailoRTDriver &m_driver; - uintptr_t m_handle; - uint64_t m_dma_address; - MmapBuffer m_mmap; + + ContinousBufferInfo m_buffer_info; }; }; /* namespace vdma */ diff --git a/hailort/libhailort/src/vdma/memory/dma_able_buffer.cpp b/hailort/libhailort/src/vdma/memory/dma_able_buffer.cpp index 6b7d16b..a03b2e0 100644 --- a/hailort/libhailort/src/vdma/memory/dma_able_buffer.cpp +++ b/hailort/libhailort/src/vdma/memory/dma_able_buffer.cpp @@ -8,6 +8,7 @@ * See hpp for more information. 
**/ +#include "hailo/hailort_common.hpp" #include "dma_able_buffer.hpp" #include "common/os_utils.hpp" @@ -30,9 +31,13 @@ class UserAllocatedDmaAbleBuffer : public DmaAbleBuffer { public: static Expected create(void *user_address, size_t size) { - CHECK_AS_EXPECTED(0 == (reinterpret_cast(user_address) % OsUtils::get_page_size()), - HAILO_INVALID_ARGUMENT, "User address mapped as dma must be paged aligned (page size {})", - OsUtils::get_page_size()); + CHECK_ARG_NOT_NULL_AS_EXPECTED(user_address); + CHECK_AS_EXPECTED(0 != size, HAILO_INVALID_ARGUMENT); + + const auto dma_able_alignment = OsUtils::get_dma_able_alignment(); + + CHECK_AS_EXPECTED(0 == (reinterpret_cast(user_address) % dma_able_alignment), + HAILO_INVALID_ARGUMENT, "User address mapped as dma must be aligned (alignment value {})", dma_able_alignment); auto buffer = make_shared_nothrow(user_address, size); CHECK_NOT_NULL_AS_EXPECTED(buffer, HAILO_OUT_OF_HOST_MEMORY); @@ -172,22 +177,23 @@ private: MmapBuffer m_mmapped_buffer; }; -Expected DmaAbleBuffer::create(size_t size, void *user_address) +Expected DmaAbleBuffer::create_from_user_address(void *user_address, size_t size) { - if (nullptr != user_address) { - return UserAllocatedDmaAbleBuffer::create(user_address, size); - } else { - return PageAlignedDmaAbleBuffer::create(size); - } + return UserAllocatedDmaAbleBuffer::create(user_address, size); } -Expected DmaAbleBuffer::create(HailoRTDriver &driver, size_t size, void *user_address) +Expected DmaAbleBuffer::create_by_allocation(size_t size) { - if ((nullptr == user_address) && driver.allocate_driver_buffer()) { + return PageAlignedDmaAbleBuffer::create(size); +} + +Expected DmaAbleBuffer::create_by_allocation(size_t size, HailoRTDriver &driver) +{ + if (driver.allocate_driver_buffer()) { return DriverAllocatedDmaAbleBuffer::create(driver, size); } else { // The driver is not needed. 
- return create(size, user_address); + return create_by_allocation(size); } } @@ -246,17 +252,23 @@ private: MmapBuffer m_mmapped_buffer; }; -Expected DmaAbleBuffer::create(size_t size, void *user_address) +Expected DmaAbleBuffer::create_from_user_address(void */* user_address */, size_t /* size */) +{ + LOGGER__ERROR("Mapping user address is not supported on QNX"); + + return make_unexpected(HAILO_NOT_SUPPORTED); +} + +Expected DmaAbleBuffer::create_by_allocation(size_t size) { - CHECK_AS_EXPECTED(nullptr == user_address, HAILO_NOT_SUPPORTED, "Mapping user address is not supported on QNX"); return SharedMemoryDmaAbleBuffer::create(size); } -Expected DmaAbleBuffer::create(HailoRTDriver &driver, size_t size, void *user_address) +Expected DmaAbleBuffer::create_by_allocation(size_t size, HailoRTDriver &driver) { - // qnx don't need the driver for the allocation + // qnx doesn't need the driver for the allocation (void)driver; - return DmaAbleBuffer::create(size, user_address); + return create_by_allocation(size); } #else diff --git a/hailort/libhailort/src/vdma/memory/dma_able_buffer.hpp b/hailort/libhailort/src/vdma/memory/dma_able_buffer.hpp index 66e1c70..0123e62 100644 --- a/hailort/libhailort/src/vdma/memory/dma_able_buffer.hpp +++ b/hailort/libhailort/src/vdma/memory/dma_able_buffer.hpp @@ -28,13 +28,18 @@ namespace vdma { class DmaAbleBuffer; using DmaAbleBufferPtr = std::shared_ptr; -class DmaAbleBuffer { +class DmaAbleBuffer +{ public: - // If user_address is not nullptr, allocation is not needed. - static Expected create(size_t size, void *user_address = nullptr); + // Create a DmaAbleBuffer from the user's provided address. + static Expected create_from_user_address(void *user_address, size_t size); - // The driver is used only if driver.allocate_driver_buffer is true, and that the user address is nullptr. - static Expected create(HailoRTDriver &driver, size_t size, void *user_address = nullptr); + // Create a DmaAbleBuffer by allocating memory. 
+ static Expected create_by_allocation(size_t size); + + // Create a DmaAbleBuffer by allocating memory, using the driver if needed (i.e. + // if driver.allocate_driver_buffer is true) + static Expected create_by_allocation(size_t size, HailoRTDriver &driver); DmaAbleBuffer() = default; DmaAbleBuffer(DmaAbleBuffer &&other) = delete; diff --git a/hailort/libhailort/src/vdma/memory/mapped_buffer.cpp b/hailort/libhailort/src/vdma/memory/mapped_buffer.cpp index 2a0d8eb..99812ab 100644 --- a/hailort/libhailort/src/vdma/memory/mapped_buffer.cpp +++ b/hailort/libhailort/src/vdma/memory/mapped_buffer.cpp @@ -15,50 +15,27 @@ namespace hailort { namespace vdma { -Expected MappedBuffer::create(HailoRTDriver &driver, - std::shared_ptr buffer, HailoRTDriver::DmaDirection data_direction) +Expected MappedBuffer::create_shared(DmaAbleBufferPtr buffer, HailoRTDriver &driver, + HailoRTDriver::DmaDirection data_direction) { auto status = HAILO_UNINITIALIZED; - auto result = MappedBuffer(driver, buffer, data_direction, status); + auto result = make_shared_nothrow(driver, buffer, data_direction, status); CHECK_SUCCESS_AS_EXPECTED(status); - - return result; -} - -Expected MappedBuffer::create_shared(HailoRTDriver &driver, std::shared_ptr buffer, - HailoRTDriver::DmaDirection data_direction) -{ - auto dma_mapped_buffer = create(driver, buffer, data_direction); - CHECK_EXPECTED(dma_mapped_buffer); - - auto result = make_shared_nothrow(dma_mapped_buffer.release()); CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY); return result; } -Expected MappedBuffer::create(HailoRTDriver &driver, - HailoRTDriver::DmaDirection data_direction, size_t size, void *user_address) +Expected MappedBuffer::create_shared_by_allocation(size_t size, HailoRTDriver &driver, + HailoRTDriver::DmaDirection data_direction) { - auto buffer = DmaAbleBuffer::create(driver, size, user_address); + auto buffer = DmaAbleBuffer::create_by_allocation(size, driver); CHECK_EXPECTED(buffer); - return create(driver, 
buffer.release(), data_direction); -} - -Expected MappedBuffer::create_shared(HailoRTDriver &driver, - HailoRTDriver::DmaDirection data_direction, size_t size, void *user_address) -{ - auto dma_mapped_buffer = create(driver, data_direction, size, user_address); - CHECK_EXPECTED(dma_mapped_buffer); - - auto result = make_shared_nothrow(dma_mapped_buffer.release()); - CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY); - - return result; + return create_shared(buffer.release(), driver, data_direction); } -MappedBuffer::MappedBuffer(HailoRTDriver &driver, std::shared_ptr buffer, +MappedBuffer::MappedBuffer(HailoRTDriver &driver, DmaAbleBufferPtr buffer, HailoRTDriver::DmaDirection data_direction, hailo_status &status) : m_driver(driver), m_buffer(buffer), diff --git a/hailort/libhailort/src/vdma/memory/mapped_buffer.hpp b/hailort/libhailort/src/vdma/memory/mapped_buffer.hpp index 9c864aa..884c15e 100644 --- a/hailort/libhailort/src/vdma/memory/mapped_buffer.hpp +++ b/hailort/libhailort/src/vdma/memory/mapped_buffer.hpp @@ -38,20 +38,16 @@ using MappedBufferPtr = std::shared_ptr; class MappedBuffer final { public: - // Maps the given DmaAbleBuffer in the right direction. 
- static Expected create(HailoRTDriver &driver, std::shared_ptr buffer, + // Maps the given DmaAbleBuffer in 'data_direction' + static Expected create_shared(DmaAbleBufferPtr buffer, HailoRTDriver &driver, HailoRTDriver::DmaDirection data_direction); - static Expected create_shared(HailoRTDriver &driver, std::shared_ptr buffer, - HailoRTDriver::DmaDirection data_direction); - - // If user_address is nullptr, a buffer of size 'size' will be allocated and mapped to dma in 'data_direction' - // Otherwise, the buffer pointed to by user_address will be mapped to dma in 'data_direction' - static Expected create(HailoRTDriver &driver, HailoRTDriver::DmaDirection data_direction, - size_t size, void *user_address = nullptr); - static Expected create_shared(HailoRTDriver &driver, HailoRTDriver::DmaDirection data_direction, - size_t size, void *user_address = nullptr); + // A DmaAbleBuffer of 'size' bytes will be allocated and mapped to dma in 'data_direction' + static Expected create_shared_by_allocation(size_t size, HailoRTDriver &driver, + HailoRTDriver::DmaDirection data_direction); + MappedBuffer(HailoRTDriver &driver, DmaAbleBufferPtr buffer, HailoRTDriver::DmaDirection data_direction, + hailo_status &status); MappedBuffer(MappedBuffer &&other) noexcept; MappedBuffer(const MappedBuffer &other) = delete; MappedBuffer &operator=(const MappedBuffer &other) = delete; @@ -92,11 +88,9 @@ public: hailo_status read_cyclic(void *buf_dst, size_t count, size_t offset, bool should_sync = true); private: - MappedBuffer(HailoRTDriver &driver, std::shared_ptr buffer, HailoRTDriver::DmaDirection data_direction, - hailo_status &status); - HailoRTDriver &m_driver; - std::shared_ptr m_buffer; + // TODO: do we need to hold a DmaAbleBuffer here? 
(HRT-12389) + DmaAbleBufferPtr m_buffer; HailoRTDriver::VdmaBufferHandle m_mapping_handle; const HailoRTDriver::DmaDirection m_data_direction; }; diff --git a/hailort/libhailort/src/vdma/memory/mapping_manager.cpp b/hailort/libhailort/src/vdma/memory/mapping_manager.cpp new file mode 100644 index 0000000..ba2a21d --- /dev/null +++ b/hailort/libhailort/src/vdma/memory/mapping_manager.cpp @@ -0,0 +1,99 @@ +/** + * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) +**/ +/** + * @file mapping_manager.cpp + * @brief DMA mapping registry on a given device + **/ + +#include "mapping_manager.hpp" +#include "hailo/hailort.h" + +namespace hailort { +namespace vdma { + +MappingManager::MappingManager(HailoRTDriver &driver) : + m_driver(driver), + m_mutex(), + m_h2d_mappings(), + m_d2h_mappings() +{} + +hailo_status MappingManager::map_buffer(void *address, size_t size, hailo_stream_direction_t direction) +{ + static const auto CREATE_DMAABLE_BUFFER = nullptr; + auto mapping_result = try_dma_map(CREATE_DMAABLE_BUFFER, address, size, direction); + CHECK_EXPECTED_AS_STATUS(mapping_result); + + const auto new_mapping = mapping_result->second; + return new_mapping ? 
HAILO_SUCCESS : HAILO_DMA_MAPPING_ALREADY_EXISTS; +} + +hailo_status MappingManager::unmap_buffer(void *address, hailo_stream_direction_t direction) +{ + auto &mappings = get_mapping_storage(direction); + std::lock_guard lock_guard(m_mutex); + auto it = mappings.find(address); + if (it == mappings.end()) { + LOGGER__TRACE("Buffer {} not mapped in direction {}", address, direction); + return HAILO_NOT_FOUND; + } + + mappings.erase(it); + return HAILO_SUCCESS; +} + +Expected> MappingManager::try_dma_map(DmaAbleBufferPtr buffer, + hailo_stream_direction_t direction) +{ + CHECK_ARG_NOT_NULL_AS_EXPECTED(buffer); + + return try_dma_map(buffer, buffer->user_address(), buffer->size(), direction); +} + +Expected> MappingManager::try_dma_map(DmaAbleBufferPtr buffer, + void *address, size_t size, hailo_stream_direction_t direction) +{ + assert((nullptr == buffer) || ((buffer->user_address() == address) && (buffer->size() == size))); + CHECK_ARG_NOT_NULL_AS_EXPECTED(address); + CHECK_AS_EXPECTED(0 < size, HAILO_INVALID_ARGUMENT); + CHECK_AS_EXPECTED(HAILO_STREAM_DIRECTION_MAX_ENUM > direction, HAILO_INVALID_ARGUMENT); + + auto &mappings = get_mapping_storage(direction); + std::lock_guard lock_guard(m_mutex); + if (mappings.end() != mappings.find(address)) { + // Mapping exists + return std::make_pair(mappings[address], false); + } + + // New mapping + if (nullptr == buffer) { + // We only want to create a dma-able buffer if the address hasn't been mapped and we haven't gotten + // a dma-able buffer from the user + auto buffer_exp = DmaAbleBuffer::create_from_user_address(address, size); + CHECK_EXPECTED(buffer_exp); + buffer = buffer_exp.release(); + } + + const auto data_direction = (direction == HAILO_H2D_STREAM) ? 
+ HailoRTDriver::DmaDirection::H2D : + HailoRTDriver::DmaDirection::D2H; + auto mapped_buffer = MappedBuffer::create_shared(buffer, m_driver, data_direction); + CHECK_EXPECTED(mapped_buffer); + + mappings[address] = mapped_buffer.release(); + + return std::make_pair(mappings[address], true); +} + +std::unordered_map &MappingManager::get_mapping_storage(hailo_stream_direction_t direction) +{ + // No point in failing if direction is invalid (i.e. HAILO_STREAM_DIRECTION_MAX_ENUM), + // because the direction is checked before mappings are added (see try_dma_map). So an invalid direction + // will result in the mapping not being found + return (direction == HAILO_H2D_STREAM) ? m_h2d_mappings : m_d2h_mappings; +} + +} /* namespace vdma */ +} /* namespace hailort */ diff --git a/hailort/libhailort/src/vdma/memory/mapping_manager.hpp b/hailort/libhailort/src/vdma/memory/mapping_manager.hpp new file mode 100644 index 0000000..a211f9d --- /dev/null +++ b/hailort/libhailort/src/vdma/memory/mapping_manager.hpp @@ -0,0 +1,54 @@ +/** + * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. 
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT) +**/ +/** + * @file mapping_manager.hpp + * @brief DMA mapping registry on a given device + **/ + +#ifndef _HAILO_MAPPING_MANAGER_HPP_ +#define _HAILO_MAPPING_MANAGER_HPP_ + +#include "hailo/hailort.h" +#include "vdma/memory/mapped_buffer.hpp" +#include "os/hailort_driver.hpp" + +#include +#include +#include + +namespace hailort { +namespace vdma { + +class MappingManager final +{ +public: + MappingManager(HailoRTDriver &driver); + MappingManager(MappingManager &&) = delete; + MappingManager(const MappingManager &) = delete; + MappingManager &operator=(MappingManager &&) = delete; + MappingManager &operator=(const MappingManager &) = delete; + ~MappingManager() = default; + + hailo_status map_buffer(void *address, size_t size, hailo_stream_direction_t direction); + hailo_status unmap_buffer(void *address, hailo_stream_direction_t direction); + // Returns (MappedBufferPtr, true) if the mapping is new + // Returns (MappedBufferPtr, false) if the mapping is pre-existing + Expected> try_dma_map(DmaAbleBufferPtr buffer, hailo_stream_direction_t direction); + +private: + inline std::unordered_map &get_mapping_storage(hailo_stream_direction_t direction); + Expected> try_dma_map(DmaAbleBufferPtr buffer, void *address, size_t size, + hailo_stream_direction_t direction); + + HailoRTDriver &m_driver; + std::mutex m_mutex; + std::unordered_map m_h2d_mappings; + std::unordered_map m_d2h_mappings; +}; + +} /* namespace vdma */ +} /* namespace hailort */ + +#endif /* _HAILO_mapping_manager_HPP_ */ diff --git a/hailort/libhailort/src/vdma/memory/sg_buffer.cpp b/hailort/libhailort/src/vdma/memory/sg_buffer.cpp index e85de25..5ef0132 100644 --- a/hailort/libhailort/src/vdma/memory/sg_buffer.cpp +++ b/hailort/libhailort/src/vdma/memory/sg_buffer.cpp @@ -22,7 +22,7 @@ Expected SgBuffer::create(HailoRTDriver &driver, size_t size, uint32_t CHECK_AS_EXPECTED((size % desc_page_size) == 0, HAILO_INTERNAL_FAILURE, 
"SgBuffer size must be a multiple of descriptors page size (size {})", size); - auto mapped_buffer = MappedBuffer::create_shared(driver, data_direction, size); + auto mapped_buffer = MappedBuffer::create_shared_by_allocation(size, driver, data_direction); CHECK_EXPECTED(mapped_buffer); auto desc_list_exp = DescriptorList::create(desc_count, desc_page_size, is_circular, driver); diff --git a/hailort/libhailort/src/vdma/vdma_config_core_op.cpp b/hailort/libhailort/src/vdma/vdma_config_core_op.cpp index e400744..1e363b1 100644 --- a/hailort/libhailort/src/vdma/vdma_config_core_op.cpp +++ b/hailort/libhailort/src/vdma/vdma_config_core_op.cpp @@ -29,6 +29,30 @@ VdmaConfigCoreOp::VdmaConfigCoreOp(ActiveCoreOpHolder &active_core_op_holder, m_resources_manager(std::move(resources_manager)) {} + +hailo_status VdmaConfigCoreOp::cancel_pending_transfers() +{ + // Best effort + auto status = HAILO_SUCCESS; + auto deactivate_status = HAILO_UNINITIALIZED; + for (const auto &name_pair : m_input_streams) { + deactivate_status = name_pair.second->cancel_pending_transfers(); + if (HAILO_SUCCESS != deactivate_status) { + LOGGER__ERROR("Failed to cancel pending transfers for input stream {}", name_pair.first); + status = deactivate_status; + } + } + for (const auto &name_pair : m_output_streams) { + deactivate_status = name_pair.second->cancel_pending_transfers(); + if (HAILO_SUCCESS != deactivate_status) { + LOGGER__ERROR("Failed to cancel pending transfers for output stream {}", name_pair.first); + status = deactivate_status; + } + } + + return status; +} + hailo_status VdmaConfigCoreOp::activate_impl(uint16_t dynamic_batch_size) { auto status = HAILO_UNINITIALIZED; @@ -69,12 +93,33 @@ hailo_status VdmaConfigCoreOp::deactivate_impl() // After the state machine has been reset the vdma channels are no longer active, so we // can cancel pending transfers, thus allowing vdma buffers linked to said transfers to be freed - status = m_resources_manager->cancel_pending_transfers(); + status 
= cancel_pending_transfers(); CHECK_SUCCESS(status, "Failed to cancel pending transfers"); return HAILO_SUCCESS; } +hailo_status VdmaConfigCoreOp::shutdown() +{ + hailo_status status = HAILO_SUCCESS; // Success oriented + + auto abort_status = abort_low_level_streams(); + if (HAILO_SUCCESS != abort_status) { + LOGGER__ERROR("Failed abort low level streams {}", abort_status); + status = abort_status; + } + + // On VdmaConfigCoreOp, shutdown is the same as deactivate. In the future, we can release the resources inside + // the resource manager and free space in the firmware SRAM + auto deactivate_status = deactivate_impl(); + if (HAILO_SUCCESS != deactivate_status) { + LOGGER__ERROR("Failed deactivate core op with status {}", deactivate_status); + status = deactivate_status; + } + + return status; +} + hailo_status VdmaConfigCoreOp::deactivate_host_resources() { auto status = deactivate_low_level_streams(); diff --git a/hailort/libhailort/src/vdma/vdma_config_core_op.hpp b/hailort/libhailort/src/vdma/vdma_config_core_op.hpp index bb6aa1f..f923e09 100644 --- a/hailort/libhailort/src/vdma/vdma_config_core_op.hpp +++ b/hailort/libhailort/src/vdma/vdma_config_core_op.hpp @@ -48,8 +48,10 @@ public: virtual hailo_status activate_impl(uint16_t dynamic_batch_size) override; // Will first deactivate host resources (via deactivate_host_resources) and then reset the core-op on the fw virtual hailo_status deactivate_impl() override; + virtual hailo_status shutdown() override; // Deactivate all resources related to the core-op on the host, but without resetting the core-op on the fw hailo_status deactivate_host_resources(); + hailo_status cancel_pending_transfers(); virtual Expected get_default_streams_interface() override; diff --git a/hailort/libhailort/src/vdma/vdma_config_manager.cpp b/hailort/libhailort/src/vdma/vdma_config_manager.cpp index 0f19696..73c650f 100644 --- a/hailort/libhailort/src/vdma/vdma_config_manager.cpp +++ 
b/hailort/libhailort/src/vdma/vdma_config_manager.cpp @@ -47,7 +47,7 @@ hailo_status VdmaConfigManager::switch_core_op(std::shared_ptr CHECK_SUCCESS(status, "Failed activating next core-op"); // Current core-op is now deactivated (we are not on batch switch), so we can cancel pending transfers. - status = current_active_core_op->get_resources_manager()->cancel_pending_transfers(); + status = current_active_core_op->cancel_pending_transfers(); CHECK_SUCCESS(status, "Failed canceling pending transfers from previous core-op"); } diff --git a/hailort/libhailort/src/vdma/vdma_device.cpp b/hailort/libhailort/src/vdma/vdma_device.cpp index 3eb774f..c06c3b0 100644 --- a/hailort/libhailort/src/vdma/vdma_device.cpp +++ b/hailort/libhailort/src/vdma/vdma_device.cpp @@ -11,6 +11,7 @@ #include "vdma/vdma_device.hpp" #include "vdma/memory/descriptor_list.hpp" +#include "vdma/memory/mapping_manager.hpp" #include "vdma/vdma_config_manager.hpp" #include "vdma/pcie/pcie_device.hpp" #include "vdma/integrated/integrated_device.hpp" @@ -33,7 +34,9 @@ static constexpr std::chrono::milliseconds DEFAULT_TIMEOUT(50000); VdmaDevice::VdmaDevice(std::unique_ptr &&driver, Device::Type type) : DeviceBase::DeviceBase(type), - m_driver(std::move(driver)), m_is_configured(false) + m_driver(std::move(driver)), + m_mapping_manager(*m_driver), + m_is_configured(false) { activate_notifications(get_dev_id()); } @@ -247,6 +250,22 @@ VdmaDevice::~VdmaDevice() } } +hailo_status VdmaDevice::dma_map(void *address, size_t size, hailo_stream_direction_t direction) +{ + return m_mapping_manager.map_buffer(address, size, direction); +} + +hailo_status VdmaDevice::dma_unmap(void *address, hailo_stream_direction_t direction) +{ + return m_mapping_manager.unmap_buffer(address, direction); +} + +Expected> VdmaDevice::try_dma_map(vdma::DmaAbleBufferPtr buffer, + hailo_stream_direction_t direction) +{ + return m_mapping_manager.try_dma_map(buffer, direction); +} + Expected VdmaDevice::create_networks_group_vector(Hef 
&hef, const NetworkGroupsParamsMap &configure_params) { auto partial_clusters_layout_bitmap_exp = Control::get_partial_clusters_layout_bitmap(*this); diff --git a/hailort/libhailort/src/vdma/vdma_device.hpp b/hailort/libhailort/src/vdma/vdma_device.hpp index 7a4baba..30dc64e 100644 --- a/hailort/libhailort/src/vdma/vdma_device.hpp +++ b/hailort/libhailort/src/vdma/vdma_device.hpp @@ -16,8 +16,9 @@ #include "device_common/device_internal.hpp" #include "network_group/network_group_internal.hpp" -#include "os/hailort_driver.hpp" #include "vdma/channel/interrupts_dispatcher.hpp" +#include "vdma/memory/mapping_manager.hpp" +#include "os/hailort_driver.hpp" namespace hailort @@ -48,6 +49,11 @@ public: ExpectedRef get_vdma_interrupts_dispatcher(); + virtual hailo_status dma_map(void *address, size_t size, hailo_stream_direction_t direction) override; + virtual hailo_status dma_unmap(void *address, hailo_stream_direction_t direction) override; + virtual Expected> try_dma_map(vdma::DmaAbleBufferPtr buffer, + hailo_stream_direction_t direction) override; + protected: VdmaDevice(std::unique_ptr &&driver, Type type); @@ -57,7 +63,10 @@ protected: uint8_t *response_buffer, size_t *response_size, hailo_cpu_id_t cpu_id) override; virtual Expected add_hef(Hef &hef, const NetworkGroupsParamsMap &configure_params) override; + // Initialization dependency: MappingManager holds dma mappings for all buffers relative to this device! 
+ // (CoreOp for example holds streams with mapped buffers) std::unique_ptr m_driver; + vdma::MappingManager m_mapping_manager; std::vector> m_core_ops; std::vector> m_network_groups; // TODO: HRT-9547 - Remove when ConfiguredNetworkGroup will be kept in global context diff --git a/hailort/libhailort/src/vdma/vdma_stream.cpp b/hailort/libhailort/src/vdma/vdma_stream.cpp index 6709ee2..6379ead 100644 --- a/hailort/libhailort/src/vdma/vdma_stream.cpp +++ b/hailort/libhailort/src/vdma/vdma_stream.cpp @@ -11,6 +11,7 @@ #include "vdma/vdma_stream.hpp" #include "vdma/circular_stream_buffer_pool.hpp" #include "utils/profiler/tracer_macros.hpp" +#include "common/os_utils.hpp" namespace hailort @@ -31,13 +32,39 @@ Expected> VdmaInputStream::create(hailo_stream_ return result; } +std::unique_ptr VdmaInputStream::init_dma_bounce_buffer_pool( + vdma::BoundaryChannelPtr channel, const LayerInfo &edge_layer, hailo_status &status) +{ + const auto dma_able_alignment = OsUtils::get_dma_able_alignment(); + const auto dma_bounce_buffer_pool_size = channel->get_max_ongoing_transfers( + LayerInfoUtils::get_layer_transfer_size(edge_layer)); + + // Checking status for base class c'tor + if (HAILO_SUCCESS != status) { + return nullptr; + } + + // Initialize dma buffer pool for support for non-aligned user buffers + auto dma_queued_pool = QueuedStreamBufferPool::create(dma_bounce_buffer_pool_size, dma_able_alignment, + BufferStorageParams::create_dma()); + if (dma_queued_pool.status() != HAILO_SUCCESS) { + LOGGER__ERROR("Failed creating DMA bounce buffer pool with status {}", dma_queued_pool.status()); + status = dma_queued_pool.status(); + return nullptr; + } + + return std::unique_ptr(dma_queued_pool.release()); +} + VdmaInputStream::VdmaInputStream(VdmaDevice &device, vdma::BoundaryChannelPtr channel, const LayerInfo &edge_layer, EventPtr core_op_activated_event, hailo_stream_interface_t stream_interface, hailo_status &status) : - AsyncInputStreamBase(edge_layer, stream_interface, 
std::move(core_op_activated_event), status), + AsyncInputStreamBase(edge_layer, std::move(core_op_activated_event), status), m_device(device), + m_dma_bounce_buffer_pool(init_dma_bounce_buffer_pool(channel, edge_layer, status)), m_channel(std::move(channel)), - m_interface(stream_interface) + m_interface(stream_interface), + m_core_op_handle(INVALID_CORE_OP_HANDLE) { // Checking status for base class c'tor if (HAILO_SUCCESS != status) { @@ -62,11 +89,6 @@ hailo_stream_interface_t VdmaInputStream::get_interface() const return m_interface; } -vdevice_core_op_handle_t VdmaInputStream::get_vdevice_core_op_handle() -{ - return m_core_op_handle; -} - void VdmaInputStream::set_vdevice_core_op_handle(vdevice_core_op_handle_t core_op_handle) { m_core_op_handle = core_op_handle; @@ -74,7 +96,7 @@ void VdmaInputStream::set_vdevice_core_op_handle(vdevice_core_op_handle_t core_o Expected> VdmaInputStream::allocate_buffer_pool() { - auto circular_pool = CircularStreamBufferPool::create(m_device.get_driver(), HailoRTDriver::DmaDirection::H2D, + auto circular_pool = CircularStreamBufferPool::create(m_device, HailoRTDriver::DmaDirection::H2D, m_channel->get_desc_list()->desc_page_size(), m_channel->get_desc_list()->count(), get_frame_size()); CHECK_EXPECTED(circular_pool); @@ -86,11 +108,73 @@ size_t VdmaInputStream::get_max_ongoing_transfers() const return m_channel->get_max_ongoing_transfers(get_frame_size()); } +Expected VdmaInputStream::align_transfer_request(TransferRequest &&transfer_request) +{ + const auto dma_alignment = OsUtils::get_dma_able_alignment(); + std::vector transfer_buffers; + TransferBuffer dma_able_bounce_buffer; + const auto buffer_address = transfer_request.transfer_buffers[0].base_buffer()->data(); + const auto buffer_size = transfer_request.transfer_buffers[0].size(); + + { + std::unique_lock lock(m_dma_pool_mutex); + // Initialize dma able bounce buffer the size of alignment size to read pre alignment data + auto dma_able_bounce_buffer_exp = 
m_dma_bounce_buffer_pool->dequeue(); + CHECK_EXPECTED(dma_able_bounce_buffer_exp); + dma_able_bounce_buffer = dma_able_bounce_buffer_exp.release(); + } + + // If buffer size is larger than alignment size - will create bounce buffer for non aligned buffer part and then use + // User's buffer from aligned address - otherwise will create bounce buffer size of user buffer and copy whole frame + if (buffer_size > dma_alignment) { + transfer_buffers.reserve(2); + + // Get first aligned address in user buffer + const auto aligned_user_buffer_addr = HailoRTCommon::align_to(reinterpret_cast(buffer_address), dma_alignment); + const auto bounce_buffer_exact_size = aligned_user_buffer_addr - reinterpret_cast(buffer_address); + const auto user_buffer_size = buffer_size - bounce_buffer_exact_size; + + // Create another transfer buffer with same base address but exact size for actual transfer + auto dma_able_exact_bounce_buffer = TransferBuffer(dma_able_bounce_buffer.base_buffer(), bounce_buffer_exact_size, 0); + memcpy((dma_able_exact_bounce_buffer.base_buffer())->data(), buffer_address, bounce_buffer_exact_size); + transfer_buffers.emplace_back(dma_able_exact_bounce_buffer); + + auto dma_able_user_buffer = DmaStorage::create_dma_able_buffer_from_user_size( + reinterpret_cast(aligned_user_buffer_addr), user_buffer_size); + CHECK_EXPECTED(dma_able_user_buffer); + transfer_buffers.emplace_back(dma_able_user_buffer.release()); + } else { + auto dma_able_exact_bounce_buffer = TransferBuffer(dma_able_bounce_buffer.base_buffer(), buffer_size, 0); + memcpy((dma_able_exact_bounce_buffer.base_buffer())->data(), buffer_address, buffer_size); + transfer_buffers.emplace_back(dma_able_exact_bounce_buffer); + } + + auto wrapped_callback = [user_callback=transfer_request.callback, dma_able_bounce_buffer, this](hailo_status callback_status) { + { + std::unique_lock lock(m_dma_pool_mutex); + m_dma_bounce_buffer_pool->enqueue(TransferBuffer{dma_able_bounce_buffer}); + } + 
user_callback(callback_status); + }; + + return TransferRequest(std::move(transfer_buffers), wrapped_callback); +} + hailo_status VdmaInputStream::write_async_impl(TransferRequest &&transfer_request) { - TRACE(InputVdmaDequeueTrace, m_device.get_dev_id(), m_core_op_handle, name()); + TRACE(FrameDequeueH2DTrace, m_device.get_dev_id(), m_core_op_handle, name()); const auto user_owns_buffer = (buffer_mode() == StreamBufferMode::NOT_OWNING); - return m_channel->launch_transfer(std::move(transfer_request), user_owns_buffer); + + const auto dma_able_alignment = OsUtils::get_dma_able_alignment(); + if (reinterpret_cast(transfer_request.transfer_buffers[0].base_buffer()->data()) % dma_able_alignment == 0) { + return m_channel->launch_transfer(std::move(transfer_request), user_owns_buffer); + } else { + auto unaligned_transfer_request = align_transfer_request(std::move(transfer_request)); + CHECK_EXPECTED_AS_STATUS(unaligned_transfer_request); + return m_channel->launch_transfer(unaligned_transfer_request.release(), user_owns_buffer); + } + + return HAILO_INTERNAL_FAILURE; } hailo_status VdmaInputStream::activate_stream_impl() @@ -103,6 +187,13 @@ hailo_status VdmaInputStream::deactivate_stream_impl() return m_channel->deactivate(); } +hailo_status VdmaInputStream::cancel_pending_transfers() +{ + m_channel->cancel_pending_transfers(); + + return HAILO_SUCCESS; +} + /** Output stream **/ Expected> VdmaOutputStream::create(hailo_stream_interface_t interface, VdmaDevice &device, vdma::BoundaryChannelPtr channel, const LayerInfo &edge_layer, @@ -123,11 +214,12 @@ VdmaOutputStream::VdmaOutputStream(VdmaDevice &device, vdma::BoundaryChannelPtr EventPtr core_op_activated_event, hailo_stream_interface_t interface, hailo_status &status) : - AsyncOutputStreamBase(edge_layer, interface, std::move(core_op_activated_event), status), + AsyncOutputStreamBase(edge_layer, std::move(core_op_activated_event), status), m_device(device), m_channel(std::move(channel)), m_interface(interface), - 
m_transfer_size(get_transfer_size(m_stream_info, get_layer_info())) + m_transfer_size(get_transfer_size(m_stream_info, get_layer_info())), + m_core_op_handle(INVALID_CORE_OP_HANDLE) { // Check status for base class c'tor if (HAILO_SUCCESS != status) { @@ -154,7 +246,7 @@ hailo_stream_interface_t VdmaOutputStream::get_interface() const Expected> VdmaOutputStream::allocate_buffer_pool() { - auto circular_pool = CircularStreamBufferPool::create(m_device.get_driver(), HailoRTDriver::DmaDirection::D2H, + auto circular_pool = CircularStreamBufferPool::create(m_device, HailoRTDriver::DmaDirection::D2H, m_channel->get_desc_list()->desc_page_size(), m_channel->get_desc_list()->count(), m_transfer_size); CHECK_EXPECTED(circular_pool); @@ -166,10 +258,46 @@ size_t VdmaOutputStream::get_max_ongoing_transfers() const return m_channel->get_max_ongoing_transfers(m_transfer_size); } +Expected VdmaOutputStream::align_transfer_request(TransferRequest &&transfer_request) +{ + auto aligned_bounce_buffer_exp = DmaStorage::create_dma_able_buffer_from_user_size(nullptr, + transfer_request.transfer_buffers[0].size()); + CHECK_EXPECTED(aligned_bounce_buffer_exp); + auto aligned_bounce_buffer = aligned_bounce_buffer_exp.release(); + + auto wrapped_callback = [unaligned_user_buffer = transfer_request.transfer_buffers[0].base_buffer(), + aligned_bounce_buffer, user_callback = transfer_request.callback](hailo_status callback_status) { + memcpy(const_cast(unaligned_user_buffer->data()), aligned_bounce_buffer->data(), unaligned_user_buffer->size()); + user_callback(callback_status); + }; + + return TransferRequest(std::move(aligned_bounce_buffer), wrapped_callback); +} + hailo_status VdmaOutputStream::read_async_impl(TransferRequest &&transfer_request) { + if ((INVALID_CORE_OP_HANDLE != m_core_op_handle) && (HAILO_FORMAT_ORDER_HAILO_NMS != m_stream_info.format.order)) { + // On NMS stream we trace EnqueueD2H inside nms_stream + transfer_request.callback = 
[original_callback=transfer_request.callback, this](hailo_status status) { + if (HAILO_SUCCESS == status) { + TRACE(FrameEnqueueD2HTrace, m_device.get_dev_id(), m_core_op_handle, name()); + } + original_callback(status); + }; + } const auto user_owns_buffer = (buffer_mode() == StreamBufferMode::NOT_OWNING); - return m_channel->launch_transfer(std::move(transfer_request), user_owns_buffer); + const auto dma_able_alignment = OsUtils::get_dma_able_alignment(); + if (reinterpret_cast(transfer_request.transfer_buffers[0].base_buffer()->data()) % dma_able_alignment == 0) { + return m_channel->launch_transfer(std::move(transfer_request), user_owns_buffer); + } else { + // In case of read unaligned - currently doesnt support using users buffer - so allocate complete new buffer size of user's buffer + LOGGER__WARNING("read_async() was provided an unaligned buffer (address=0x{:x}), which causes performance degradation. Use buffers algined to {} bytes for optimal performance", + reinterpret_cast(transfer_request.transfer_buffers[0].base_buffer()->data()), dma_able_alignment); + + auto realigned_transfer_request = align_transfer_request(std::move(transfer_request)); + CHECK_EXPECTED_AS_STATUS(realigned_transfer_request); + return m_channel->launch_transfer(realigned_transfer_request.release(), user_owns_buffer); + } } hailo_status VdmaOutputStream::activate_stream_impl() @@ -182,9 +310,21 @@ hailo_status VdmaOutputStream::deactivate_stream_impl() return m_channel->deactivate(); } +void VdmaOutputStream::set_vdevice_core_op_handle(vdevice_core_op_handle_t core_op_handle) +{ + m_core_op_handle = core_op_handle; +} + uint32_t VdmaOutputStream::get_transfer_size(const hailo_stream_info_t &stream_info, const LayerInfo &layer_info) { return LayerInfoUtils::get_stream_transfer_size(stream_info, layer_info); } +hailo_status VdmaOutputStream::cancel_pending_transfers() +{ + m_channel->cancel_pending_transfers(); + + return HAILO_SUCCESS; +} + } /* namespace hailort */ diff --git 
a/hailort/libhailort/src/vdma/vdma_stream.hpp b/hailort/libhailort/src/vdma/vdma_stream.hpp index 814925a..c2203a4 100644 --- a/hailort/libhailort/src/vdma/vdma_stream.hpp +++ b/hailort/libhailort/src/vdma/vdma_stream.hpp @@ -32,10 +32,8 @@ public: virtual ~VdmaInputStream(); virtual hailo_stream_interface_t get_interface() const override; - - virtual vdevice_core_op_handle_t get_vdevice_core_op_handle() override; - virtual void set_vdevice_core_op_handle(vdevice_core_op_handle_t core_op_handle) override; + virtual hailo_status cancel_pending_transfers() override; private: Expected> allocate_buffer_pool() override; @@ -44,7 +42,17 @@ private: virtual hailo_status activate_stream_impl() override; virtual hailo_status deactivate_stream_impl() override; + static std::unique_ptr init_dma_bounce_buffer_pool(vdma::BoundaryChannelPtr channel, + const LayerInfo &edge_layer, hailo_status &status); + Expected align_transfer_request(TransferRequest &&transfer_request); + VdmaDevice &m_device; + + // Buffer pool for DMA able bounce buffers + // TODO HRT-12542- create new class for bounce buffers + std::mutex m_dma_pool_mutex; + std::unique_ptr m_dma_bounce_buffer_pool; + vdma::BoundaryChannelPtr m_channel; const hailo_stream_interface_t m_interface; vdevice_core_op_handle_t m_core_op_handle; @@ -69,13 +77,23 @@ public: virtual hailo_status read_async_impl(TransferRequest &&transfer_request) override; virtual hailo_status activate_stream_impl() override; virtual hailo_status deactivate_stream_impl() override; + + virtual void set_vdevice_core_op_handle(vdevice_core_op_handle_t core_op_handle) override; + + virtual inline const char *get_device_id() override { return m_device.get_dev_id(); }; + // TODO - HRT-11739 - remove vdevice related members/functions (get/set_vdevice_core_op_handle) + virtual inline vdevice_core_op_handle_t get_vdevice_core_op_handle() override { return m_core_op_handle; }; + virtual hailo_status cancel_pending_transfers() override; + private: static 
uint32_t get_transfer_size(const hailo_stream_info_t &stream_info, const LayerInfo &layer_info); + Expected align_transfer_request(TransferRequest &&transfer_request); VdmaDevice &m_device; vdma::BoundaryChannelPtr m_channel; const hailo_stream_interface_t m_interface; const uint32_t m_transfer_size; + vdevice_core_op_handle_t m_core_op_handle; }; diff --git a/hailort/libhailort/tracer_profiler.proto b/hailort/libhailort/tracer_profiler.proto index 7c5330b..5b7e37b 100644 --- a/hailort/libhailort/tracer_profiler.proto +++ b/hailort/libhailort/tracer_profiler.proto @@ -57,6 +57,7 @@ message ProtoProfilerAddCoreOpTrace { uint64 time_stamp = 1; // nanosec int32 core_op_handle = 2; string core_op_name = 3; + uint32 max_batch_size = 4; } // Frame dequeue means transferring a frame from the buffer to device diff --git a/hailort/pre_build/CMakeLists.txt b/hailort/pre_build/CMakeLists.txt deleted file mode 100644 index 9c32415..0000000 --- a/hailort/pre_build/CMakeLists.txt +++ /dev/null @@ -1,20 +0,0 @@ -cmake_minimum_required(VERSION 3.0.0) -project(hailort_prebuild) - -if(NOT HAILO_OFFLINE_COMPILATION) - set(HAILO_PRE_BUILD_EXTERNAL_DIR ${CMAKE_CURRENT_LIST_DIR}/external) - include(../cmake/execute_cmake.cmake) - message("Downloading dependencies to ${HAILO_EXTERNAL_DIR} ...") - execute_cmake( - SOURCE_DIR ${HAILO_PRE_BUILD_EXTERNAL_DIR} - BUILD_DIR ${HAILO_PRE_BUILD_EXTERNAL_DIR}/build - CONFIGURE_ARGS - -DHAILO_EXTERNAL_DIR=${HAILO_EXTERNAL_DIR} - -DHAILO_BUILD_SERVICE=${HAILO_BUILD_SERVICE} - ) - message("Finished downloading dependencies") -else() - message("Offline compilation, skipped dependencies download") -endif() - -add_subdirectory(tools) \ No newline at end of file diff --git a/hailort/pre_build/external/CMakeLists.txt b/hailort/pre_build/external/CMakeLists.txt deleted file mode 100644 index 66c5f52..0000000 --- a/hailort/pre_build/external/CMakeLists.txt +++ /dev/null @@ -1,35 +0,0 @@ -cmake_minimum_required(VERSION 3.0.0) 
-project(hailort_prebuild_external) - -include(ExternalProject) - -function(git_clone proj repo tag) - ExternalProject_Add(git_clone_${proj} - GIT_REPOSITORY ${repo} - GIT_TAG ${tag} - # GIT_SHALLOW TRUE - SOURCE_DIR ${HAILO_EXTERNAL_DIR}/${proj} - CONFIGURE_COMMAND "" - BUILD_COMMAND "" - INSTALL_COMMAND "" - TEST_COMMAND "" - ) -endfunction() - -include(${CMAKE_CURRENT_LIST_DIR}/../../cmake/external/pybind11.cmake) -include(${CMAKE_CURRENT_LIST_DIR}/../../cmake/external/catch2.cmake) -include(${CMAKE_CURRENT_LIST_DIR}/../../cmake/external/spdlog.cmake) -include(${CMAKE_CURRENT_LIST_DIR}/../../cmake/external/json.cmake) -include(${CMAKE_CURRENT_LIST_DIR}/../../cmake/external/dotwriter.cmake) -include(${CMAKE_CURRENT_LIST_DIR}/../../cmake/external/benchmark.cmake) -include(${CMAKE_CURRENT_LIST_DIR}/../../cmake/external/readerwriterqueue.cmake) -if(CMAKE_SYSTEM_NAME STREQUAL QNX) - include(${CMAKE_CURRENT_LIST_DIR}/../../cmake/external/pevents.cmake) -endif() - -git_clone(CLI11 https://github.com/hailo-ai/CLI11.git f1644f15f219303b7ad670732c21018a1e6f0e11) -git_clone(protobuf https://github.com/protocolbuffers/protobuf.git 22d0e265de7d2b3d2e9a00d071313502e7d4cccf) - -if(HAILO_BUILD_SERVICE) - git_clone(grpc https://github.com/grpc/grpc 53d69cc581c5b7305708587f4f1939278477c28a) -endif() diff --git a/hailort/pre_build/tools/CMakeLists.txt b/hailort/pre_build/tools/CMakeLists.txt deleted file mode 100644 index 2254251..0000000 --- a/hailort/pre_build/tools/CMakeLists.txt +++ /dev/null @@ -1,28 +0,0 @@ -cmake_minimum_required(VERSION 3.0.0) - -message(STATUS "Building protobuf::protoc...") -if(NOT protobuf_BUILD_TESTS) - set(protobuf_BUILD_TESTS OFF CACHE BOOL "Build protobuf tests") -endif() -if(MSVC AND NOT protobuf_MSVC_STATIC_RUNTIME) - set(protobuf_MSVC_STATIC_RUNTIME OFF CACHE BOOL "Protobuf MSVC static runtime") -endif() -if(NOT protobuf_WITH_ZLIB) - set(protobuf_WITH_ZLIB OFF CACHE BOOL "Compile protobuf with zlib") -endif() 
-add_subdirectory(${HAILO_EXTERNAL_DIR}/protobuf/cmake build_protoc) - -if(HAILO_BUILD_SERVICE) - message(STATUS "Building grpc...") - # The following is an awful hack needed in order to force grpc to use our libprotobuf+liborotoc targets - # ('formal' options are to let grpc recompile it which causes a name conflict, - # or let it use find_package and take the risk it will use a different installed lib) - set(gRPC_PROTOBUF_PROVIDER "hack" CACHE STRING "Provider of protobuf library") - # Avoiding "formal" gRPC_PROTOBUF_PROVIDER option, the following variables should be set independently - set(_gRPC_PROTOBUF_LIBRARIES protobuf::libprotobuf) - set(_gRPC_PROTOBUF_PROTOC_LIBRARIES protobuf::libprotoc) - # Build grpc_cpp_plugin target only - add_subdirectory(${HAILO_EXTERNAL_DIR}/grpc build_grpc EXCLUDE_FROM_ALL) - add_custom_target(grpc_cpp_plugin_target ALL) - add_dependencies(grpc_cpp_plugin_target grpc_cpp_plugin) -endif() \ No newline at end of file diff --git a/hailort/prepare_externals.cmake b/hailort/prepare_externals.cmake index 399cde2..7d3fdbb 100644 --- a/hailort/prepare_externals.cmake +++ b/hailort/prepare_externals.cmake @@ -1,22 +1,11 @@ cmake_minimum_required(VERSION 3.0.0) -message(STATUS "Building pre_build") - -include(${CMAKE_CURRENT_LIST_DIR}/cmake/execute_cmake.cmake) -set(HAILO_EXTERNAL_DIR ${CMAKE_CURRENT_LIST_DIR}/external) -set(HAILO_PRE_BUILD_BUILD_TOOLS ${CMAKE_CURRENT_LIST_DIR}/pre_build/build/tools) -set(PRE_BUILD_BUILD_TYPE "Release") - -execute_cmake( - SOURCE_DIR ${CMAKE_CURRENT_LIST_DIR}/pre_build - BUILD_DIR ${CMAKE_CURRENT_LIST_DIR}/pre_build/build - CONFIGURE_ARGS - -DCMAKE_BUILD_TYPE=${PRE_BUILD_BUILD_TYPE} - -DCMAKE_INSTALL_PREFIX=${CMAKE_CURRENT_LIST_DIR}/pre_build/install - -DHAILO_EXTERNAL_DIR=${HAILO_EXTERNAL_DIR} - -DHAILO_OFFLINE_COMPILATION=${HAILO_OFFLINE_COMPILATION} - -DHAILO_BUILD_SERVICE=${HAILO_BUILD_SERVICE} - BUILD_ARGS - --config ${PRE_BUILD_BUILD_TYPE} --target install ${CMAKE_EXTRA_BUILD_ARGS} - PARALLEL_BUILD +# 
TODO: remove execute_cmake. support script mode? +execute_process(COMMAND + ${CMAKE_COMMAND} + -S ${CMAKE_CURRENT_LIST_DIR}/prepare_externals + -B ${CMAKE_CURRENT_LIST_DIR}/prepare_externals/build + -G "${CMAKE_GENERATOR}" + -DHAILO_EXTERNAL_DIR=${CMAKE_CURRENT_LIST_DIR}/external + -DHAILO_BUILD_SERVICE=${HAILO_BUILD_SERVICE} ) diff --git a/hailort/prepare_externals/CMakeLists.txt b/hailort/prepare_externals/CMakeLists.txt new file mode 100644 index 0000000..d73f9bd --- /dev/null +++ b/hailort/prepare_externals/CMakeLists.txt @@ -0,0 +1,22 @@ +cmake_minimum_required(VERSION 3.0.0) +project(hailort_prebuild) + +set(HAILO_EXTERNALS_EXCLUDE_TARGETS ON) + +message("Downloading dependencies to ${HAILO_EXTERNAL_DIR} ...") +include(${CMAKE_CURRENT_LIST_DIR}/../cmake/external/pybind11.cmake) +include(${CMAKE_CURRENT_LIST_DIR}/../cmake/external/catch2.cmake) +include(${CMAKE_CURRENT_LIST_DIR}/../cmake/external/spdlog.cmake) +include(${CMAKE_CURRENT_LIST_DIR}/../cmake/external/json.cmake) +include(${CMAKE_CURRENT_LIST_DIR}/../cmake/external/dotwriter.cmake) +include(${CMAKE_CURRENT_LIST_DIR}/../cmake/external/benchmark.cmake) +include(${CMAKE_CURRENT_LIST_DIR}/../cmake/external/readerwriterqueue.cmake) +include(${CMAKE_CURRENT_LIST_DIR}/../cmake/external/cli11.cmake) +include(${CMAKE_CURRENT_LIST_DIR}/../cmake/external/protobuf.cmake) +if(HAILO_BUILD_SERVICE) + include(${CMAKE_CURRENT_LIST_DIR}/../cmake/external/grpc.cmake) +endif() +if(CMAKE_SYSTEM_NAME STREQUAL QNX) + include(${CMAKE_CURRENT_LIST_DIR}/../cmake/external/pevents.cmake) +endif() +message("Finished downloading dependencies") diff --git a/hailort/rpc/CMakeLists.txt b/hailort/rpc/CMakeLists.txt index 7cf3b64..7823cb5 100644 --- a/hailort/rpc/CMakeLists.txt +++ b/hailort/rpc/CMakeLists.txt @@ -9,6 +9,15 @@ set(hailort_rpc_proto_hdrs "${CMAKE_CURRENT_BINARY_DIR}/hailort_rpc.pb.h") set(hailort_rpc_grpc_srcs "${CMAKE_CURRENT_BINARY_DIR}/hailort_rpc.grpc.pb.cc") set(hailort_rpc_grpc_hdrs 
"${CMAKE_CURRENT_BINARY_DIR}/hailort_rpc.grpc.pb.h") +set(HAILO_PROTOBUF_PROTOC $) +if(HAILO_BUILD_SERVICE) + if(CMAKE_HOST_UNIX) + set(HAILO_GRPC_CPP_PLUGIN_EXECUTABLE "${HAILO_EXTERNAL_DIR}/grpc-build/grpc_cpp_plugin") + else() + set(HAILO_GRPC_CPP_PLUGIN_EXECUTABLE "${HAILO_EXTERNAL_DIR}/grpc-build/Release/grpc_cpp_plugin.exe") + endif() +endif() + add_custom_command( OUTPUT "${hailort_rpc_proto_srcs}" "${hailort_rpc_proto_hdrs}" "${hailort_rpc_grpc_srcs}" "${hailort_rpc_grpc_hdrs}" COMMAND ${HAILO_PROTOBUF_PROTOC} diff --git a/hailort/rpc/hailort_rpc.proto b/hailort/rpc/hailort_rpc.proto index 1ebdd8b..b0c9dab 100644 --- a/hailort/rpc/hailort_rpc.proto +++ b/hailort/rpc/hailort_rpc.proto @@ -11,6 +11,8 @@ service ProtoHailoRtRpc { rpc VDevice_configure (VDevice_configure_Request) returns (VDevice_configure_Reply) {} rpc VDevice_get_physical_devices_ids (VDevice_get_physical_devices_ids_Request) returns (VDevice_get_physical_devices_ids_Reply) {} rpc VDevice_get_default_streams_interface (VDevice_get_default_streams_interface_Request) returns (VDevice_get_default_streams_interface_Reply) {} + rpc VDevice_get_callback_id (VDevice_get_callback_id_Request) returns (VDevice_get_callback_id_Reply) {} + rpc VDevice_finish_callback_listener (VDevice_finish_callback_listener_Request) returns (VDevice_finish_callback_listener_Reply) {} rpc ConfiguredNetworkGroup_dup_handle (ConfiguredNetworkGroup_dup_handle_Request) returns (ConfiguredNetworkGroup_dup_handle_Reply) {} rpc ConfiguredNetworkGroup_release (Release_Request) returns (Release_Reply) {} @@ -21,6 +23,7 @@ service ProtoHailoRtRpc { rpc ConfiguredNetworkGroup_get_network_infos (ConfiguredNetworkGroup_get_network_infos_Request) returns (ConfiguredNetworkGroup_get_network_infos_Reply) {} rpc ConfiguredNetworkGroup_get_all_stream_infos (ConfiguredNetworkGroup_get_all_stream_infos_Request) returns (ConfiguredNetworkGroup_get_all_stream_infos_Reply) {} rpc ConfiguredNetworkGroup_get_default_stream_interface 
(ConfiguredNetworkGroup_get_default_stream_interface_Request) returns (ConfiguredNetworkGroup_get_default_stream_interface_Reply) {} + rpc ConfiguredNetworkGroup_shutdown (ConfiguredNetworkGroup_shutdown_Request) returns (ConfiguredNetworkGroup_shutdown_Reply) {} rpc ConfiguredNetworkGroup_get_output_vstream_groups (ConfiguredNetworkGroup_get_output_vstream_groups_Request) returns (ConfiguredNetworkGroup_get_output_vstream_groups_Reply) {} rpc ConfiguredNetworkGroup_get_input_vstream_infos (ConfiguredNetworkGroup_get_vstream_infos_Request) returns (ConfiguredNetworkGroup_get_vstream_infos_Reply) {} rpc ConfiguredNetworkGroup_get_output_vstream_infos (ConfiguredNetworkGroup_get_vstream_infos_Request) returns (ConfiguredNetworkGroup_get_vstream_infos_Reply) {} @@ -33,8 +36,16 @@ service ProtoHailoRtRpc { rpc ConfiguredNetworkGroup_is_multi_context (ConfiguredNetworkGroup_is_multi_context_Request) returns (ConfiguredNetworkGroup_is_multi_context_Reply) {} rpc ConfiguredNetworkGroup_get_config_params(ConfiguredNetworkGroup_get_config_params_Request) returns (ConfiguredNetworkGroup_get_config_params_Reply) {} rpc ConfiguredNetworkGroup_get_sorted_output_names(ConfiguredNetworkGroup_get_sorted_output_names_Request) returns (ConfiguredNetworkGroup_get_sorted_output_names_Reply) {} + rpc ConfiguredNetworkGroup_get_min_buffer_pool_size(ConfiguredNetworkGroup_get_min_buffer_pool_size_Request) returns (ConfiguredNetworkGroup_get_min_buffer_pool_size_Reply) {} rpc ConfiguredNetworkGroup_get_stream_names_from_vstream_name(ConfiguredNetworkGroup_get_stream_names_from_vstream_name_Request) returns (ConfiguredNetworkGroup_get_stream_names_from_vstream_name_Reply) {} rpc ConfiguredNetworkGroup_get_vstream_names_from_stream_name(ConfiguredNetworkGroup_get_vstream_names_from_stream_name_Request) returns (ConfiguredNetworkGroup_get_vstream_names_from_stream_name_Reply) {} + rpc ConfiguredNetworkGroup_infer_async(ConfiguredNetworkGroup_infer_async_Request) returns 
(ConfiguredNetworkGroup_infer_async_Reply) {} + rpc ConfiguredNetworkGroup_get_layer_info(ConfiguredNetworkGroup_get_layer_info_Request) returns (ConfiguredNetworkGroup_get_layer_info_Reply) {} + rpc ConfiguredNetworkGroup_get_ops_metadata(ConfiguredNetworkGroup_get_ops_metadata_Request) returns (ConfiguredNetworkGroup_get_ops_metadata_Reply) {} + rpc ConfiguredNetworkGroup_set_nms_score_threshold(ConfiguredNetworkGroup_set_nms_score_threshold_Request) returns (ConfiguredNetworkGroup_set_nms_score_threshold_Reply) {} + rpc ConfiguredNetworkGroup_set_nms_iou_threshold(ConfiguredNetworkGroup_set_nms_iou_threshold_Request) returns (ConfiguredNetworkGroup_set_nms_iou_threshold_Reply) {} + rpc ConfiguredNetworkGroup_set_nms_max_bboxes_per_class(ConfiguredNetworkGroup_set_nms_max_bboxes_per_class_Request) returns (ConfiguredNetworkGroup_set_nms_max_bboxes_per_class_Reply) {} + rpc InputVStreams_create (VStream_create_Request) returns (VStreams_create_Reply) {} rpc InputVStream_release (Release_Request) returns (Release_Reply) {} @@ -91,6 +102,35 @@ message ProtoVStreamIdentifier { uint32 vstream_handle = 3; } +message ProtoCallbackIdentifier { + uint32 vdevice_handle = 1; + uint32 network_group_handle = 2; + uint32 cb_type = 3; + uint32 cb_idx = 4; + string stream_name = 5; + uint32 direction = 6; + bytes data = 7; + uint32 status = 8; +} + +message ProtoTransferRequest { + string stream_name = 1; + uint32 direction = 2; + bytes data = 3; + uint32 size = 4; + uint32 cb_idx = 5; +} + +message ConfiguredNetworkGroup_infer_async_Request { + ProtoConfiguredNetworkGroupIdentifier identifier = 1; + uint32 infer_request_done_cb_idx = 2; + repeated ProtoTransferRequest transfer_requests = 3; +} + +message ConfiguredNetworkGroup_infer_async_Reply { + uint32 status = 1; +} + message ProtoVDeviceParams { uint32 device_count = 1; repeated string device_ids = 2; @@ -119,6 +159,7 @@ message ConfiguredNetworkGroup_dup_handle_Request { message ConfiguredNetworkGroup_dup_handle_Reply { 
uint32 handle = 1; + uint32 status = 2; } message VDevice_create_Request { @@ -273,6 +314,160 @@ message ProtoVStreamInfo { ProtoQuantInfo quant_info = 7; } +message ProtoUnimplementedMessage { + // Currently not used on the client side, so it isn't implemented +} + +message ProtoLayerInfo { + uint32 type = 1; + uint32 direction = 2; + uint32 stream_index = 3; + uint32 dma_engine_index = 4; + string name = 5; + string network_name = 6; + uint32 network_index = 7; + ProtoUnimplementedMessage nn_stream_config = 8; + uint32 max_shmifo_size = 9; + uint32 context_index = 10; + uint32 pad_index = 11; + + // Transformation and shape info + ProtoThreeDImageShape shape = 12; + ProtoThreeDImageShape hw_shape = 13; + uint32 hw_data_bytes = 14; + ProtoHailoFormat format = 15; + ProtoQuantInfo quant_info = 16; + repeated ProtoQuantInfo quant_infos = 17; + ProtoNmsInfo nms_info = 18; + + // Mux info + bool is_mux = 19; + repeated ProtoLayerInfo predecessor = 20; + uint32 height_gcd = 21; + repeated uint32 height_ratios = 22; + + // Multi planes info + bool is_multi_planar = 23; + repeated ProtoLayerInfo planes = 24; + uint32 plane_index = 25; // relevant for the underlying planes only + + // Defused nms info + bool is_defused_nms = 26; + // TODO HRT-4441 change fused_layer from vector. + repeated ProtoLayerInfo fused_nms_layer = 27; + + // Simulation Info + ProtoUnimplementedMessage buffer_indices = 28; + + // Context switch info TODO: we should use std::optional for these structures (or implement it ourselves).
+ ProtoUnimplementedMessage connected_context_info = 29; + ProtoUnimplementedMessage ddr_info = 30; +} + +message ProtoBufferMetaData { + ProtoThreeDImageShape shape = 1; + ProtoThreeDImageShape padded_shape = 2; + ProtoHailoFormat format = 3; + ProtoQuantInfo quant_info = 4; +} + +message ProtoNamedMetadata { + string name = 1; + ProtoBufferMetaData params = 2; +} + +message ProtoYolov5Anchors { + string layer = 1; + repeated uint32 anchors = 2; +} + +message ProtoYolov5Config { + float image_height = 1; + float image_width = 2; + repeated ProtoYolov5Anchors yolov5_anchors = 3; +} + +message ProtoSSDRegToClsInputs { + string reg = 1; + string cls = 2; +} + +message ProtoSSDAnchors { + string layer = 1; + repeated float anchors_per_layer = 2; +} + +message ProtoSSDConfig { + float image_height = 1; + float image_width = 2; + uint32 centers_scale_factor = 3; + uint32 bbox_dimensions_scale_factor = 4; + uint32 ty_index = 5; + uint32 tx_index = 6; + uint32 th_index = 7; + uint32 tw_index = 8; + bool normalize_boxes = 9; + repeated ProtoSSDRegToClsInputs reg_to_cls_inputs = 10; + repeated ProtoSSDAnchors anchors = 11; +} + +message ProtoNmsPostProcessConfig { + double nms_score_th = 1; + double nms_iou_th = 2; + uint32 max_proposals_per_class = 3; + uint32 number_of_classes = 4; + bool background_removal = 5; + uint32 background_removal_index = 6; + bool cross_classes = 7; +} + +message ProtoYolov8MatchingLayersNames { + string reg = 1; + string cls = 2; + uint32 stride = 3; +} + +message ProtoYolov8PostProcessConfig { + float image_height = 1; + float image_width = 2; + repeated ProtoYolov8MatchingLayersNames reg_to_cls_inputs = 3; +} + +message ProtoYoloxMatchingLayersNames { + string reg = 1; + string obj = 2; + string cls = 3; +} + +message ProtoYoloxPostProcessConfig { + float image_height = 1; + float image_width = 2; + repeated ProtoYoloxMatchingLayersNames input_names = 3; +} + +message ProtoYoloV5SegPostProcessConfig { + double mask_threshold = 1; + string 
layer_name = 2; +} + +message ProtoOpMetadata { + string name = 1; + string network_name = 2; + uint32 type = 3; + repeated ProtoNamedMetadata inputs_metadata = 4; + repeated ProtoNamedMetadata outputs_metadata = 5; + ProtoNmsPostProcessConfig nms_post_process_config = 6; + ProtoYolov5Config yolov5_config = 7; + ProtoSSDConfig ssd_config = 8; + ProtoYolov8PostProcessConfig yolov8_config = 9; + ProtoYoloxPostProcessConfig yolox_config = 10; + ProtoYoloV5SegPostProcessConfig yolov5seg_config = 11; +} + +message ProtoOpsMetadata { + repeated ProtoOpMetadata ops_metadata = 1; +} + message ProtoConfigureNetworkParams { uint32 batch_size = 1; uint32 power_mode = 2; @@ -320,9 +515,25 @@ message VDevice_get_default_streams_interface_Reply { uint32 stream_interface = 2; } +message VDevice_get_callback_id_Request { + ProtoVDeviceIdentifier identifier = 1; +} + +message VDevice_get_callback_id_Reply { + uint32 status = 1; + ProtoCallbackIdentifier callback_id = 2; +} + +message VDevice_finish_callback_listener_Request { + ProtoVDeviceIdentifier identifier = 1; +} + +message VDevice_finish_callback_listener_Reply { + uint32 status = 1; +} + message ConfiguredNetworkGroup_make_input_vstream_params_Request { ProtoConfiguredNetworkGroupIdentifier identifier = 1; - bool quantized = 2; uint32 format_type = 3; uint32 timeout_ms = 4; uint32 queue_size = 5; @@ -341,7 +552,6 @@ message ConfiguredNetworkGroup_make_input_vstream_params_Reply { // TODO: Why do we have this struct for both input and output instead of 1 struct for both message ConfiguredNetworkGroup_make_output_vstream_params_Request { ProtoConfiguredNetworkGroupIdentifier identifier = 1; - bool quantized = 2; uint32 format_type = 3; uint32 timeout_ms = 4; uint32 queue_size = 5; @@ -355,7 +565,6 @@ message ConfiguredNetworkGroup_make_output_vstream_params_Reply { message ConfiguredNetworkGroup_make_output_vstream_params_groups_Request { ProtoConfiguredNetworkGroupIdentifier identifier = 1; - bool quantized = 2; uint32 
format_type = 3; uint32 timeout_ms = 4; uint32 queue_size = 5; @@ -403,6 +612,14 @@ message ConfiguredNetworkGroup_get_default_stream_interface_Reply { uint32 stream_interface = 2; } +message ConfiguredNetworkGroup_shutdown_Request { + ProtoConfiguredNetworkGroupIdentifier identifier = 1; +} + +message ConfiguredNetworkGroup_shutdown_Reply { + uint32 status = 1; +} + message ConfiguredNetworkGroup_get_output_vstream_groups_Request { ProtoConfiguredNetworkGroupIdentifier identifier = 1; } @@ -498,6 +715,64 @@ message ConfiguredNetworkGroup_get_sorted_output_names_Reply { repeated string sorted_output_names = 2; } +message ConfiguredNetworkGroup_get_min_buffer_pool_size_Request { + ProtoConfiguredNetworkGroupIdentifier identifier = 1; +} + +message ConfiguredNetworkGroup_get_min_buffer_pool_size_Reply { + uint32 status = 1; + uint32 min_buffer_pool_size = 2; +} + +message ConfiguredNetworkGroup_get_layer_info_Request { + ProtoConfiguredNetworkGroupIdentifier identifier = 1; + string stream_name = 2; +} + +message ConfiguredNetworkGroup_get_layer_info_Reply { + uint32 status = 1; + ProtoLayerInfo layer_info = 2; +} + +message ConfiguredNetworkGroup_get_ops_metadata_Request { + ProtoConfiguredNetworkGroupIdentifier identifier = 1; +} + +message ConfiguredNetworkGroup_get_ops_metadata_Reply { + uint32 status = 1; + ProtoOpsMetadata ops_metadata = 2; +} + +message ConfiguredNetworkGroup_set_nms_score_threshold_Request { + ProtoConfiguredNetworkGroupIdentifier identifier = 1; + string edge_name = 2; + float nms_score_th = 3; +} + +message ConfiguredNetworkGroup_set_nms_score_threshold_Reply { + uint32 status = 1; +} + +message ConfiguredNetworkGroup_set_nms_iou_threshold_Request { + ProtoConfiguredNetworkGroupIdentifier identifier = 1; + string edge_name = 2; + float nms_iou_th = 3; +} + +message ConfiguredNetworkGroup_set_nms_iou_threshold_Reply { + uint32 status = 1; +} + +message ConfiguredNetworkGroup_set_nms_max_bboxes_per_class_Request { + 
ProtoConfiguredNetworkGroupIdentifier identifier = 1; + string edge_name = 2; + uint32 nms_max_bboxes_per_class = 3; +} + +message ConfiguredNetworkGroup_set_nms_max_bboxes_per_class_Reply { + uint32 status = 1; +} + message ConfiguredNetworkGroup_get_stream_names_from_vstream_name_Request { ProtoConfiguredNetworkGroupIdentifier identifier = 1; string vstream_name = 2; diff --git a/hailort/rpc/rpc_definitions.hpp b/hailort/rpc/rpc_definitions.hpp index 7e999b1..024d5c3 100644 --- a/hailort/rpc/rpc_definitions.hpp +++ b/hailort/rpc/rpc_definitions.hpp @@ -32,6 +32,11 @@ static const std::string HAILORT_SERVICE_ADDRESS = []() { } }(); +typedef enum { + CALLBACK_TYPE_TRANSFER = 0, + CALLBACK_TYPE_INFER_REQUEST = 1, +} callback_type_t; + class VDeviceIdentifier { public: VDeviceIdentifier(uint32_t vdevice_handle) : m_vdevice_handle(vdevice_handle) diff --git a/hailort/scripts/download_firmware_eth.cmd b/hailort/scripts/download_firmware_eth.cmd index df3e2db..9c33925 100644 --- a/hailort/scripts/download_firmware_eth.cmd +++ b/hailort/scripts/download_firmware_eth.cmd @@ -2,7 +2,7 @@ @ECHO OFF set BASE_URI=https://hailo-hailort.s3.eu-west-2.amazonaws.com -set HRT_VERSION=4.15.0 +set HRT_VERSION=4.16.0 set FW_DIR=Hailo8/%HRT_VERSION%/FW set FW=hailo8_fw.%HRT_VERSION%_eth.bin diff --git a/hailort/scripts/download_firmware_eth.sh b/hailort/scripts/download_firmware_eth.sh index 1c5904e..572d3b6 100755 --- a/hailort/scripts/download_firmware_eth.sh +++ b/hailort/scripts/download_firmware_eth.sh @@ -2,7 +2,7 @@ set -e readonly BASE_URI="https://hailo-hailort.s3.eu-west-2.amazonaws.com" -readonly HRT_VERSION=4.15.0 +readonly HRT_VERSION=4.16.0 readonly FW_AWS_DIR="Hailo8/${HRT_VERSION}/FW" readonly FW="hailo8_fw.${HRT_VERSION}_eth.bin" diff --git a/hailort/scripts/download_hefs.cmd b/hailort/scripts/download_hefs.cmd index d5a8943..3fb8cac 100644 --- a/hailort/scripts/download_hefs.cmd +++ b/hailort/scripts/download_hefs.cmd @@ -1,7 +1,7 @@ :: cmd @ECHO OFF set 
BASE_URI=https://hailo-hailort.s3.eu-west-2.amazonaws.com -set HRT_VERSION=4.15.0 +set HRT_VERSION=4.16.0 set REMOTE_HEF_DIR=Hailo8/%HRT_VERSION%/HEFS set LOCAL_EXAMPLES_HEF_DIR=..\libhailort\examples\hefs set LOCAL_TUTORIALS_HEF_DIR=..\libhailort\bindings\python\platform\hailo_tutorials\hefs diff --git a/hailort/scripts/download_hefs.sh b/hailort/scripts/download_hefs.sh index 2073ded..47d3b93 100755 --- a/hailort/scripts/download_hefs.sh +++ b/hailort/scripts/download_hefs.sh @@ -2,7 +2,7 @@ set -e readonly BASE_URI="https://hailo-hailort.s3.eu-west-2.amazonaws.com" -readonly HRT_VERSION=4.15.0 +readonly HRT_VERSION=4.16.0 readonly REMOTE_HEF_DIR="Hailo8/${HRT_VERSION}/HEFS" readonly LOCAL_EXAMPLES_HEF_DIR="../libhailort/examples/hefs" readonly LOCAL_TUTORIALS_HEF_DIR="../libhailort/bindings/python/platform/hailo_tutorials/hefs" diff --git a/hailort/tools/hailo15-scripts/load_hrt.sh b/hailort/tools/hailo15-scripts/load_hrt.sh index 8c6947d..e85594b 100755 --- a/hailort/tools/hailo15-scripts/load_hrt.sh +++ b/hailort/tools/hailo15-scripts/load_hrt.sh @@ -6,9 +6,10 @@ script_directory=$(cd $(dirname "${BASH_SOURCE[0]}") && pwd) source "$script_directory"/hailo15_env_vars.sh cd $local_platform_sw_path -./build.sh -aaarch64 -brelease install +build_config=release +./build.sh -n pG -aaarch64 -b$build_config install -scp lib/linux.aarch64.release/libhailort.* root@$h15:/usr/lib/ -scp bin/linux.aarch64.release/hailortcli root@$h15:/usr/bin/ -scp bin/linux.aarch64.release/debalex root@$h15:/usr/bin/ -scp bin/linux.aarch64.release/board_tests root@$h15:/usr/bin/ +scp lib/linux.aarch64.$build_config/libhailort.* root@$h15:/usr/lib/ +scp bin/linux.aarch64.$build_config/hailortcli root@$h15:/usr/bin/ +scp bin/linux.aarch64.$build_config/debalex root@$h15:/usr/bin/ +scp bin/linux.aarch64.$build_config/board_tests root@$h15:/usr/bin/ diff --git a/hailort/tools/hw_debug/CMakeLists.txt b/hailort/tools/hw_debug/CMakeLists.txt index a38c3a1..5cdfa81 100644 --- 
a/hailort/tools/hw_debug/CMakeLists.txt +++ b/hailort/tools/hw_debug/CMakeLists.txt @@ -1,6 +1,7 @@ cmake_minimum_required(VERSION 3.0.0) include(${HAILO_EXTERNALS_CMAKE_SCRIPTS}/spdlog.cmake) +include(${HAILO_EXTERNALS_CMAKE_SCRIPTS}/cli11.cmake) set(FILES main.cpp -- 2.34.1